From f24a9ea8de333cd57eae62d0e936444bcefd3bd6 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 6 Mar 2023 17:16:33 +0100 Subject: [PATCH 01/37] A minor AtlasEngine refactoring --- .github/actions/spelling/allow/apis.txt | 1 + .github/actions/spelling/expect/expect.txt | 1 + NOTICE.md | 25 + OpenConsole.sln | 43 + oss/stb/LICENSE | 37 + oss/stb/MAINTAINER_README.md | 4 + oss/stb/cgmanifest.json | 15 + oss/stb/stb_rect_pack.h | 623 ++++++ src/inc/til/color.h | 3 +- src/inc/til/generational.h | 53 + src/interactivity/win32/window.hpp | 7 +- src/renderer/atlas/AtlasEngine.api.cpp | 180 +- src/renderer/atlas/AtlasEngine.cpp | 1694 ++++------------- src/renderer/atlas/AtlasEngine.h | 1014 +--------- src/renderer/atlas/AtlasEngine.r.cpp | 1126 ++--------- src/renderer/atlas/Backend.cpp | 339 ++++ src/renderer/atlas/Backend.h | 84 + src/renderer/atlas/BackendD2D.cpp | 381 ++++ src/renderer/atlas/BackendD2D.h | 43 + src/renderer/atlas/BackendD3D11.cpp | 1283 +++++++++++++ src/renderer/atlas/BackendD3D11.h | 214 +++ src/renderer/atlas/DWriteTextAnalysis.cpp | 2 +- src/renderer/atlas/DWriteTextAnalysis.h | 2 +- src/renderer/atlas/atlas.vcxproj | 23 +- src/renderer/atlas/common.h | 437 +++++ src/renderer/atlas/pch.h | 6 +- src/renderer/atlas/shader_common.hlsl | 47 + src/renderer/atlas/shader_ps.hlsl | 238 +-- src/renderer/atlas/shader_vs.hlsl | 34 +- src/renderer/atlas/stb_rect_pack.cpp | 4 + src/renderer/base/renderer.cpp | 24 +- src/renderer/base/renderer.hpp | 2 +- src/renderer/dx/DxRenderer.cpp | 77 +- src/renderer/dx/DxRenderer.hpp | 2 - src/renderer/inc/IRenderEngine.hpp | 28 +- .../RenderingTests/RenderingTests.vcxproj | 26 + .../RenderingTests.vcxproj.filters | 4 + src/tools/RenderingTests/main.cpp | 175 ++ 38 files changed, 4659 insertions(+), 3642 deletions(-) create mode 100644 oss/stb/LICENSE create mode 100644 oss/stb/MAINTAINER_README.md create mode 100644 oss/stb/cgmanifest.json create mode 100644 oss/stb/stb_rect_pack.h create mode 100644 
src/inc/til/generational.h create mode 100644 src/renderer/atlas/Backend.cpp create mode 100644 src/renderer/atlas/Backend.h create mode 100644 src/renderer/atlas/BackendD2D.cpp create mode 100644 src/renderer/atlas/BackendD2D.h create mode 100644 src/renderer/atlas/BackendD3D11.cpp create mode 100644 src/renderer/atlas/BackendD3D11.h create mode 100644 src/renderer/atlas/common.h create mode 100644 src/renderer/atlas/shader_common.hlsl create mode 100644 src/renderer/atlas/stb_rect_pack.cpp create mode 100644 src/tools/RenderingTests/RenderingTests.vcxproj create mode 100644 src/tools/RenderingTests/RenderingTests.vcxproj.filters create mode 100644 src/tools/RenderingTests/main.cpp diff --git a/.github/actions/spelling/allow/apis.txt b/.github/actions/spelling/allow/apis.txt index ce36d8d2a2b..e0f1a9cd37f 100644 --- a/.github/actions/spelling/allow/apis.txt +++ b/.github/actions/spelling/allow/apis.txt @@ -214,6 +214,7 @@ userenv USEROBJECTFLAGS Viewbox virtualalloc +vsnwprintf wcsstr wcstoui WDJ diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 171d4c68a12..5ea356fa5b8 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1785,6 +1785,7 @@ STARTWPARMS STARTWPARMSA STARTWPARMSW Statusline +stb stdafx STDAPI stdc diff --git a/NOTICE.md b/NOTICE.md index e1488fd4a20..e5046604574 100644 --- a/NOTICE.md +++ b/NOTICE.md @@ -276,6 +276,31 @@ OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to ``` +## stb + +**Source**: [https://github.com/nothings/stb](https://github.com/nothings/stb) + +### License + +``` +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. 
+In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +``` + ## ConEmu **Source**: [https://github.com/Maximus5/ConEmu](https://github.com/Maximus5/ConEmu) diff --git a/OpenConsole.sln b/OpenConsole.sln index 523af7e0297..73d5b4fbb60 100644 --- a/OpenConsole.sln +++ b/OpenConsole.sln @@ -415,6 +415,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "MidiAudio", "src\audio\midi EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TerminalStress", "src\tools\TerminalStress\TerminalStress.csproj", "{613CCB57-5FA9-48EF-80D0-6B1E319E20C4}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RenderingTests", "src\tools\RenderingTests\RenderingTests.vcxproj", "{37C995E0-2349-4154-8E77-4A52C0C7F46D}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution AuditMode|Any CPU = AuditMode|Any CPU @@ -2767,6 +2769,46 @@ Global {613CCB57-5FA9-48EF-80D0-6B1E319E20C4}.Release|x64.ActiveCfg = Release|Any CPU {613CCB57-5FA9-48EF-80D0-6B1E319E20C4}.Release|x64.Build.0 = Release|Any CPU {613CCB57-5FA9-48EF-80D0-6B1E319E20C4}.Release|x86.ActiveCfg = Release|Any CPU + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|Any 
CPU.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|Any CPU.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|ARM.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|ARM.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|ARM64.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|ARM64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|x64.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|x64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|x86.ActiveCfg = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.AuditMode|x86.Build.0 = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|Any CPU.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|Any CPU.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|ARM.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|ARM.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|ARM64.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|ARM64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|x64.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|x64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|x86.ActiveCfg = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Debug|x86.Build.0 = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|Any CPU.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|Any CPU.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|ARM.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|ARM.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|ARM64.ActiveCfg = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|ARM64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|x64.ActiveCfg = Debug|x64 + 
{37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|x64.Build.0 = Debug|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|x86.ActiveCfg = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Fuzzing|x86.Build.0 = Debug|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|Any CPU.ActiveCfg = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|Any CPU.Build.0 = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|ARM.ActiveCfg = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|ARM.Build.0 = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|ARM64.ActiveCfg = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|ARM64.Build.0 = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|x64.ActiveCfg = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|x64.Build.0 = Release|x64 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|x86.ActiveCfg = Release|Win32 + {37C995E0-2349-4154-8E77-4A52C0C7F46D}.Release|x86.Build.0 = Release|Win32 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -2872,6 +2914,7 @@ Global {40BD8415-DD93-4200-8D82-498DDDC08CC8} = {89CDCC5C-9F53-4054-97A4-639D99F169CD} {3C67784E-1453-49C2-9660-483E2CC7F7AD} = {40BD8415-DD93-4200-8D82-498DDDC08CC8} {613CCB57-5FA9-48EF-80D0-6B1E319E20C4} = {A10C4720-DCA4-4640-9749-67F4314F527C} + {37C995E0-2349-4154-8E77-4A52C0C7F46D} = {A10C4720-DCA4-4640-9749-67F4314F527C} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {3140B1B7-C8EE-43D1-A772-D82A7061A271} diff --git a/oss/stb/LICENSE b/oss/stb/LICENSE new file mode 100644 index 00000000000..21635cc163c --- /dev/null +++ b/oss/stb/LICENSE @@ -0,0 +1,37 @@ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/oss/stb/MAINTAINER_README.md b/oss/stb/MAINTAINER_README.md new file mode 100644 index 00000000000..2f23f5346c8 --- /dev/null +++ b/oss/stb/MAINTAINER_README.md @@ -0,0 +1,4 @@ +### Notes for Future Maintainers + +Search for files prefixed with `stb_` in this project. +At the time of writing, the only file being used is `stb_rect_pack.h`. diff --git a/oss/stb/cgmanifest.json b/oss/stb/cgmanifest.json new file mode 100644 index 00000000000..69fa8d1e26a --- /dev/null +++ b/oss/stb/cgmanifest.json @@ -0,0 +1,15 @@ +{ + "$schema": "https://json.schemastore.org/component-detection-manifest.json", + "Registrations": [ + { + "component": { + "type": "git", + "git": { + "repositoryUrl": "https://github.com/nothings/stb", + "commitHash": "5736b15f7ea0ffb08dd38af21067c314d6a3aae9" + } + } + } + ], + "Version": 1 +} diff --git a/oss/stb/stb_rect_pack.h b/oss/stb/stb_rect_pack.h new file mode 100644 index 00000000000..6a633ce666a --- /dev/null +++ b/oss/stb/stb_rect_pack.h @@ -0,0 +1,623 @@ +// stb_rect_pack.h - v1.01 - public domain - rectangle packing +// Sean Barrett 2014 +// +// Useful for e.g. packing rectangular textures into an atlas. +// Does not do rotation. +// +// Before #including, +// +// #define STB_RECT_PACK_IMPLEMENTATION +// +// in the file that you want to have the implementation. 
+// +// Not necessarily the awesomest packing method, but better than +// the totally naive one in stb_truetype (which is primarily what +// this is meant to replace). +// +// Has only had a few tests run, may have issues. +// +// More docs to come. +// +// No memory allocations; uses qsort() and assert() from stdlib. +// Can override those by defining STBRP_SORT and STBRP_ASSERT. +// +// This library currently uses the Skyline Bottom-Left algorithm. +// +// Please note: better rectangle packers are welcome! Please +// implement them to the same API, but with a different init +// function. +// +// Credits +// +// Library +// Sean Barrett +// Minor features +// Martins Mozeiko +// github:IntellectualKitty +// +// Bugfixes / warning fixes +// Jeremy Jaussaud +// Fabian Giesen +// +// Version history: +// +// 1.01 (2021-07-11) always use large rect mode, expose STBRP__MAXVAL in public section +// 1.00 (2019-02-25) avoid small space waste; gracefully fail too-wide rectangles +// 0.99 (2019-02-07) warning fixes +// 0.11 (2017-03-03) return packing success/fail result +// 0.10 (2016-10-25) remove cast-away-const to avoid warnings +// 0.09 (2016-08-27) fix compiler warnings +// 0.08 (2015-09-13) really fix bug with empty rects (w=0 or h=0) +// 0.07 (2015-09-13) fix bug with empty rects (w=0 or h=0) +// 0.06 (2015-04-15) added STBRP_SORT to allow replacing qsort +// 0.05: added STBRP_ASSERT to allow replacing assert +// 0.04: fixed minor bug in STBRP_LARGE_RECTS support +// 0.01: initial release +// +// LICENSE +// +// See end of file for license information. 
+ +////////////////////////////////////////////////////////////////////////////// +// +// INCLUDE SECTION +// + +#ifndef STB_INCLUDE_STB_RECT_PACK_H +#define STB_INCLUDE_STB_RECT_PACK_H + +#define STB_RECT_PACK_VERSION 1 + +#ifdef STBRP_STATIC +#define STBRP_DEF static +#else +#define STBRP_DEF extern +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct stbrp_context stbrp_context; +typedef struct stbrp_node stbrp_node; +typedef struct stbrp_rect stbrp_rect; + +typedef int stbrp_coord; + +#define STBRP__MAXVAL 0x7fffffff +// Mostly for internal use, but this is the maximum supported coordinate value. + +STBRP_DEF int stbrp_pack_rects (stbrp_context *context, stbrp_rect *rects, int num_rects); +// Assign packed locations to rectangles. The rectangles are of type +// 'stbrp_rect' defined below, stored in the array 'rects', and there +// are 'num_rects' many of them. +// +// Rectangles which are successfully packed have the 'was_packed' flag +// set to a non-zero value and 'x' and 'y' store the minimum location +// on each axis (i.e. bottom-left in cartesian coordinates, top-left +// if you imagine y increasing downwards). Rectangles which do not fit +// have the 'was_packed' flag set to 0. +// +// You should not try to access the 'rects' array from another thread +// while this function is running, as the function temporarily reorders +// the array while it executes. +// +// To pack into another rectangle, you need to call stbrp_init_target +// again. To continue packing into the same rectangle, you can call +// this function again. Calling this multiple times with multiple rect +// arrays will probably produce worse packing results than calling it +// a single time with the full rectangle array, but the option is +// available. +// +// The function returns 1 if all of the rectangles were successfully +// packed and 0 otherwise. 
+ +struct stbrp_rect +{ + // reserved for your use: + int id; + + // input: + stbrp_coord w, h; + + // output: + stbrp_coord x, y; + int was_packed; // non-zero if valid packing + +}; // 16 bytes, nominally + + +STBRP_DEF void stbrp_init_target (stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes); +// Initialize a rectangle packer to: +// pack a rectangle that is 'width' by 'height' in dimensions +// using temporary storage provided by the array 'nodes', which is 'num_nodes' long +// +// You must call this function every time you start packing into a new target. +// +// There is no "shutdown" function. The 'nodes' memory must stay valid for +// the following stbrp_pack_rects() call (or calls), but can be freed after +// the call (or calls) finish. +// +// Note: to guarantee best results, either: +// 1. make sure 'num_nodes' >= 'width' +// or 2. call stbrp_allow_out_of_mem() defined below with 'allow_out_of_mem = 1' +// +// If you don't do either of the above things, widths will be quantized to multiples +// of small integers to guarantee the algorithm doesn't run out of temporary storage. +// +// If you do #2, then the non-quantized algorithm will be used, but the algorithm +// may run out of temporary storage and be unable to pack some rectangles. + +STBRP_DEF void stbrp_setup_allow_out_of_mem (stbrp_context *context, int allow_out_of_mem); +// Optionally call this function after init but before doing any packing to +// change the handling of the out-of-temp-memory scenario, described above. +// If you call init again, this will be reset to the default (false). + + +STBRP_DEF void stbrp_setup_heuristic (stbrp_context *context, int heuristic); +// Optionally select which packing heuristic the library should use. Different +// heuristics will produce better/worse results for different data sets. +// If you call init again, this will be reset to the default. 
+ +enum +{ + STBRP_HEURISTIC_Skyline_default=0, + STBRP_HEURISTIC_Skyline_BL_sortHeight = STBRP_HEURISTIC_Skyline_default, + STBRP_HEURISTIC_Skyline_BF_sortHeight +}; + + +////////////////////////////////////////////////////////////////////////////// +// +// the details of the following structures don't matter to you, but they must +// be visible so you can handle the memory allocations for them + +struct stbrp_node +{ + stbrp_coord x,y; + stbrp_node *next; +}; + +struct stbrp_context +{ + int width; + int height; + int align; + int init_mode; + int heuristic; + int num_nodes; + stbrp_node *active_head; + stbrp_node *free_head; + stbrp_node extra[2]; // we allocate two extra nodes so optimal user-node-count is 'width' not 'width+2' +}; + +#ifdef __cplusplus +} +#endif + +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// IMPLEMENTATION SECTION +// + +#ifdef STB_RECT_PACK_IMPLEMENTATION +#ifndef STBRP_SORT +#include <stdlib.h> +#define STBRP_SORT qsort +#endif + +#ifndef STBRP_ASSERT +#include <assert.h> +#define STBRP_ASSERT assert +#endif + +#ifdef _MSC_VER +#define STBRP__NOTUSED(v) (void)(v) +#define STBRP__CDECL __cdecl +#else +#define STBRP__NOTUSED(v) (void)sizeof(v) +#define STBRP__CDECL +#endif + +enum +{ + STBRP__INIT_skyline = 1 +}; + +STBRP_DEF void stbrp_setup_heuristic(stbrp_context *context, int heuristic) +{ + switch (context->init_mode) { + case STBRP__INIT_skyline: + STBRP_ASSERT(heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight || heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight); + context->heuristic = heuristic; + break; + default: + STBRP_ASSERT(0); + } +} + +STBRP_DEF void stbrp_setup_allow_out_of_mem(stbrp_context *context, int allow_out_of_mem) +{ + if (allow_out_of_mem) + // if it's ok to run out of memory, then don't bother aligning them; + // this gives better packing, but may fail due to OOM (even though + // the rectangles easily fit).
@TODO a smarter approach would be to only + // quantize once we've hit OOM, then we could get rid of this parameter. + context->align = 1; + else { + // if it's not ok to run out of memory, then quantize the widths + // so that num_nodes is always enough nodes. + // + // I.e. num_nodes * align >= width + // align >= width / num_nodes + // align = ceil(width/num_nodes) + + context->align = (context->width + context->num_nodes-1) / context->num_nodes; + } +} + +STBRP_DEF void stbrp_init_target(stbrp_context *context, int width, int height, stbrp_node *nodes, int num_nodes) +{ + int i; + + for (i=0; i < num_nodes-1; ++i) + nodes[i].next = &nodes[i+1]; + nodes[i].next = NULL; + context->init_mode = STBRP__INIT_skyline; + context->heuristic = STBRP_HEURISTIC_Skyline_default; + context->free_head = &nodes[0]; + context->active_head = &context->extra[0]; + context->width = width; + context->height = height; + context->num_nodes = num_nodes; + stbrp_setup_allow_out_of_mem(context, 0); + + // node 0 is the full width, node 1 is the sentinel (lets us not store width explicitly) + context->extra[0].x = 0; + context->extra[0].y = 0; + context->extra[0].next = &context->extra[1]; + context->extra[1].x = (stbrp_coord) width; + context->extra[1].y = (1<<30); + context->extra[1].next = NULL; +} + +// find minimum y position if it starts at x1 +static int stbrp__skyline_find_min_y(stbrp_context *c, stbrp_node *first, int x0, int width, int *pwaste) +{ + stbrp_node *node = first; + int x1 = x0 + width; + int min_y, visited_width, waste_area; + + STBRP__NOTUSED(c); + + STBRP_ASSERT(first->x <= x0); + + #if 0 + // skip in case we're past the node + while (node->next->x <= x0) + ++node; + #else + STBRP_ASSERT(node->next->x > x0); // we ended up handling this in the caller for efficiency + #endif + + STBRP_ASSERT(node->x <= x0); + + min_y = 0; + waste_area = 0; + visited_width = 0; + while (node->x < x1) { + if (node->y > min_y) { + // raise min_y higher. 
+ // we've accounted for all waste up to min_y, + // but we'll now add more waste for everything we've visted + waste_area += visited_width * (node->y - min_y); + min_y = node->y; + // the first time through, visited_width might be reduced + if (node->x < x0) + visited_width += node->next->x - x0; + else + visited_width += node->next->x - node->x; + } else { + // add waste area + int under_width = node->next->x - node->x; + if (under_width + visited_width > width) + under_width = width - visited_width; + waste_area += under_width * (min_y - node->y); + visited_width += under_width; + } + node = node->next; + } + + *pwaste = waste_area; + return min_y; +} + +typedef struct +{ + int x,y; + stbrp_node **prev_link; +} stbrp__findresult; + +static stbrp__findresult stbrp__skyline_find_best_pos(stbrp_context *c, int width, int height) +{ + int best_waste = (1<<30), best_x, best_y = (1 << 30); + stbrp__findresult fr; + stbrp_node **prev, *node, *tail, **best = NULL; + + // align to multiple of c->align + width = (width + c->align - 1); + width -= width % c->align; + STBRP_ASSERT(width % c->align == 0); + + // if it can't possibly fit, bail immediately + if (width > c->width || height > c->height) { + fr.prev_link = NULL; + fr.x = fr.y = 0; + return fr; + } + + node = c->active_head; + prev = &c->active_head; + while (node->x + width <= c->width) { + int y,waste; + y = stbrp__skyline_find_min_y(c, node, node->x, width, &waste); + if (c->heuristic == STBRP_HEURISTIC_Skyline_BL_sortHeight) { // actually just want to test BL + // bottom left + if (y < best_y) { + best_y = y; + best = prev; + } + } else { + // best-fit + if (y + height <= c->height) { + // can only use it if it first vertically + if (y < best_y || (y == best_y && waste < best_waste)) { + best_y = y; + best_waste = waste; + best = prev; + } + } + } + prev = &node->next; + node = node->next; + } + + best_x = (best == NULL) ? 
0 : (*best)->x; + + // if doing best-fit (BF), we also have to try aligning right edge to each node position + // + // e.g, if fitting + // + // ____________________ + // |____________________| + // + // into + // + // | | + // | ____________| + // |____________| + // + // then right-aligned reduces waste, but bottom-left BL is always chooses left-aligned + // + // This makes BF take about 2x the time + + if (c->heuristic == STBRP_HEURISTIC_Skyline_BF_sortHeight) { + tail = c->active_head; + node = c->active_head; + prev = &c->active_head; + // find first node that's admissible + while (tail->x < width) + tail = tail->next; + while (tail) { + int xpos = tail->x - width; + int y,waste; + STBRP_ASSERT(xpos >= 0); + // find the left position that matches this + while (node->next->x <= xpos) { + prev = &node->next; + node = node->next; + } + STBRP_ASSERT(node->next->x > xpos && node->x <= xpos); + y = stbrp__skyline_find_min_y(c, node, xpos, width, &waste); + if (y + height <= c->height) { + if (y <= best_y) { + if (y < best_y || waste < best_waste || (waste==best_waste && xpos < best_x)) { + best_x = xpos; + STBRP_ASSERT(y <= best_y); + best_y = y; + best_waste = waste; + best = prev; + } + } + } + tail = tail->next; + } + } + + fr.prev_link = best; + fr.x = best_x; + fr.y = best_y; + return fr; +} + +static stbrp__findresult stbrp__skyline_pack_rectangle(stbrp_context *context, int width, int height) +{ + // find best position according to heuristic + stbrp__findresult res = stbrp__skyline_find_best_pos(context, width, height); + stbrp_node *node, *cur; + + // bail if: + // 1. it failed + // 2. the best node doesn't fit (we don't always check this) + // 3. 
we're out of memory + if (res.prev_link == NULL || res.y + height > context->height || context->free_head == NULL) { + res.prev_link = NULL; + return res; + } + + // on success, create new node + node = context->free_head; + node->x = (stbrp_coord) res.x; + node->y = (stbrp_coord) (res.y + height); + + context->free_head = node->next; + + // insert the new node into the right starting point, and + // let 'cur' point to the remaining nodes needing to be + // stiched back in + + cur = *res.prev_link; + if (cur->x < res.x) { + // preserve the existing one, so start testing with the next one + stbrp_node *next = cur->next; + cur->next = node; + cur = next; + } else { + *res.prev_link = node; + } + + // from here, traverse cur and free the nodes, until we get to one + // that shouldn't be freed + while (cur->next && cur->next->x <= res.x + width) { + stbrp_node *next = cur->next; + // move the current node to the free list + cur->next = context->free_head; + context->free_head = cur; + cur = next; + } + + // stitch the list back in + node->next = cur; + + if (cur->x < res.x + width) + cur->x = (stbrp_coord) (res.x + width); + +#ifdef _DEBUG + cur = context->active_head; + while (cur->x < context->width) { + STBRP_ASSERT(cur->x < cur->next->x); + cur = cur->next; + } + STBRP_ASSERT(cur->next == NULL); + + { + int count=0; + cur = context->active_head; + while (cur) { + cur = cur->next; + ++count; + } + cur = context->free_head; + while (cur) { + cur = cur->next; + ++count; + } + STBRP_ASSERT(count == context->num_nodes+2); + } +#endif + + return res; +} + +static int STBRP__CDECL rect_height_compare(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + if (p->h > q->h) + return -1; + if (p->h < q->h) + return 1; + return (p->w > q->w) ? 
-1 : (p->w < q->w); +} + +static int STBRP__CDECL rect_original_order(const void *a, const void *b) +{ + const stbrp_rect *p = (const stbrp_rect *) a; + const stbrp_rect *q = (const stbrp_rect *) b; + return (p->was_packed < q->was_packed) ? -1 : (p->was_packed > q->was_packed); +} + +STBRP_DEF int stbrp_pack_rects(stbrp_context *context, stbrp_rect *rects, int num_rects) +{ + int i, all_rects_packed = 1; + + // we use the 'was_packed' field internally to allow sorting/unsorting + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = i; + } + + // sort according to heuristic + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_height_compare); + + for (i=0; i < num_rects; ++i) { + if (rects[i].w == 0 || rects[i].h == 0) { + rects[i].x = rects[i].y = 0; // empty rect needs no space + } else { + stbrp__findresult fr = stbrp__skyline_pack_rectangle(context, rects[i].w, rects[i].h); + if (fr.prev_link) { + rects[i].x = (stbrp_coord) fr.x; + rects[i].y = (stbrp_coord) fr.y; + } else { + rects[i].x = rects[i].y = STBRP__MAXVAL; + } + } + } + + // unsort + STBRP_SORT(rects, num_rects, sizeof(rects[0]), rect_original_order); + + // set was_packed flags and all_rects_packed status + for (i=0; i < num_rects; ++i) { + rects[i].was_packed = !(rects[i].x == STBRP__MAXVAL && rects[i].y == STBRP__MAXVAL); + if (!rects[i].was_packed) + all_rects_packed = 0; + } + + // return the all_rects_packed status + return all_rects_packed; +} +#endif + +/* +------------------------------------------------------------------------------ +This software is available under 2 licenses -- choose whichever you prefer. 
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License +Copyright (c) 2017 Sean Barrett +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/src/inc/til/color.h b/src/inc/til/color.h index e90ca88efce..cfeee352288 100644 --- a/src/inc/til/color.h +++ b/src/inc/til/color.h @@ -9,7 +9,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" // a number of other color types. #pragma warning(push) // we can't depend on GSL here, so we use static_cast for explicit narrowing -#pragma warning(disable : 26472) +#pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). +#pragma warning(disable : 26495) // Variable 'til::color::<unnamed-tag>::abgr' is uninitialized. Always initialize a member variable (type.6). struct color { // Clang (10) has no trouble optimizing the COLORREF conversion operator, below, to a diff --git a/src/inc/til/generational.h b/src/inc/til/generational.h new file mode 100644 index 00000000000..d1631133251 --- /dev/null +++ b/src/inc/til/generational.h @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +namespace til // Terminal Implementation Library.
Also: "Today I Learned" +{ + struct generation_t + { + auto operator<=>(const generation_t&) const = default; + + uint32_t _value = 0; + }; + + template + struct generational + { + generational() = default; + explicit constexpr generational(auto&&... args) : + _value{ std::forward(args)... } {} + explicit constexpr generational(generation_t generation, auto&&... args) : + _generation{ generation }, + _value{ std::forward(args)... } {} + + constexpr bool operator==(const generational& rhs) const noexcept { return generation() == rhs.generation(); } + constexpr bool operator!=(const generational& rhs) const noexcept { return generation() != rhs.generation(); } + + constexpr generation_t generation() const noexcept + { + return _generation; + } + + [[nodiscard]] constexpr const T* operator->() const noexcept + { + return &_value; + } + + [[nodiscard]] constexpr const T& operator*() const noexcept + { + return _value; + } + + [[nodiscard]] constexpr T* write() noexcept + { + _generation = generation_t{ _generation._value + 1u }; + return &_value; + } + + private: + generation_t _generation; + T _value; + }; +} diff --git a/src/interactivity/win32/window.hpp b/src/interactivity/win32/window.hpp index c7a53aa74cf..45db4e7fbe6 100644 --- a/src/interactivity/win32/window.hpp +++ b/src/interactivity/win32/window.hpp @@ -16,9 +16,14 @@ Author(s): #include "../inc/IConsoleWindow.hpp" -namespace Microsoft::Console::Render +namespace Microsoft::Console::Render::Atlas { class AtlasEngine; +} + +namespace Microsoft::Console::Render +{ + using AtlasEngine = Atlas::AtlasEngine; class DxEngine; class GdiEngine; } diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 5cf54d18583..bf99768474d 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -4,6 +4,7 @@ #include "pch.h" #include "AtlasEngine.h" +#include "Backend.h" #include "../base/FontCache.h" // #### NOTE #### @@ -20,7 +21,7 @@ #pragma 
warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). -using namespace Microsoft::Console::Render; +using namespace Microsoft::Console::Render::Atlas; // Like gsl::narrow but returns a HRESULT. #pragma warning(push) @@ -34,7 +35,7 @@ constexpr HRESULT api_narrow(U val, T& out) noexcept #pragma warning(pop) template -constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept +constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept { return api_narrow(x, out.x) | api_narrow(y, out.y); } @@ -71,8 +72,8 @@ constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept [[nodiscard]] HRESULT AtlasEngine::InvalidateSystem(const til::rect* const prcDirtyClient) noexcept { - const auto top = prcDirtyClient->top / _api.fontMetrics.cellSize.y; - const auto bottom = prcDirtyClient->bottom / _api.fontMetrics.cellSize.y; + const auto top = prcDirtyClient->top / _api.s->font->cellSize.y; + const auto bottom = prcDirtyClient->bottom / _api.s->font->cellSize.y; // BeginPaint() protects against invalid out of bounds numbers. 
til::rect rect; @@ -118,7 +119,7 @@ constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept if (delta < 0) { _api.invalidatedRows.x = gsl::narrow_cast(clamp(_api.invalidatedRows.x + delta, u16min, u16max)); - _api.invalidatedRows.y = _api.cellCount.y; + _api.invalidatedRows.y = _api.s->cellCount.y; } else { @@ -145,7 +146,7 @@ constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept [[nodiscard]] HRESULT AtlasEngine::InvalidateTitle(const std::wstring_view proposedTitle) noexcept { - WI_SetFlag(_api.invalidations, ApiInvalidations::Title); + _api.invalidatedTitle = true; return S_OK; } @@ -169,10 +170,9 @@ constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept u16 newDPI; RETURN_IF_FAILED(api_narrow(dpi, newDPI)); - if (_api.dpi != newDPI) + if (_api.s->font->dpi != newDPI) { - _api.dpi = newDPI; - WI_SetFlag(_api.invalidations, ApiInvalidations::Font); + _api.s.write()->font.write()->dpi = newDPI; } return S_OK; @@ -181,13 +181,12 @@ constexpr HRESULT vec2_narrow(U x, U y, AtlasEngine::vec2& out) noexcept [[nodiscard]] HRESULT AtlasEngine::UpdateViewport(const til::inclusive_rect& srNewViewport) noexcept { const u16x2 cellCount{ - gsl::narrow_cast(std::max(1, srNewViewport.right - srNewViewport.left + 1)), - gsl::narrow_cast(std::max(1, srNewViewport.bottom - srNewViewport.top + 1)), + gsl::narrow_cast(srNewViewport.right - srNewViewport.left + 1), + gsl::narrow_cast(srNewViewport.bottom - srNewViewport.top + 1), }; - if (_api.cellCount != cellCount) + if (_api.s->cellCount != cellCount) { - _api.cellCount = cellCount; - WI_SetFlag(_api.invalidations, ApiInvalidations::Size); + _api.s.write()->cellCount = cellCount; } return S_OK; } @@ -263,8 +262,8 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::GetFontSize(_Out_ til::size* pFontSize) noexcept { RETURN_HR_IF_NULL(E_INVALIDARG, pFontSize); - pFontSize->width = _api.fontMetrics.cellSize.x; - pFontSize->height = _api.fontMetrics.cellSize.y; + pFontSize->width = 
_api.s->font->cellSize.x; + pFontSize->height = _api.s->font->cellSize.y; return S_OK; } @@ -272,13 +271,26 @@ CATCH_RETURN() { RETURN_HR_IF_NULL(E_INVALIDARG, pResult); + wil::com_ptr textFormat; + RETURN_IF_FAILED(_p.dwriteFactory->CreateTextFormat( + /* fontFamilyName */ _api.s->font->fontName.c_str(), + /* fontCollection */ _api.s->font->fontCollection.get(), + /* fontWeight */ static_cast(_api.s->font->fontWeight), + /* fontStyle */ DWRITE_FONT_STYLE_NORMAL, + /* fontStretch */ DWRITE_FONT_STRETCH_NORMAL, + /* fontSize */ _api.s->font->fontSizeInDIP, + /* localeName */ L"", + /* textFormat */ textFormat.put())); + wil::com_ptr textLayout; - RETURN_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(glyph.data(), gsl::narrow_cast(glyph.size()), _getTextFormat(false, false), FLT_MAX, FLT_MAX, textLayout.addressof())); + RETURN_IF_FAILED(_p.dwriteFactory->CreateTextLayout(glyph.data(), gsl::narrow_cast(glyph.size()), textFormat.get(), FLT_MAX, FLT_MAX, textLayout.addressof())); - DWRITE_TEXT_METRICS metrics; + DWRITE_TEXT_METRICS metrics{}; RETURN_IF_FAILED(textLayout->GetMetrics(&metrics)); - *pResult = static_cast(std::ceilf(metrics.width)) > _api.fontMetrics.cellSize.x; + const auto minWidth = (_api.s->font->cellSize.x * 1.2f); + const auto width = metrics.width * GetScaling(); + *pResult = width > minWidth; return S_OK; } @@ -298,31 +310,31 @@ HRESULT AtlasEngine::Enable() noexcept [[nodiscard]] std::wstring_view AtlasEngine::GetPixelShaderPath() noexcept { - return _api.customPixelShaderPath; + return _api.s->misc->customPixelShaderPath; } [[nodiscard]] bool AtlasEngine::GetRetroTerminalEffect() const noexcept { - return _api.useRetroTerminalEffect; + return _api.s->misc->useRetroTerminalEffect; } [[nodiscard]] float AtlasEngine::GetScaling() const noexcept { - return static_cast(_api.dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); + return static_cast(_api.s->font->dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); } [[nodiscard]] Microsoft::Console::Types::Viewport 
AtlasEngine::GetViewportInCharacters(const Types::Viewport& viewInPixels) const noexcept { - assert(_api.fontMetrics.cellSize.x != 0); - assert(_api.fontMetrics.cellSize.y != 0); - return Types::Viewport::FromDimensions(viewInPixels.Origin(), { viewInPixels.Width() / _api.fontMetrics.cellSize.x, viewInPixels.Height() / _api.fontMetrics.cellSize.y }); + assert(_api.s->font->cellSize.x != 0); + assert(_api.s->font->cellSize.y != 0); + return Types::Viewport::FromDimensions(viewInPixels.Origin(), { viewInPixels.Width() / _api.s->font->cellSize.x, viewInPixels.Height() / _api.s->font->cellSize.y }); } [[nodiscard]] Microsoft::Console::Types::Viewport AtlasEngine::GetViewportInPixels(const Types::Viewport& viewInCharacters) const noexcept { - assert(_api.fontMetrics.cellSize.x != 0); - assert(_api.fontMetrics.cellSize.y != 0); - return Types::Viewport::FromDimensions(viewInCharacters.Origin(), { viewInCharacters.Width() * _api.fontMetrics.cellSize.x, viewInCharacters.Height() * _api.fontMetrics.cellSize.y }); + assert(_api.s->font->cellSize.x != 0); + assert(_api.s->font->cellSize.y != 0); + return Types::Viewport::FromDimensions(viewInCharacters.Origin(), { viewInCharacters.Width() * _api.s->font->cellSize.x, viewInCharacters.Height() * _api.s->font->cellSize.y }); } void AtlasEngine::SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasingMode) noexcept @@ -332,13 +344,12 @@ void AtlasEngine::SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasin { _api.antialiasingMode = mode; _resolveTransparencySettings(); - WI_SetFlag(_api.invalidations, ApiInvalidations::Font); } } void AtlasEngine::SetCallback(std::function pfn) noexcept { - _api.swapChainChangedCallback = std::move(pfn); + _p.swapChainChangedCallback = std::move(pfn); } void AtlasEngine::EnableTransparentBackground(const bool isTransparent) noexcept @@ -347,7 +358,6 @@ void AtlasEngine::EnableTransparentBackground(const bool isTransparent) noexcept { _api.enableTransparentBackground = 
isTransparent; _resolveTransparencySettings(); - WI_SetFlag(_api.invalidations, ApiInvalidations::SwapChain); } } @@ -357,56 +367,53 @@ void AtlasEngine::SetForceFullRepaintRendering(bool enable) noexcept [[nodiscard]] HRESULT AtlasEngine::SetHwnd(const HWND hwnd) noexcept { - if (_api.hwnd != hwnd) + if (_api.s->target->hwnd != hwnd) { - _api.hwnd = hwnd; - WI_SetFlag(_api.invalidations, ApiInvalidations::SwapChain); + _api.s.write()->target.write()->hwnd = hwnd; } return S_OK; } void AtlasEngine::SetPixelShaderPath(std::wstring_view value) noexcept +try { - if (_api.customPixelShaderPath != value) + if (_api.s->misc->customPixelShaderPath != value) { - _api.customPixelShaderPath = value; + _api.s.write()->misc.write()->customPixelShaderPath = value; _resolveTransparencySettings(); - WI_SetFlag(_api.invalidations, ApiInvalidations::Device); } } +CATCH_LOG() void AtlasEngine::SetRetroTerminalEffect(bool enable) noexcept { - if (_api.useRetroTerminalEffect != enable) + if (_api.s->misc->useRetroTerminalEffect != enable) { - _api.useRetroTerminalEffect = enable; + _api.s.write()->misc.write()->useRetroTerminalEffect = enable; _resolveTransparencySettings(); - WI_SetFlag(_api.invalidations, ApiInvalidations::Device); } } void AtlasEngine::SetSelectionBackground(const COLORREF color, const float alpha) noexcept { const u32 selectionColor = (color & 0xffffff) | gsl::narrow_cast(std::lroundf(alpha * 255.0f)) << 24; - if (_api.selectionColor != selectionColor) + if (_api.s->misc->selectionColor != selectionColor) { - _api.selectionColor = selectionColor; - WI_SetFlag(_api.invalidations, ApiInvalidations::Settings); + _api.s.write()->misc.write()->selectionColor = selectionColor; } } void AtlasEngine::SetSoftwareRendering(bool enable) noexcept { - if (_api.useSoftwareRendering != enable) + if (_api.s->target->useSoftwareRendering != enable) { - _api.useSoftwareRendering = enable; - WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + 
_api.s.write()->target.write()->useSoftwareRendering = enable; } } void AtlasEngine::SetWarningCallback(std::function pfn) noexcept { - _api.warningCallback = std::move(pfn); + _p.warningCallback = std::move(pfn); } [[nodiscard]] HRESULT AtlasEngine::SetWindowSize(const til::size pixels) noexcept @@ -418,10 +425,9 @@ void AtlasEngine::SetWarningCallback(std::function pfn) noexcept // When Win+D is pressed, `TriggerRedrawCursor` is called and a render pass is initiated. // As conhost is in the background, GetClientRect will return {0,0} and we'll get called with {0,0}. // This isn't a valid value for _api.sizeInPixel and would crash _recreateSizeDependentResources(). - if (_api.sizeInPixel != newSize && newSize != u16x2{}) + if (_api.s->targetSize != newSize && newSize != u16x2{}) { - _api.sizeInPixel = newSize; - WI_SetFlag(_api.invalidations, ApiInvalidations::Size); + _api.s.write()->targetSize = newSize; } return S_OK; @@ -467,10 +473,18 @@ void AtlasEngine::_resolveTransparencySettings() noexcept // If the user asks for ClearType, but also for a transparent background // (which our ClearType shader doesn't simultaneously support) // then we need to sneakily force the renderer to grayscale AA. - _api.realizedAntialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; - // An opaque background allows us to use true "independent" flips. See AtlasEngine::_createSwapChain(). - // We can't enable them if custom shaders are specified, because it's unknown, whether they support opaque inputs. - _api.backgroundOpaqueMixin = _api.enableTransparentBackground || !_api.customPixelShaderPath.empty() || _api.useRetroTerminalEffect ? 0x00000000 : 0xff000000; + const u8 antialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + const bool enableTransparentBackground = _api.enableTransparentBackground || !_api.s->misc->customPixelShaderPath.empty() || _api.s->misc->useRetroTerminalEffect; + + if (antialiasingMode != _api.s->font->antialiasingMode || enableTransparentBackground != _api.s->target->enableTransparentBackground) + { + const auto s = _api.s.write(); + s->font.write()->antialiasingMode = antialiasingMode; + // An opaque background allows us to use true "independent" flips. See AtlasEngine::_createSwapChain(). + // We can't enable them if custom shaders are specified, because it's unknown, whether they support opaque inputs. + s->target.write()->enableTransparentBackground = enableTransparentBackground; + _api.backgroundOpaqueMixin = enableTransparentBackground ? 0x00000000 : 0xff000000; + } } void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes) @@ -521,9 +535,9 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo // AtlasEngine::_recreateFontDependentResources() relies on these fields to // exist in this particular order in order to create appropriate default axes. 
- fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_WEIGHT, -1.0f }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_ITALIC, -1.0f }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_SLANT, -1.0f }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_WEIGHT, NAN }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_ITALIC, NAN }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_SLANT, NAN }); for (const auto& p : axes) { @@ -549,20 +563,13 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo } } - const auto previousCellSize = _api.fontMetrics.cellSize; - _resolveFontMetrics(faceName, fontInfoDesired, fontInfo, &_api.fontMetrics); - _api.fontFeatures = std::move(fontFeatures); - _api.fontAxisValues = std::move(fontAxisValues); - - WI_SetFlag(_api.invalidations, ApiInvalidations::Font); - - if (previousCellSize != _api.fontMetrics.cellSize) - { - WI_SetFlag(_api.invalidations, ApiInvalidations::Size); - } + const auto font = _api.s.write()->font.write(); + _resolveFontMetrics(faceName, fontInfoDesired, fontInfo, font); + font->fontFeatures = std::move(fontFeatures); + font->fontAxisValues = std::move(fontAxisValues); } -void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontMetrics* fontMetrics) const +void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontSettings* fontMetrics) const { const auto requestedFamily = fontInfoDesired.GetFamily(); auto requestedWeight = fontInfoDesired.GetWeight(); @@ -603,7 +610,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo wil::com_ptr fontFace; THROW_IF_FAILED(font->CreateFontFace(fontFace.addressof())); - DWRITE_FONT_METRICS metrics; + 
DWRITE_FONT_METRICS metrics{}; fontFace->GetMetrics(&metrics); // According to Wikipedia: @@ -615,28 +622,29 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo u16 glyphIndex; THROW_IF_FAILED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex)); - DWRITE_GLYPH_METRICS glyphMetrics; + DWRITE_GLYPH_METRICS glyphMetrics{}; THROW_IF_FAILED(fontFace->GetDesignGlyphMetrics(&glyphIndex, 1, &glyphMetrics)); // Point sizes are commonly treated at a 72 DPI scale // (including by OpenType), whereas DirectWrite uses 96 DPI. // Since we want the height in px we multiply by the display's DPI. + const auto dpi = static_cast(_api.s->font->dpi); const auto fontSizeInDIP = fontSize / 72.0f * 96.0f; - const auto fontSizeInPx = fontSize / 72.0f * _api.dpi; - - const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); - const auto ascent = static_cast(metrics.ascent) * designUnitsPerPx; - const auto descent = static_cast(metrics.descent) * designUnitsPerPx; - const auto lineGap = static_cast(metrics.lineGap) * designUnitsPerPx; - const auto underlinePosition = static_cast(-metrics.underlinePosition) * designUnitsPerPx; - const auto underlineThickness = static_cast(metrics.underlineThickness) * designUnitsPerPx; - const auto strikethroughPosition = static_cast(-metrics.strikethroughPosition) * designUnitsPerPx; - const auto strikethroughThickness = static_cast(metrics.strikethroughThickness) * designUnitsPerPx; - const auto advanceWidth = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; + const auto fontSizeInPx = fontSize / 72.0f * dpi; + + const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); + const auto ascent = static_cast(metrics.ascent) * designUnitsPerPx; + const auto descent = static_cast(metrics.descent) * designUnitsPerPx; + const auto lineGap = static_cast(metrics.lineGap) * designUnitsPerPx; + const auto underlinePosition = static_cast(-metrics.underlinePosition) * 
designUnitsPerPx; + const auto underlineThickness = static_cast(metrics.underlineThickness) * designUnitsPerPx; + const auto strikethroughPosition = static_cast(-metrics.strikethroughPosition) * designUnitsPerPx; + const auto strikethroughThickness = static_cast(metrics.strikethroughThickness) * designUnitsPerPx; + const auto advanceWidth = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; const auto advanceHeight = ascent + descent + lineGap; - auto adjustedWidth = std::roundf(fontInfoDesired.GetCellWidth().Resolve(advanceWidth, _api.dpi, fontSizeInPx, advanceWidth)); - auto adjustedHeight = std::roundf(fontInfoDesired.GetCellHeight().Resolve(advanceHeight, _api.dpi, fontSizeInPx, advanceWidth)); + auto adjustedWidth = std::roundf(fontInfoDesired.GetCellWidth().Resolve(advanceWidth, dpi, fontSizeInPx, advanceWidth)); + auto adjustedHeight = std::roundf(fontInfoDesired.GetCellHeight().Resolve(advanceHeight, dpi, fontSizeInPx, advanceWidth)); // Protection against bad user values in GetCellWidth/Y. // AtlasEngine fails hard with 0 cell sizes. @@ -667,7 +675,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo doubleUnderlinePosTop = std::max(doubleUnderlinePosTop, baseline + thinLineWidth); // 5. The gap is only the distance _between_ the lines, but we need the distance from the // top border of the top and bottom lines, which includes an additional line width. - const auto doubleUnderlineGap = std::max(1.0f, std::roundf(1.2f / 72.0f * _api.dpi)); + const auto doubleUnderlineGap = std::max(1.0f, std::roundf(1.2f / 72.0f * dpi)); doubleUnderlinePosBottom = std::max(doubleUnderlinePosBottom, doubleUnderlinePosTop + doubleUnderlineGap + thinLineWidth); // Our cells can't overlap each other so we additionally clamp the bottom line to be inside the cell boundaries. 
doubleUnderlinePosBottom = std::min(doubleUnderlinePosBottom, adjustedHeight - thinLineWidth); @@ -709,8 +717,8 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontCollection = std::move(fontCollection); fontMetrics->fontFamily = std::move(fontFamily); fontMetrics->fontName = std::move(fontName); + fontMetrics->baselineInDIP = baseline / dpi * 96.0f; fontMetrics->fontSizeInDIP = fontSizeInDIP; - fontMetrics->baselineInDIP = baseline / static_cast(_api.dpi) * 96.0f; fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize = { cellWidth, cellHeight }; fontMetrics->fontWeight = fontWeightU16; diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index d6f1562cead..33b4878c5f0 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -4,11 +4,7 @@ #include "pch.h" #include "AtlasEngine.h" -#include -#include -#include -#include - +#include "Backend.h" #include "../../interactivity/win32/CustomWindowMessages.h" // #### NOTE #### @@ -25,44 +21,27 @@ #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). -using namespace Microsoft::Console::Render; +using namespace Microsoft::Console::Render::Atlas; #pragma warning(suppress : 26455) // Default constructor may not throw. Declare it 'noexcept' (f.6). 
AtlasEngine::AtlasEngine() { #ifdef NDEBUG - THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, _sr.d2dFactory.addressof())); + THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(_p.d2dFactory), nullptr, _p.d2dFactory.put_void())); #else - static constexpr D2D1_FACTORY_OPTIONS options{ D2D1_DEBUG_LEVEL_INFORMATION }; - THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, options, _sr.d2dFactory.addressof())); + static constexpr D2D1_FACTORY_OPTIONS options{ .debugLevel = D2D1_DEBUG_LEVEL_INFORMATION }; + THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(_p.d2dFactory), &options, _p.d2dFactory.put_void())); #endif - THROW_IF_FAILED(DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(_sr.dwriteFactory), reinterpret_cast<::IUnknown**>(_sr.dwriteFactory.addressof()))); - if (const auto factory2 = _sr.dwriteFactory.try_query()) - { - THROW_IF_FAILED(factory2->GetSystemFontFallback(_sr.systemFontFallback.addressof())); - } - { - wil::com_ptr textAnalyzer; - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextAnalyzer(textAnalyzer.addressof())); - _sr.textAnalyzer = textAnalyzer.query(); - } + THROW_IF_FAILED(DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(_p.dwriteFactory), reinterpret_cast<::IUnknown**>(_p.dwriteFactory.addressof()))); + _p.dwriteFactory4 = _p.dwriteFactory.try_query(); - _sr.isWindows10OrGreater = IsWindows10OrGreater(); + THROW_IF_FAILED(_p.dwriteFactory->GetSystemFontFallback(_p.systemFontFallback.addressof())); + _p.systemFontFallback = _p.systemFontFallback.try_query(); -#ifndef NDEBUG - { - _sr.sourceDirectory = std::filesystem::path{ __FILE__ }.parent_path(); - _sr.sourceCodeWatcher = wil::make_folder_change_reader_nothrow(_sr.sourceDirectory.c_str(), false, wil::FolderChangeEvents::FileName | wil::FolderChangeEvents::LastWriteTime, [this](wil::FolderChangeEvent, PCWSTR path) { - if (til::ends_with(path, L".hlsl")) - { - auto expected = INT64_MAX; - 
const auto invalidationTime = std::chrono::steady_clock::now() + std::chrono::milliseconds(100); - _sr.sourceCodeInvalidationTime.compare_exchange_strong(expected, invalidationTime.time_since_epoch().count(), std::memory_order_relaxed); - } - }); - } -#endif + wil::com_ptr textAnalyzer; + THROW_IF_FAILED(_p.dwriteFactory->CreateTextAnalyzer(textAnalyzer.addressof())); + _p.textAnalyzer = textAnalyzer.query(); } #pragma region IRenderEngine @@ -72,103 +51,25 @@ AtlasEngine::AtlasEngine() [[nodiscard]] HRESULT AtlasEngine::StartPaint() noexcept try { - if (_api.hwnd) + if (const auto hwnd = _api.s->target->hwnd) { RECT rect; - LOG_IF_WIN32_BOOL_FALSE(GetClientRect(_api.hwnd, &rect)); + LOG_IF_WIN32_BOOL_FALSE(GetClientRect(hwnd, &rect)); std::ignore = SetWindowSize({ rect.right - rect.left, rect.bottom - rect.top }); - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Title)) + if (_api.invalidatedTitle) { - LOG_IF_WIN32_BOOL_FALSE(PostMessageW(_api.hwnd, CM_UPDATE_TITLE, 0, 0)); - WI_ClearFlag(_api.invalidations, ApiInvalidations::Title); + LOG_IF_WIN32_BOOL_FALSE(PostMessageW(hwnd, CM_UPDATE_TITLE, 0, 0)); + _api.invalidatedTitle = false; } } - // It's important that we invalidate here instead of in Present() with the rest. - // Other functions, those called before Present(), might depend on _r fields. - // But most of the time _invalidations will be ::none, making this very cheap. 
- if (_api.invalidations != ApiInvalidations::None) + if (_p.s != _api.s) { - RETURN_HR_IF(E_UNEXPECTED, _api.cellCount == u16x2{}); - - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Device)) - { - _createResources(); - } - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::SwapChain)) - { - _createSwapChain(); - } - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Size)) - { - _recreateSizeDependentResources(); - } - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Font)) - { - _recreateFontDependentResources(); - } - if (WI_IsFlagSet(_api.invalidations, ApiInvalidations::Settings)) - { - _r.selectionColor = _api.selectionColor; - WI_SetFlag(_r.invalidations, RenderInvalidations::ConstBuffer); - WI_ClearFlag(_api.invalidations, ApiInvalidations::Settings); - } - - // Equivalent to InvalidateAll(). - _api.invalidatedRows = invalidatedRowsAll; + _handleSettingsUpdate(); } -#ifndef NDEBUG - if (const auto invalidationTime = _sr.sourceCodeInvalidationTime.load(std::memory_order_relaxed); invalidationTime != INT64_MAX && invalidationTime <= std::chrono::steady_clock::now().time_since_epoch().count()) - { - _sr.sourceCodeInvalidationTime.store(INT64_MAX, std::memory_order_relaxed); - - try - { - static const auto compile = [](const std::filesystem::path& path, const char* target) { - wil::com_ptr error; - wil::com_ptr blob; - const auto hr = D3DCompileFromFile( - /* pFileName */ path.c_str(), - /* pDefines */ nullptr, - /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, - /* pEntrypoint */ "main", - /* pTarget */ target, - /* Flags1 */ D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS, - /* Flags2 */ 0, - /* ppCode */ blob.addressof(), - /* ppErrorMsgs */ error.addressof()); - - if (error) - { - std::thread t{ [error = std::move(error)]() noexcept { - MessageBoxA(nullptr, static_cast(error->GetBufferPointer()), "Compilation error", MB_ICONERROR | 
MB_OK); - } }; - t.detach(); - } - - THROW_IF_FAILED(hr); - return blob; - }; - - const auto vs = compile(_sr.sourceDirectory / L"shader_vs.hlsl", "vs_4_1"); - const auto ps = compile(_sr.sourceDirectory / L"shader_ps.hlsl", "ps_4_1"); - - THROW_IF_FAILED(_r.device->CreateVertexShader(vs->GetBufferPointer(), vs->GetBufferSize(), nullptr, _r.vertexShader.put())); - THROW_IF_FAILED(_r.device->CreatePixelShader(ps->GetBufferPointer(), ps->GetBufferSize(), nullptr, _r.pixelShader.put())); - _setShaderResources(); - } - CATCH_LOG() - } -#endif - - if constexpr (debugGlyphGenerationPerformance) - { - _r.glyphs = {}; - _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; - } - if constexpr (debugTextParsingPerformance) + if constexpr (debugDisablePartialInvalidation) { _api.invalidatedRows = invalidatedRowsAll; _api.scrollOffset = 0; @@ -176,32 +77,32 @@ try // Clamp invalidation rects into valid value ranges. { - _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _api.cellCount.y); - _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _api.cellCount.x); - _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _api.cellCount.y); + _api.invalidatedCursorArea.left = std::min(_api.invalidatedCursorArea.left, _p.s->cellCount.x); + _api.invalidatedCursorArea.top = std::min(_api.invalidatedCursorArea.top, _p.s->cellCount.y); + _api.invalidatedCursorArea.right = clamp(_api.invalidatedCursorArea.right, _api.invalidatedCursorArea.left, _p.s->cellCount.x); + _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _p.s->cellCount.y); } { - _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _api.cellCount.y); - _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, 
_api.invalidatedRows.x, _api.cellCount.y); + _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _p.s->cellCount.y); + _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _p.s->cellCount.y); } { - const auto limit = gsl::narrow_cast(_api.cellCount.y & 0x7fff); + const auto limit = gsl::narrow_cast(_p.s->cellCount.y & 0x7fff); _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); } // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". - if (_api.scrollOffset != 0) + if (const auto offset = _api.scrollOffset) { const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; - const auto offset = static_cast(_api.scrollOffset) * _api.cellCount.x; + const auto deltaPx = offset * _p.s->font->cellSize.y; - if (_api.scrollOffset < 0) + if (offset < 0) { // Scroll up (for instance when new text is being written at the end of the buffer). - const u16 endRow = _api.cellCount.y + _api.scrollOffset; + const u16 endRow = _p.s->cellCount.y + offset; _api.invalidatedRows.x = nothingInvalid ? endRow : std::min(_api.invalidatedRows.x, endRow); - _api.invalidatedRows.y = _api.cellCount.y; + _api.invalidatedRows.y = _p.s->cellCount.y; // scrollOffset/offset = -1 // +----------+ +----------+ @@ -209,22 +110,22 @@ try // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset // |xxxxxxx | | | | v // +----------+ +----------+ v < end + const auto dest = _p.rows.begin(); + const auto last = _p.rows.end(); + const auto first = dest - offset; + const auto end = std::move(first, last, dest); + + for (auto it = dest; it != end; ++it) { - const auto beg = _r.cells.begin(); - const auto end = _r.cells.end(); - std::move(beg - offset, end, beg); - } - { - const auto beg = _r.cellGlyphMapping.begin(); - const auto end = _r.cellGlyphMapping.end(); - std::move(beg - offset, end, beg); + it->top += deltaPx; + it->bottom += deltaPx; } } else { // Scroll down. 
_api.invalidatedRows.x = 0; - _api.invalidatedRows.y = nothingInvalid ? _api.scrollOffset : std::max(_api.invalidatedRows.y, _api.scrollOffset); + _api.invalidatedRows.y = nothingInvalid ? offset : std::max(_api.invalidatedRows.y, offset); // scrollOffset/offset = 1 // +----------+ +----------+ @@ -232,70 +133,48 @@ try // |xxxxxxx | -> | xxxxxxxxx| | ^ // | | |xxxxxxx | v | < end - offset // +----------+ +----------+ + dst < end + const auto first = _p.rows.begin(); + const auto dest = _p.rows.end(); + const auto last = dest - offset; + const auto beg = std::move_backward(first, last, dest); + + for (auto it = beg; it != dest; ++it) { - const auto beg = _r.cells.begin(); - const auto end = _r.cells.end(); - std::move_backward(beg, end - offset, end); - } - { - const auto beg = _r.cellGlyphMapping.begin(); - const auto end = _r.cellGlyphMapping.end(); - std::move_backward(beg, end - offset, end); + it->top += deltaPx; + it->bottom += deltaPx; } } - } - - _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _api.cellCount.x, _api.invalidatedRows.y }; - _r.dirtyRect = _api.dirtyRect; - _r.scrollOffset = _api.scrollOffset; - // Clear the previous cursor. PaintCursor() is only called if the cursor is on. - if (const auto r = _api.invalidatedCursorArea; r.non_empty()) - { - _setCellFlags(r, CellFlags::Cursor, CellFlags::None); - _r.dirtyRect |= til::rect{ r.left, r.top, r.right, r.bottom }; + // Scrolling the background bitmap is a lot easier because we can rely on memmove which works + // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. 
+ { + const auto width = _p.s->cellCount.x; + const auto beg = _p.backgroundBitmap.begin(); + const auto end = _p.backgroundBitmap.end(); + const auto dst = beg + std::max(0, offset) * width; + const auto src = beg - std::min(0, offset) * width; + const auto count = end - std::max(src, dst); + assert(dst >= beg && dst + count <= end); + assert(src >= beg && src + count <= end); + memmove(dst, src, count * sizeof(u32)); + } } - // This is an important block of code for our TileHashMap. - // We only process glyphs within the dirtyRect, but glyphs outside of the - // dirtyRect are still in use and shouldn't be discarded. This is critical - // if someone uses a tool like tmux to split the terminal horizontally. - // If they then print a lot of Unicode text on just one side, we have to - // ensure that the (for example) plain ASCII glyphs on the other half of the - // viewport are still retained. This bit of code "refreshes" those glyphs and - // brings them to the front of the LRU queue to prevent them from being reused. + for (auto y = _api.invalidatedRows.x; y < _api.invalidatedRows.y; ++y) { - const std::array ranges{ { - { 0, _api.dirtyRect.top }, - { _api.dirtyRect.bottom, _api.cellCount.y }, - } }; - const auto stride = static_cast(_r.cellCount.x); + _p.rows[y].clear(y, _p.s->font->cellSize.y); + } - for (const auto& p : ranges) - { - // We (ab)use the .x/.y members of the til::point as the - // respective [from,to) range of rows we need to makeNewest(). 
- const auto from = p.x; - const auto to = p.y; + assert(_p.s->cellCount.x * _p.s->font->cellSize.x <= _p.s->targetSize.x); + assert(_p.s->cellCount.y * _p.s->font->cellSize.y <= _p.s->targetSize.y); - for (auto y = from; y < to; ++y) - { - auto it = _r.cellGlyphMapping.data() + stride * y; - const auto end = it + stride; - for (; it != end; ++it) - { - _r.glyphs.makeNewest(*it); - } - } - } - } + _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _p.s->cellCount.x, _api.invalidatedRows.y }; + _p.dirtyRect = _api.dirtyRect; + _p.cursorRect = {}; + _p.scrollOffset = _api.scrollOffset; return S_OK; } -catch (const wil::ResultException& exception) -{ - return _handleException(exception); -} CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::EndPaint() noexcept @@ -345,7 +224,7 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PaintBufferLine(std::span clusters, til::point coord, const bool fTrimLeft, const bool lineWrapped) noexcept try { - const auto y = gsl::narrow_cast(clamp(coord.y, 0, _api.cellCount.y)); + const auto y = gsl::narrow_cast(clamp(coord.y, 0, _p.s->cellCount.y)); if (_api.lastPaintBufferLineCoord.y != y) { @@ -359,30 +238,7 @@ try _api.bufferLineColumn.pop_back(); } - // `TextBuffer` is buggy and allows a `Trailing` `DbcsAttribute` to be written - // into the first column. Since other code then blindly assumes that there's a - // preceding `Leading` character, we'll get called with a X coordinate of -1. - // - // This block can be removed after GH#13626 is merged. 
- if (coord.x < 0) - { - size_t offset = 0; - for (const auto& cluster : clusters) - { - offset++; - coord.x += cluster.GetColumns(); - if (coord.x >= 0) - { - _api.bufferLine.insert(_api.bufferLine.end(), coord.x, L' '); - _api.bufferLineColumn.insert(_api.bufferLineColumn.end(), coord.x, 0u); - break; - } - } - - clusters = clusters.subspan(offset); - } - - const auto x = gsl::narrow_cast(clamp(coord.x, 0, _api.cellCount.x)); + const auto x = gsl::narrow_cast(clamp(coord.x, 0, _p.s->cellCount.x)); // Due to the current IRenderEngine interface (that wasn't refactored yet) we need to assemble // the current buffer line first as the remaining function operates on whole lines of text. @@ -401,13 +257,11 @@ try _api.bufferLineColumn.emplace_back(column); - const BufferLineMetadata metadata{ _api.currentColor, _api.flags }; - FAIL_FAST_IF(column > _api.bufferLineMetadata.size()); - std::fill_n(_api.bufferLineMetadata.data() + x, column - x, metadata); + std::fill(_api.colorsForeground.begin() + x, _api.colorsForeground.begin() + column, _api.currentColor.x); + std::fill_n(_p.backgroundBitmap.begin() + (static_cast(y) * _p.s->cellCount.x + x), column - x, _api.currentColor.y); } _api.lastPaintBufferLineCoord = { x, y }; - _api.bufferLineWasHyperlinked = false; return S_OK; } @@ -416,16 +270,13 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PaintBufferGridLines(const GridLineSet lines, const COLORREF color, const size_t cchLine, const til::point coordTarget) noexcept try { - if (!_api.bufferLineWasHyperlinked && lines.test(GridLines::Underline) && WI_IsFlagClear(_api.flags, CellFlags::Underline)) - { - _api.bufferLineWasHyperlinked = true; - - WI_UpdateFlagsInMask(_api.flags, CellFlags::Underline | CellFlags::UnderlineDotted | CellFlags::UnderlineDouble, CellFlags::Underline); + const auto y = gsl::narrow_cast(clamp(coordTarget.y, 0, _p.s->cellCount.y)); + const auto from = gsl::narrow_cast(clamp(coordTarget.x, 0, _p.s->cellCount.x - 1)); + const auto to = 
gsl::narrow_cast(clamp(coordTarget.x + cchLine, from, _p.s->cellCount.x)); + const auto fg = gsl::narrow_cast(color) | 0xff000000; + auto& row = _p.rows[y]; - const BufferLineMetadata metadata{ _api.currentColor, _api.flags }; - const size_t x = _api.lastPaintBufferLineCoord.x; - std::fill_n(_api.bufferLineMetadata.data() + x, _api.bufferLineMetadata.size() - x, metadata); - } + row.gridLineRanges.emplace_back(lines, fg, from, to); return S_OK; } CATCH_RETURN() @@ -438,14 +289,14 @@ try // As such we got to call _flushBufferLine() here just to be sure. _flushBufferLine(); - const u16r u16rect{ - rect.narrow_left(), - rect.narrow_top(), - rect.narrow_right(), - rect.narrow_bottom(), - }; - _setCellFlags(u16rect, CellFlags::Selected, CellFlags::Selected); - _r.dirtyRect |= rect; + const auto y = gsl::narrow_cast(clamp(rect.top, 0, _p.s->cellCount.y)); + const auto from = gsl::narrow_cast(clamp(rect.left, 0, _p.s->cellCount.x - 1)); + const auto to = gsl::narrow_cast(clamp(rect.right, from, _p.s->cellCount.x)); + auto& row = _p.rows[y]; + + row.selectionFrom = from; + row.selectionTo = to; + _p.dirtyRect |= rect; return S_OK; } CATCH_RETURN() @@ -459,30 +310,36 @@ try _flushBufferLine(); { - const CachedCursorOptions cachedOptions{ - gsl::narrow_cast(options.fUseColor ? options.cursorColor | 0xff000000 : INVALID_COLOR), - gsl::narrow_cast(options.cursorType), - gsl::narrow_cast(options.ulCursorHeightPercent), + const CursorSettings cachedOptions{ + .cursorColor = gsl::narrow_cast(options.fUseColor ? 
options.cursorColor | 0xff000000 : INVALID_COLOR), + .cursorType = gsl::narrow_cast(options.cursorType), + .heightPercentage = gsl::narrow_cast(options.ulCursorHeightPercent), }; - if (_r.cursorOptions != cachedOptions) + if (*_api.s->cursor != cachedOptions) { - _r.cursorOptions = cachedOptions; - WI_SetFlag(_r.invalidations, RenderInvalidations::Cursor); + *_api.s.write()->cursor.write() = cachedOptions; + *_p.s.write()->cursor.write() = cachedOptions; } } + // Clear the previous cursor + if (const auto r = _api.invalidatedCursorArea; r.non_empty()) + { + _p.dirtyRect |= til::rect{ r.left, r.top, r.right, r.bottom }; + } + if (options.isOn) { const auto point = options.coordCursor; // TODO: options.coordCursor can contain invalid out of bounds coordinates when // the window is being resized and the cursor is on the last line of the viewport. - const auto x = gsl::narrow_cast(clamp(point.x, 0, _r.cellCount.x - 1)); - const auto y = gsl::narrow_cast(clamp(point.y, 0, _r.cellCount.y - 1)); + const auto x = gsl::narrow_cast(clamp(point.x, 0, _p.s->cellCount.x - 1)); + const auto y = gsl::narrow_cast(clamp(point.y, 0, _p.s->cellCount.y - 1)); const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)); - const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _r.cellCount.x - 0)); + const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _p.s->cellCount.x - 0)); const auto bottom = gsl::narrow_cast(y + 1); - _setCellFlags({ x, y, right, bottom }, CellFlags::Cursor, CellFlags::Cursor); - _r.dirtyRect |= til::rect{ x, y, right, bottom }; + _p.cursorRect = { x, y, right, bottom }; + _p.dirtyRect |= til::rect{ x, y, right, bottom }; } return S_OK; @@ -498,26 +355,11 @@ try if (!isSettingDefaultBrushes) { - const auto hyperlinkId = textAttributes.GetHyperlinkId(); - - auto flags = CellFlags::None; - WI_SetFlagIf(flags, CellFlags::BorderLeft, textAttributes.IsLeftVerticalDisplayed()); - WI_SetFlagIf(flags, 
CellFlags::BorderTop, textAttributes.IsTopHorizontalDisplayed()); - WI_SetFlagIf(flags, CellFlags::BorderRight, textAttributes.IsRightVerticalDisplayed()); - WI_SetFlagIf(flags, CellFlags::BorderBottom, textAttributes.IsBottomHorizontalDisplayed()); - WI_SetFlagIf(flags, CellFlags::Underline, textAttributes.IsUnderlined()); - WI_SetFlagIf(flags, CellFlags::UnderlineDotted, hyperlinkId != 0); - WI_SetFlagIf(flags, CellFlags::UnderlineDouble, textAttributes.IsDoublyUnderlined()); - WI_SetFlagIf(flags, CellFlags::Strikethrough, textAttributes.IsCrossedOut()); - - if (_api.hyperlinkHoveredId && _api.hyperlinkHoveredId == hyperlinkId) - { - WI_SetFlag(flags, CellFlags::Underline); - WI_ClearAllFlags(flags, CellFlags::UnderlineDotted | CellFlags::UnderlineDouble); - } - const u32x2 newColors{ gsl::narrow_cast(fg), gsl::narrow_cast(bg) }; - const AtlasKeyAttributes attributes{ 0, textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold), textAttributes.IsItalic(), 0 }; + const AtlasKeyAttributes attributes{ + .bold = textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold), + .italic = textAttributes.IsItalic() + }; if (_api.attributes != attributes) { @@ -526,12 +368,11 @@ try _api.currentColor = newColors; _api.attributes = attributes; - _api.flags = flags; } - else if (textAttributes.BackgroundIsDefault() && bg != _r.backgroundColor) + else if (textAttributes.BackgroundIsDefault() && bg != _api.s->misc->backgroundColor) { - _r.backgroundColor = bg; - WI_SetFlag(_r.invalidations, RenderInvalidations::ConstBuffer); + _api.s.write()->misc.write()->backgroundColor = bg; + _p.s.write()->misc.write()->backgroundColor = bg; } return S_OK; @@ -540,704 +381,90 @@ CATCH_RETURN() #pragma endregion -[[nodiscard]] HRESULT AtlasEngine::_handleException(const wil::ResultException& exception) noexcept +void AtlasEngine::_handleSettingsUpdate() { - const auto hr = exception.GetErrorCode(); - if (hr == 
DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET || hr == D2DERR_RECREATE_TARGET) - { - WI_SetFlag(_api.invalidations, ApiInvalidations::Device); - return E_PENDING; // Indicate a retry to the renderer - } + const auto targetChanged = _p.s->target != _api.s->target; + const auto fontChanged = _p.s->font != _api.s->font; + const auto cellCountChanged = _p.s->cellCount != _api.s->cellCount; - // NOTE: This isn't thread safe, as _handleException is called by AtlasEngine.r.cpp. - // However it's not too much of a concern at the moment as SetWarningCallback() - // is only called once during construction in practice. - if (_api.warningCallback) - { - try - { - _api.warningCallback(hr); - } - CATCH_LOG() - } + _p.s = _api.s; - return hr; -} - -void AtlasEngine::_createResources() -{ - _releaseSwapChain(); - _r = {}; - -#ifdef NDEBUG - static constexpr -#endif - auto deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED | D3D11_CREATE_DEVICE_BGRA_SUPPORT; - -#ifndef NDEBUG - // DXGI debug messages + enabling D3D11_CREATE_DEVICE_DEBUG if the Windows SDK was installed. - if (const wil::unique_hmodule module{ LoadLibraryExW(L"dxgi.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32) }) + if (targetChanged) { - deviceFlags |= D3D11_CREATE_DEVICE_DEBUG; - - if (const auto DXGIGetDebugInterface1 = GetProcAddressByFunctionDeclaration(module.get(), DXGIGetDebugInterface1)) - { - if (wil::com_ptr infoQueue; SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(infoQueue.addressof())))) - { - // I didn't want to link with dxguid.lib just for getting DXGI_DEBUG_ALL. This GUID is publicly documented. 
- static constexpr GUID dxgiDebugAll = { 0xe48ae283, 0xda80, 0x490b, { 0x87, 0xe6, 0x43, 0xe9, 0xa9, 0xcf, 0xda, 0x8 } }; - for (const auto severity : std::array{ DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING }) - { - infoQueue->SetBreakOnSeverity(dxgiDebugAll, severity, true); - } - } - - if (wil::com_ptr debug; SUCCEEDED(DXGIGetDebugInterface1(0, IID_PPV_ARGS(debug.addressof())))) - { - debug->EnableLeakTrackingForThread(); - } - } + _b.reset(); } -#endif // NDEBUG - - // D3D device setup (basically a D3D class factory) + if (fontChanged) { - wil::com_ptr deviceContext; - - static constexpr std::array featureLevels{ - D3D_FEATURE_LEVEL_11_1, - D3D_FEATURE_LEVEL_11_0, - D3D_FEATURE_LEVEL_10_1, - D3D_FEATURE_LEVEL_10_0, - D3D_FEATURE_LEVEL_9_3, - D3D_FEATURE_LEVEL_9_2, - D3D_FEATURE_LEVEL_9_1, - }; - - auto hr = E_UNEXPECTED; - - if (!_api.useSoftwareRendering) - { - // Why D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS: - // This flag prevents the driver from creating a large thread pool for things like shader computations - // that would be advantageous for games. For us this has only a minimal performance benefit, - // but comes with a large memory usage overhead. At the time of writing the Nvidia - // driver launches $cpu_thread_count more worker threads without this flag. 
- hr = D3D11CreateDevice( - /* pAdapter */ nullptr, - /* DriverType */ D3D_DRIVER_TYPE_HARDWARE, - /* Software */ nullptr, - /* Flags */ deviceFlags | D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS, - /* pFeatureLevels */ featureLevels.data(), - /* FeatureLevels */ gsl::narrow_cast(featureLevels.size()), - /* SDKVersion */ D3D11_SDK_VERSION, - /* ppDevice */ _r.device.put(), - /* pFeatureLevel */ nullptr, - /* ppImmediateContext */ deviceContext.put()); - } - if (FAILED(hr)) - { - hr = D3D11CreateDevice( - /* pAdapter */ nullptr, - /* DriverType */ D3D_DRIVER_TYPE_WARP, - /* Software */ nullptr, - /* Flags */ deviceFlags, - /* pFeatureLevels */ featureLevels.data(), - /* FeatureLevels */ gsl::narrow_cast(featureLevels.size()), - /* SDKVersion */ D3D11_SDK_VERSION, - /* ppDevice */ _r.device.put(), - /* pFeatureLevel */ nullptr, - /* ppImmediateContext */ deviceContext.put()); - } - THROW_IF_FAILED(hr); - - _r.deviceContext = deviceContext.query(); + _recreateFontDependentResources(); } - -#ifndef NDEBUG - // D3D debug messages - if (deviceFlags & D3D11_CREATE_DEVICE_DEBUG) + if (cellCountChanged) { - const auto infoQueue = _r.device.query(); - for (const auto severity : std::array{ D3D11_MESSAGE_SEVERITY_CORRUPTION, D3D11_MESSAGE_SEVERITY_ERROR, D3D11_MESSAGE_SEVERITY_WARNING }) - { - infoQueue->SetBreakOnSeverity(severity, true); - } - } -#endif // NDEBUG - - { - wil::com_ptr dxgiAdapter; - THROW_IF_FAILED(_r.device.query()->GetParent(__uuidof(dxgiAdapter), dxgiAdapter.put_void())); - THROW_IF_FAILED(dxgiAdapter->GetParent(__uuidof(_r.dxgiFactory), _r.dxgiFactory.put_void())); - - DXGI_ADAPTER_DESC1 desc; - THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); - _r.d2dMode = debugForceD2DMode || WI_IsAnyFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_REMOTE | DXGI_ADAPTER_FLAG_SOFTWARE); + _recreateCellCountDependentResources(); } - const auto featureLevel = _r.device->GetFeatureLevel(); - - if (featureLevel < D3D_FEATURE_LEVEL_10_0) - { - _r.d2dMode = true; - } - 
else if (featureLevel < D3D_FEATURE_LEVEL_11_0) - { - D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS options; - THROW_IF_FAILED(_r.device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(options))); - if (!options.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x) - { - _r.d2dMode = true; - } - } - - if (!_r.d2dMode) - { - // Our constant buffer will never get resized - { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(ConstBuffer); - desc.Usage = D3D11_USAGE_DEFAULT; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.constantBuffer.put())); - } - - THROW_IF_FAILED(_r.device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _r.vertexShader.put())); - THROW_IF_FAILED(_r.device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _r.pixelShader.put())); - - if (!_api.customPixelShaderPath.empty()) - { - const char* target = nullptr; - switch (featureLevel) - { - case D3D_FEATURE_LEVEL_10_0: - target = "ps_4_0"; - break; - case D3D_FEATURE_LEVEL_10_1: - target = "ps_4_1"; - break; - default: - target = "ps_5_0"; - break; - } - - static constexpr auto flags = - D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR -#ifdef NDEBUG - | D3DCOMPILE_OPTIMIZATION_LEVEL3; -#else - // Only enable strictness and warnings in DEBUG mode - // as these settings makes it very difficult to develop - // shaders as windows terminal is not telling the user - // what's wrong, windows terminal just fails. 
- // Keep it in DEBUG mode to catch errors in shaders - // shipped with windows terminal - | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; -#endif - - wil::com_ptr error; - wil::com_ptr blob; - const auto hr = D3DCompileFromFile( - /* pFileName */ _api.customPixelShaderPath.c_str(), - /* pDefines */ nullptr, - /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, - /* pEntrypoint */ "main", - /* pTarget */ target, - /* Flags1 */ flags, - /* Flags2 */ 0, - /* ppCode */ blob.addressof(), - /* ppErrorMsgs */ error.addressof()); - - // Unless we can determine otherwise, assume this shader requires evaluation every frame - _r.requiresContinuousRedraw = true; - - if (SUCCEEDED(hr)) - { - THROW_IF_FAILED(_r.device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, _r.customPixelShader.put())); - - // Try to determine whether the shader uses the Time variable - wil::com_ptr reflector; - if (SUCCEEDED_LOG(D3DReflect(blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(reflector.put())))) - { - if (ID3D11ShaderReflectionConstantBuffer* constantBufferReflector = reflector->GetConstantBufferByIndex(0)) // shader buffer - { - if (ID3D11ShaderReflectionVariable* variableReflector = constantBufferReflector->GetVariableByIndex(0)) // time - { - D3D11_SHADER_VARIABLE_DESC variableDescriptor; - if (SUCCEEDED_LOG(variableReflector->GetDesc(&variableDescriptor))) - { - // only if time is used - _r.requiresContinuousRedraw = WI_IsFlagSet(variableDescriptor.uFlags, D3D_SVF_USED); - } - } - } - } - } - else - { - if (error) - { - LOG_HR_MSG(hr, "%*hs", error->GetBufferSize(), error->GetBufferPointer()); - } - else - { - LOG_HR(hr); - } - if (_api.warningCallback) - { - _api.warningCallback(D2DERR_SHADER_COMPILE_FAILED); - } - } - } - else if (_api.useRetroTerminalEffect) - { - THROW_IF_FAILED(_r.device->CreatePixelShader(&custom_shader_ps[0], sizeof(custom_shader_ps), nullptr, 
_r.customPixelShader.put())); - // We know the built-in retro shader doesn't require continuous redraw. - _r.requiresContinuousRedraw = false; - } - - if (_r.customPixelShader) - { - THROW_IF_FAILED(_r.device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _r.customVertexShader.put())); - - { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(CustomConstBuffer); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.customShaderConstantBuffer.put())); - } - - { - D3D11_SAMPLER_DESC desc{}; - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; - desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; - desc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; - desc.MaxAnisotropy = 1; - desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; - desc.MaxLOD = D3D11_FLOAT32_MAX; - THROW_IF_FAILED(_r.device->CreateSamplerState(&desc, _r.customShaderSamplerState.put())); - } - - _r.customShaderStartTime = std::chrono::steady_clock::now(); - } - } - - WI_ClearFlag(_api.invalidations, ApiInvalidations::Device); - WI_SetAllFlags(_api.invalidations, ApiInvalidations::SwapChain); -} - -void AtlasEngine::_releaseSwapChain() -{ - // Flush() docs: - // However, if an application must actually destroy an old swap chain and create a new swap chain, - // the application must force the destruction of all objects that the application freed. - // To force the destruction, call ID3D11DeviceContext::ClearState (or otherwise ensure - // no views are bound to pipeline state), and then call Flush on the immediate context. 
- if (_r.swapChain && _r.deviceContext) - { - if (_r.d2dMode) - { - _r.d2dRenderTarget.reset(); - } - _r.frameLatencyWaitableObject.reset(); - _r.swapChain.reset(); - _r.renderTargetView.reset(); - _r.deviceContext->ClearState(); - _r.deviceContext->Flush(); - } -} - -void AtlasEngine::_createSwapChain() -{ - _releaseSwapChain(); - - // D3D swap chain setup (the thing that allows us to present frames on the screen) - { - // With C++20 we'll finally have designated initializers. - DXGI_SWAP_CHAIN_DESC1 desc{}; - desc.Width = _api.sizeInPixel.x; - desc.Height = _api.sizeInPixel.y; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc.Count = 1; - desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window. - // 3 buffers seems to guarantee a stable framerate at display frequency at all times. - desc.BufferCount = 3; - desc.Scaling = DXGI_SCALING_NONE; - // DXGI_SWAP_EFFECT_FLIP_DISCARD is a mode that was created at a time were display drivers - // lacked support for Multiplane Overlays (MPO) and were copying buffers was expensive. - // This allowed DWM to quickly draw overlays (like gamebars) on top of rendered content. - // With faster GPU memory in general and with support for MPO in particular this isn't - // really an advantage anymore. Instead DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL allows for a - // more "intelligent" composition and display updates to occur like Panel Self Refresh - // (PSR) which requires dirty rectangles (Present1 API) to work correctly. - desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - // If our background is opaque we can enable "independent" flips by setting DXGI_ALPHA_MODE_IGNORE. - // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. - desc.AlphaMode = _api.backgroundOpaqueMixin ? DXGI_ALPHA_MODE_IGNORE : DXGI_ALPHA_MODE_PREMULTIPLIED; - desc.Flags = debugGeneralPerformance ? 
0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; - - wil::com_ptr dxgiFactory; - THROW_IF_FAILED(CreateDXGIFactory1(IID_PPV_ARGS(dxgiFactory.addressof()))); - - if (_api.hwnd) - { - THROW_IF_FAILED(dxgiFactory->CreateSwapChainForHwnd(_r.device.get(), _api.hwnd, &desc, nullptr, nullptr, _r.swapChain.put())); - } - else - { - const wil::unique_hmodule module{ LoadLibraryExW(L"dcomp.dll", nullptr, LOAD_LIBRARY_SEARCH_SYSTEM32) }; - THROW_LAST_ERROR_IF(!module); - const auto DCompositionCreateSurfaceHandle = GetProcAddressByFunctionDeclaration(module.get(), DCompositionCreateSurfaceHandle); - THROW_LAST_ERROR_IF(!DCompositionCreateSurfaceHandle); - - // As per: https://docs.microsoft.com/en-us/windows/win32/api/dcomp/nf-dcomp-dcompositioncreatesurfacehandle - static constexpr DWORD COMPOSITIONSURFACE_ALL_ACCESS = 0x0003L; - THROW_IF_FAILED(DCompositionCreateSurfaceHandle(COMPOSITIONSURFACE_ALL_ACCESS, nullptr, _api.swapChainHandle.put())); - THROW_IF_FAILED(dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(_r.device.get(), _api.swapChainHandle.get(), &desc, nullptr, _r.swapChain.put())); - } - - if constexpr (!debugGeneralPerformance) - { - const auto swapChain2 = _r.swapChain.query(); - _r.frameLatencyWaitableObject.reset(swapChain2->GetFrameLatencyWaitableObject()); - THROW_LAST_ERROR_IF(!_r.frameLatencyWaitableObject); - } - } - - // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: - // > For every frame it renders, the app should wait on this handle before starting any rendering operations. - // > Note that this requirement includes the first frame the app renders with the swap chain. 
- _r.waitForPresentation = true; - WaitUntilCanRender(); - - if (_api.swapChainChangedCallback) - { - try - { - _api.swapChainChangedCallback(_api.swapChainHandle.get()); - } - CATCH_LOG(); - } - - WI_ClearFlag(_api.invalidations, ApiInvalidations::SwapChain); - WI_SetAllFlags(_api.invalidations, ApiInvalidations::Size | ApiInvalidations::Font); -} - -void AtlasEngine::_recreateSizeDependentResources() -{ - // ResizeBuffer() docs: - // Before you call ResizeBuffers, ensure that the application releases all references [...]. - // You can use ID3D11DeviceContext::ClearState to ensure that all [internal] references are released. - // The _r.cells check exists simply to prevent us from calling ResizeBuffers() on startup (i.e. when `_r` is empty). - if (_r.cells) - { - if (_r.d2dMode) - { - _r.d2dRenderTarget.reset(); - } - _r.renderTargetView.reset(); - _r.deviceContext->ClearState(); - _r.deviceContext->Flush(); - THROW_IF_FAILED(_r.swapChain->ResizeBuffers(0, _api.sizeInPixel.x, _api.sizeInPixel.y, DXGI_FORMAT_UNKNOWN, debugGeneralPerformance ? 0 : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT)); - } - - const auto totalCellCount = static_cast(_api.cellCount.x) * static_cast(_api.cellCount.y); - const auto resize = _api.cellCount != _r.cellCount; - - if (resize) - { - // Let's guess that every cell consists of a surrogate pair. - const auto projectedTextSize = static_cast(_api.cellCount.x) * 2; - // IDWriteTextAnalyzer::GetGlyphs says: - // The recommended estimate for the per-glyph output buffers is (3 * textLength / 2 + 16). - const auto projectedGlyphSize = 3 * projectedTextSize / 2 + 16; - - // This buffer is a bit larger than the others (multiple MB). - // Prevent a memory usage spike, by first deallocating and then allocating. - _r.cells = {}; - _r.cellGlyphMapping = {}; - // Our render loop heavily relies on memcpy() which is between 1.5x - // and 40x faster for allocations with an alignment of 32 or greater. 
- // (40x on AMD Zen1-3, which have a rep movsb performance issue. MSFT:33358259.) - _r.cells = Buffer{ totalCellCount }; - _r.cellGlyphMapping = Buffer{ totalCellCount }; - _r.cellCount = _api.cellCount; - _r.tileAllocator.setMaxArea(_api.sizeInPixel); - - // .clear() doesn't free the memory of these buffers. - // This code allows them to shrink again. - _api.bufferLine = {}; - _api.bufferLine.reserve(projectedTextSize); - _api.bufferLineColumn.reserve(projectedTextSize + 1); - _api.bufferLineMetadata = Buffer{ _api.cellCount.x }; - _api.analysisResults = {}; - - _api.clusterMap = Buffer{ projectedTextSize }; - _api.textProps = Buffer{ projectedTextSize }; - _api.glyphIndices = Buffer{ projectedGlyphSize }; - _api.glyphProps = Buffer{ projectedGlyphSize }; - _api.glyphAdvances = Buffer{ projectedGlyphSize }; - _api.glyphOffsets = Buffer{ projectedGlyphSize }; - - // Initialize cellGlyphMapping with valid data (whitespace), so that it can be - // safely used by the TileHashMap refresh logic via makeNewest() in StartPaint(). - { - u16x2* coords{}; - AtlasKey key{ { .cellCount = 1 }, 1, L" " }; - AtlasValue value{ CellFlags::None, 1, &coords }; - - coords[0] = _r.tileAllocator.allocate(_r.glyphs); - - const auto it = _r.glyphs.insert(std::move(key), std::move(value)); - _r.glyphQueue.emplace_back(it); - - std::fill(_r.cellGlyphMapping.begin(), _r.cellGlyphMapping.end(), it); - } - } - - if (!_r.d2dMode) - { - // The RenderTargetView is later used with OMSetRenderTargets - // to tell D3D where stuff is supposed to be rendered at. 
- { - wil::com_ptr buffer; - THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); - THROW_IF_FAILED(_r.device->CreateRenderTargetView(buffer.get(), nullptr, _r.renderTargetView.put())); - } - if (_r.customPixelShader) - { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = _api.sizeInPixel.x; - desc.Height = _api.sizeInPixel.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, _r.customOffscreenTexture.addressof())); - THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureView.addressof())); - THROW_IF_FAILED(_r.device->CreateRenderTargetView(_r.customOffscreenTexture.get(), nullptr, _r.customOffscreenTextureTargetView.addressof())); - } - - // Tell D3D which parts of the render target will be visible. - // Everything outside of the viewport will be black. - { - D3D11_VIEWPORT viewport{}; - viewport.Width = static_cast(_api.sizeInPixel.x); - viewport.Height = static_cast(_api.sizeInPixel.y); - _r.deviceContext->RSSetViewports(1, &viewport); - } - - if (resize) - { - D3D11_BUFFER_DESC desc; - desc.ByteWidth = gsl::narrow(totalCellCount * sizeof(Cell)); // totalCellCount can theoretically be UINT32_MAX! - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; - desc.StructureByteStride = sizeof(Cell); - THROW_IF_FAILED(_r.device->CreateBuffer(&desc, nullptr, _r.cellBuffer.put())); - THROW_IF_FAILED(_r.device->CreateShaderResourceView(_r.cellBuffer.get(), nullptr, _r.cellView.put())); - } - - // We have called _r.deviceContext->ClearState() in the beginning and lost all D3D state. - // This forces us to set up everything up from scratch again. 
- _setShaderResources(); - } - - WI_ClearFlag(_api.invalidations, ApiInvalidations::Size); - WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); + _api.invalidatedRows = invalidatedRowsAll; } void AtlasEngine::_recreateFontDependentResources() { - { - // We're likely resizing the atlas anyways and can - // thus also release any of these buffers prematurely. - _r.d2dRenderTarget.reset(); // depends on _r.atlasBuffer - _r.atlasView.reset(); - _r.atlasBuffer.reset(); - } - - // D3D - { - const auto scaling = GetScaling(); - - _r.cellSizeDIP.x = static_cast(_api.fontMetrics.cellSize.x) / scaling; - _r.cellSizeDIP.y = static_cast(_api.fontMetrics.cellSize.y) / scaling; - _r.cellCount = _api.cellCount; - _r.dpi = _api.dpi; - _r.fontMetrics = _api.fontMetrics; - _r.dipPerPixel = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi); - _r.pixelPerDIP = static_cast(_r.dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); - _r.atlasSizeInPixel = { 0, 0 }; - _r.tileAllocator = TileAllocator{ _api.fontMetrics.cellSize, _api.sizeInPixel }; - - _r.glyphs = {}; - _r.glyphQueue = {}; - _r.glyphQueue.reserve(64); - } - // D3D specifically for UpdateDpi() - // This compensates for the built in scaling factor in a XAML SwapChainPanel (CompositionScaleX/Y). 
- if (!_api.hwnd) - { - if (const auto swapChain2 = _r.swapChain.try_query()) - { - const auto inverseScale = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_api.dpi); - DXGI_MATRIX_3X2_F matrix{}; - matrix._11 = inverseScale; - matrix._22 = inverseScale; - THROW_IF_FAILED(swapChain2->SetMatrixTransform(&matrix)); - } - } + _p.d.font.dipPerPixel = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_p.s->font->dpi); + _p.d.font.pixelPerDIP = static_cast(_p.s->font->dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); + _p.d.font.cellSizeDIP.x = static_cast(_p.s->font->cellSize.x) * _p.d.font.dipPerPixel; + _p.d.font.cellSizeDIP.y = static_cast(_p.s->font->cellSize.y) * _p.d.font.dipPerPixel; - // D2D + if (!_p.s->font->fontAxisValues.empty()) { // See AtlasEngine::UpdateFont. // It hardcodes indices 0/1/2 in fontAxisValues to the weight/italic/slant axes. - // If they're -1.0f they haven't been set by the user and must be filled by us. + // If they're NAN they haven't been set by the user and must be filled by us. // When we call SetFontAxisValues() we basically override (disable) DirectWrite's internal font axes, // and if either of the 3 aren't set we'd make it impossible for the user to see bold/italic text. -#pragma warning(suppress : 26494) // Variable 'standardAxes' is uninitialized. Always initialize an object (type.5). - std::array standardAxes; - - if (!_api.fontAxisValues.empty()) - { - Expects(_api.fontAxisValues.size() >= standardAxes.size()); - memcpy(standardAxes.data(), _api.fontAxisValues.data(), sizeof(standardAxes)); - } - - const auto restoreFontAxisValues = wil::scope_exit([&]() noexcept { - if (!_api.fontAxisValues.empty()) - { - memcpy(_api.fontAxisValues.data(), standardAxes.data(), sizeof(standardAxes)); - } - }); + const auto& standardAxes = _p.s->font->fontAxisValues; + auto fontAxisValues = _p.s->font->fontAxisValues; for (auto italic = 0; italic < 2; ++italic) { for (auto bold = 0; bold < 2; ++bold) { - const auto fontWeight = bold ? 
DWRITE_FONT_WEIGHT_BOLD : static_cast(_api.fontMetrics.fontWeight); - const auto fontStyle = italic ? DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; - auto& textFormat = _r.textFormats[italic][bold]; - - wil::com_ptr font; - THROW_IF_FAILED(_r.fontMetrics.fontFamily->GetFirstMatchingFont(fontWeight, DWRITE_FONT_STRETCH_NORMAL, fontStyle, font.addressof())); - THROW_IF_FAILED(font->CreateFontFace(_r.fontFaces[italic << 1 | bold].put())); - - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextFormat(_api.fontMetrics.fontName.c_str(), _api.fontMetrics.fontCollection.get(), fontWeight, fontStyle, DWRITE_FONT_STRETCH_NORMAL, _api.fontMetrics.fontSizeInDIP, L"", textFormat.put())); - THROW_IF_FAILED(textFormat->SetWordWrapping(DWRITE_WORD_WRAPPING_NO_WRAP)); - - // DWRITE_LINE_SPACING_METHOD_UNIFORM: - // > Lines are explicitly set to uniform spacing, regardless of contained font sizes. - // > This can be useful to avoid the uneven appearance that can occur from font fallback. - // We want that. Otherwise fallback fonts might be rendered with an incorrect baseline and get cut off vertically. - THROW_IF_FAILED(textFormat->SetLineSpacing(DWRITE_LINE_SPACING_METHOD_UNIFORM, _r.cellSizeDIP.y, _api.fontMetrics.baselineInDIP)); - - // NOTE: SetTextAlignment(DWRITE_TEXT_ALIGNMENT_CENTER) breaks certain - // bitmap fonts which expect glyphs to be laid out left-aligned. - - // NOTE: SetAutomaticFontAxes(DWRITE_AUTOMATIC_FONT_AXES_OPTICAL_SIZE) breaks certain - // fonts making them look fairly unslightly. With no option to easily disable this - // feature in Windows Terminal, it's better left disabled by default. 
- - const DWRITE_LINE_SPACING lineSpacing{ - .method = DWRITE_LINE_SPACING_METHOD_UNIFORM, - .height = _r.cellSizeDIP.y, - .baseline = _api.fontMetrics.baselineInDIP, - .fontLineGapUsage = DWRITE_FONT_LINE_GAP_USAGE_ENABLED, - }; - THROW_IF_FAILED(textFormat.query()->SetLineSpacing(&lineSpacing)); - - if (!_api.fontAxisValues.empty()) - { - if (const auto textFormat3 = textFormat.try_query()) - { - // The wght axis defaults to the font weight. - _api.fontAxisValues[0].value = bold || standardAxes[0].value == -1.0f ? static_cast(fontWeight) : standardAxes[0].value; - // The ital axis defaults to 1 if this is italic and 0 otherwise. - _api.fontAxisValues[1].value = italic ? 1.0f : (standardAxes[1].value == -1.0f ? 0.0f : standardAxes[1].value); - // The slnt axis defaults to -12 if this is italic and 0 otherwise. - _api.fontAxisValues[2].value = italic ? -12.0f : (standardAxes[2].value == -1.0f ? 0.0f : standardAxes[2].value); - - THROW_IF_FAILED(textFormat3->SetFontAxisValues(_api.fontAxisValues.data(), gsl::narrow_cast(_api.fontAxisValues.size()))); - _r.textFormatAxes[italic][bold] = { _api.fontAxisValues.data(), _api.fontAxisValues.size() }; - } - } + // The wght axis defaults to the font weight. + fontAxisValues[0].value = bold ? DWRITE_FONT_WEIGHT_BOLD : (isnan(standardAxes[0].value) ? static_cast(_p.s->font->fontWeight) : standardAxes[0].value); + // The ital axis defaults to 1 if this is italic and 0 otherwise. + fontAxisValues[1].value = italic ? 1.0f : (isnan(standardAxes[1].value) ? 0.0f : standardAxes[1].value); + // The slnt axis defaults to -12 if this is italic and 0 otherwise. + fontAxisValues[2].value = italic ? -12.0f : (isnan(standardAxes[2].value) ? 
0.0f : standardAxes[2].value); + _p.d.font.textFormatAxes[italic][bold] = { fontAxisValues.data(), fontAxisValues.size() }; } } } - { - _r.typography.reset(); - - if (!_api.fontFeatures.empty()) - { - _sr.dwriteFactory->CreateTypography(_r.typography.addressof()); - for (const auto& v : _api.fontFeatures) - { - THROW_IF_FAILED(_r.typography->AddFontFeature(v)); - } - } - } - - WI_ClearFlag(_api.invalidations, ApiInvalidations::Font); - WI_SetAllFlags(_r.invalidations, RenderInvalidations::Cursor | RenderInvalidations::ConstBuffer); -} - -IDWriteTextFormat* AtlasEngine::_getTextFormat(bool bold, bool italic) const noexcept -{ - return _r.textFormats[italic][bold].get(); -} - -const AtlasEngine::Buffer& AtlasEngine::_getTextFormatAxis(bool bold, bool italic) const noexcept -{ - return _r.textFormatAxes[italic][bold]; -} -AtlasEngine::Cell* AtlasEngine::_getCell(u16 x, u16 y) noexcept -{ - assert(x < _r.cellCount.x); - assert(y < _r.cellCount.y); - return _r.cells.data() + static_cast(_r.cellCount.x) * y + x; + _api.replacementCharacterFontFace.reset(); + _api.replacementCharacterGlyphIndex = 0; + _api.replacementCharacterLookedUp = false; } -AtlasEngine::TileHashMap::iterator* AtlasEngine::_getCellGlyphMapping(u16 x, u16 y) noexcept +void AtlasEngine::_recreateCellCountDependentResources() { - assert(x < _r.cellCount.x); - assert(y < _r.cellCount.y); - return _r.cellGlyphMapping.data() + static_cast(_r.cellCount.x) * y + x; -} - -void AtlasEngine::_setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept -{ - assert(coords.left <= coords.right); - assert(coords.top <= coords.bottom); - assert(coords.right <= _r.cellCount.x); - assert(coords.bottom <= _r.cellCount.y); - - const auto filter = ~mask; - const auto width = static_cast(coords.right) - coords.left; - const auto height = static_cast(coords.bottom) - coords.top; - const auto stride = static_cast(_r.cellCount.x); - auto row = _r.cells.data() + static_cast(_r.cellCount.x) * coords.top + coords.left; - 
const auto end = row + height * stride; - - for (; row != end; row += stride) - { - const auto dataEnd = row + width; - for (auto data = row; data != dataEnd; ++data) - { - const auto current = data->flags; - data->flags = (current & filter) | bits; - } - } + // Let's guess that every cell consists of a surrogate pair. + const auto projectedTextSize = static_cast(_p.s->cellCount.x) * 2; + // IDWriteTextAnalyzer::GetGlyphs says: + // The recommended estimate for the per-glyph output buffers is (3 * textLength / 2 + 16). + const auto projectedGlyphSize = 3 * projectedTextSize / 2 + 16; + + _api.bufferLine = std::vector{}; + _api.bufferLine.reserve(projectedTextSize); + _api.bufferLineColumn.reserve(projectedTextSize + 1); + _api.colorsForeground = Buffer(_p.s->cellCount.x); + + _api.analysisResults = std::vector{}; + _api.clusterMap = Buffer{ projectedTextSize }; + _api.textProps = Buffer{ projectedTextSize }; + _api.glyphIndices = Buffer{ projectedGlyphSize }; + _api.glyphProps = Buffer{ projectedGlyphSize }; + _api.glyphAdvances = Buffer{ projectedGlyphSize }; + _api.glyphOffsets = Buffer{ projectedGlyphSize }; + + _p.rows = Buffer(_p.s->cellCount.y); + _p.backgroundBitmap = Buffer(static_cast(_p.s->cellCount.x) * _p.s->cellCount.y); } void AtlasEngine::_flushBufferLine() @@ -1255,398 +482,297 @@ void AtlasEngine::_flushBufferLine() // This would seriously blow us up otherwise. Expects(_api.bufferLineColumn.size() == _api.bufferLine.size() + 1); - // GH#13962: With the lack of proper LineRendition support, just fill - // the remaining columns with whitespace to prevent any weird artifacts. - for (auto lastColumn = _api.bufferLineColumn.back(); lastColumn < _api.cellCount.x;) - { - ++lastColumn; - _api.bufferLine.emplace_back(L' '); - _api.bufferLineColumn.emplace_back(lastColumn); - } - - // NOTE: - // This entire function is one huge hack to see if it works. - - // UH OH UNICODE MADNESS AHEAD - // - // # What do we want? 
- // - // Segment a line of text (_api.bufferLine) into unicode "clusters". - // Each cluster is one "whole" glyph with diacritics, ligatures, zero width joiners - // and whatever else, that should be cached as a whole in our texture atlas. - // - // # How do we get that? - // - // ## The unfortunate preface - // - // DirectWrite can be "reluctant" to segment text into clusters and I found no API which offers simply that. - // What it offers are a large number of low level APIs that can sort of be used in combination to do this. - // The resulting text parsing is very slow unfortunately, consuming up to 95% of rendering time in extreme cases. - // - // ## The actual approach - // - // DirectWrite has 2 APIs which can segment text properly (including ligatures and zero width joiners): - // * IDWriteTextAnalyzer1::GetTextComplexity - // * IDWriteTextAnalyzer::GetGlyphs - // - // Both APIs require us to attain an IDWriteFontFace as the functions themselves don't handle font fallback. - // This forces us to call IDWriteFontFallback::MapCharacters first. - // - // Additionally IDWriteTextAnalyzer::GetGlyphs requires an instance of DWRITE_SCRIPT_ANALYSIS, - // which can only be attained by running IDWriteTextAnalyzer::AnalyzeScript first. - // - // Font fallback with IDWriteFontFallback::MapCharacters is very slow. - - const auto textFormat = _getTextFormat(_api.attributes.bold, _api.attributes.italic); - const auto& textFormatAxis = _getTextFormatAxis(_api.attributes.bold, _api.attributes.italic); - - TextAnalysisSource analysisSource{ _api.bufferLine.data(), gsl::narrow(_api.bufferLine.size()) }; - TextAnalysisSink analysisSink{ _api.analysisResults }; - - wil::com_ptr fontCollection; - THROW_IF_FAILED(textFormat->GetFontCollection(fontCollection.addressof())); + auto& row = _p.rows[_api.lastPaintBufferLineCoord.y]; wil::com_ptr mappedFontFace; #pragma warning(suppress : 26494) // Variable 'mappedEnd' is uninitialized. Always initialize an object (type.5). 
for (u32 idx = 0, mappedEnd; idx < _api.bufferLine.size(); idx = mappedEnd) { - if (_sr.systemFontFallback) - { - auto scale = 1.0f; - u32 mappedLength = 0; + f32 scale = 1; + u32 mappedLength = 0; + _mapCharacters(_api.bufferLine.data() + idx, gsl::narrow_cast(_api.bufferLine.size()) - idx, &mappedLength, &scale, mappedFontFace.put()); + mappedEnd = idx + mappedLength; - if (textFormatAxis) - { - wil::com_ptr fontFace5; - THROW_IF_FAILED(_sr.systemFontFallback.query()->MapCharacters( - /* analysisSource */ &analysisSource, - /* textPosition */ idx, - /* textLength */ gsl::narrow_cast(_api.bufferLine.size()) - idx, - /* baseFontCollection */ fontCollection.get(), - /* baseFamilyName */ _api.fontMetrics.fontName.c_str(), - /* fontAxisValues */ textFormatAxis.data(), - /* fontAxisValueCount */ gsl::narrow_cast(textFormatAxis.size()), - /* mappedLength */ &mappedLength, - /* scale */ &scale, - /* mappedFontFace */ fontFace5.put())); - mappedFontFace = std::move(fontFace5); - } - else - { - const auto baseWeight = _api.attributes.bold ? DWRITE_FONT_WEIGHT_BOLD : static_cast(_api.fontMetrics.fontWeight); - const auto baseStyle = _api.attributes.italic ? 
DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; - wil::com_ptr font; - - THROW_IF_FAILED(_sr.systemFontFallback->MapCharacters( - /* analysisSource */ &analysisSource, - /* textPosition */ idx, - /* textLength */ gsl::narrow_cast(_api.bufferLine.size()) - idx, - /* baseFontCollection */ fontCollection.get(), - /* baseFamilyName */ _api.fontMetrics.fontName.c_str(), - /* baseWeight */ baseWeight, - /* baseStyle */ baseStyle, - /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, - /* mappedLength */ &mappedLength, - /* mappedFont */ font.addressof(), - /* scale */ &scale)); - - mappedFontFace.reset(); - if (font) - { - THROW_IF_FAILED(font->CreateFontFace(mappedFontFace.addressof())); - } - } - - mappedEnd = idx + mappedLength; - - if (!mappedFontFace) - { - // Task: Replace all characters in this range with unicode replacement characters. - // Input (where "n" is a narrow and "ww" is a wide character): - // _api.bufferLine = "nwwnnw" - // _api.bufferLineColumn = {0, 1, 1, 3, 4, 5, 5, 6} - // n w w n n w w - // Solution: - // Iterate through bufferLineColumn until the value changes, because this indicates we passed over a - // complete (narrow or wide) cell. To do so we'll use col1 (previous column) and col2 (next column). - // Then we emit a replacement character by telling _emplaceGlyph that this range has no font face. - auto pos1 = idx; - auto col1 = _api.bufferLineColumn[pos1]; - for (auto pos2 = idx + 1; pos2 <= mappedEnd; ++pos2) - { - if (const auto col2 = _api.bufferLineColumn[pos2]; col1 != col2) - { - _emplaceGlyph(nullptr, pos1, pos2); - pos1 = pos2; - col1 = col2; - } - } - - continue; - } - } - else + if (!mappedFontFace) { - if (!mappedFontFace) - { - const auto baseWeight = _api.attributes.bold ? DWRITE_FONT_WEIGHT_BOLD : static_cast(_api.fontMetrics.fontWeight); - const auto baseStyle = _api.attributes.italic ? 
DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; - - wil::com_ptr fontFamily; - THROW_IF_FAILED(fontCollection->GetFontFamily(0, fontFamily.addressof())); + _mapReplacementCharacter(idx, mappedEnd, row); + continue; + } - wil::com_ptr font; - THROW_IF_FAILED(fontFamily->GetFirstMatchingFont(baseWeight, DWRITE_FONT_STRETCH_NORMAL, baseStyle, font.addressof())); + const auto initialIndicesCount = row.glyphIndices.size(); - THROW_IF_FAILED(font->CreateFontFace(mappedFontFace.put())); - } - - mappedEnd = gsl::narrow_cast(_api.bufferLine.size()); + if (mappedLength > _api.glyphIndices.size()) + { + auto size = _api.glyphIndices.size(); + size = size + (size >> 1); + size = std::max(size, mappedLength); + Expects(size > _api.glyphIndices.size()); + _api.glyphIndices = Buffer{ size }; + _api.glyphProps = Buffer{ size }; } // We can reuse idx here, as it'll be reset to "idx = mappedEnd" in the outer loop anyways. for (u32 complexityLength = 0; idx < mappedEnd; idx += complexityLength) { BOOL isTextSimple; - THROW_IF_FAILED(_sr.textAnalyzer->GetTextComplexity(_api.bufferLine.data() + idx, mappedEnd - idx, mappedFontFace.get(), &isTextSimple, &complexityLength, _api.glyphIndices.data())); + THROW_IF_FAILED(_p.textAnalyzer->GetTextComplexity(_api.bufferLine.data() + idx, mappedEnd - idx, mappedFontFace.get(), &isTextSimple, &complexityLength, _api.glyphIndices.data())); +#pragma warning(suppress : 4127) if (isTextSimple) { - size_t beg = 0; for (size_t i = 0; i < complexityLength; ++i) { - if (_emplaceGlyph(mappedFontFace.get(), idx + beg, idx + i + 1)) - { - beg = i + 1; - } + const auto col1 = _api.bufferLineColumn[idx + i + 0]; + const auto fg = _api.colorsForeground[col1]; + const auto col2 = _api.bufferLineColumn[idx + i + 1]; + const auto glyphAdvance = (col2 - col1) * _p.d.font.cellSizeDIP.x; + row.glyphIndices.emplace_back(_api.glyphIndices[i]); + row.glyphAdvances.emplace_back(glyphAdvance); + row.glyphOffsets.emplace_back(); + row.colors.emplace_back(fg); } } 
else { - _api.analysisResults.clear(); - THROW_IF_FAILED(_sr.textAnalyzer->AnalyzeScript(&analysisSource, idx, complexityLength, &analysisSink)); - //_sr.textAnalyzer->AnalyzeBidi(&atlasAnalyzer, idx, complexityLength, &atlasAnalyzer); + _mapComplex(mappedFontFace.get(), idx, complexityLength, row); + } + } - for (const auto& a : _api.analysisResults) - { - DWRITE_SCRIPT_ANALYSIS scriptAnalysis{ a.script, static_cast(a.shapes) }; - u32 actualGlyphCount = 0; + const auto indicesCount = row.glyphIndices.size(); + if (indicesCount > initialIndicesCount) + { + row.mappings.emplace_back(std::move(mappedFontFace), _p.s->font->fontSizeInDIP * scale, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + } + } +} -#pragma warning(push) -#pragma warning(disable : 26494) // Variable '...' is uninitialized. Always initialize an object (type.5). - // None of these variables need to be initialized. - // features/featureRangeLengths are marked _In_reads_opt_(featureRanges). - // featureRanges is only > 0 when we also initialize all these variables. 
- DWRITE_TYPOGRAPHIC_FEATURES feature; - const DWRITE_TYPOGRAPHIC_FEATURES* features; - u32 featureRangeLengths; -#pragma warning(pop) - u32 featureRanges = 0; - - if (!_api.fontFeatures.empty()) - { - feature.features = _api.fontFeatures.data(); - feature.featureCount = gsl::narrow_cast(_api.fontFeatures.size()); - features = &feature; - featureRangeLengths = a.textLength; - featureRanges = 1; - } - - if (_api.clusterMap.size() < a.textLength) - { - _api.clusterMap = Buffer{ a.textLength }; - _api.textProps = Buffer{ a.textLength }; - } - - for (auto retry = 0;;) - { - const auto hr = _sr.textAnalyzer->GetGlyphs( - /* textString */ _api.bufferLine.data() + a.textPosition, - /* textLength */ a.textLength, - /* fontFace */ mappedFontFace.get(), - /* isSideways */ false, - /* isRightToLeft */ a.bidiLevel & 1, - /* scriptAnalysis */ &scriptAnalysis, - /* localeName */ nullptr, - /* numberSubstitution */ nullptr, - /* features */ &features, - /* featureRangeLengths */ &featureRangeLengths, - /* featureRanges */ featureRanges, - /* maxGlyphCount */ gsl::narrow_cast(_api.glyphProps.size()), - /* clusterMap */ _api.clusterMap.data(), - /* textProps */ _api.textProps.data(), - /* glyphIndices */ _api.glyphIndices.data(), - /* glyphProps */ _api.glyphProps.data(), - /* actualGlyphCount */ &actualGlyphCount); - - if (hr == HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER) && ++retry < 8) - { - // Grow factor 1.5x. - auto size = _api.glyphProps.size(); - size = size + (size >> 1); - // Overflow check. - Expects(size > _api.glyphProps.size()); - _api.glyphIndices = Buffer{ size }; - _api.glyphProps = Buffer(size); - continue; - } - - THROW_IF_FAILED(hr); - break; - } - - if (_api.glyphAdvances.size() < actualGlyphCount) - { - // Grow the buffer by at least 1.5x and at least of `actualGlyphCount` items. - // The 1.5x growth ensures we don't reallocate every time we need 1 more slot. 
- auto size = _api.glyphAdvances.size(); - size = size + (size >> 1); - size = std::max(size, actualGlyphCount); - _api.glyphAdvances = Buffer{ size }; - _api.glyphOffsets = Buffer{ size }; - } - - THROW_IF_FAILED(_sr.textAnalyzer->GetGlyphPlacements( - /* textString */ _api.bufferLine.data() + a.textPosition, - /* clusterMap */ _api.clusterMap.data(), - /* textProps */ _api.textProps.data(), - /* textLength */ a.textLength, - /* glyphIndices */ _api.glyphIndices.data(), - /* glyphProps */ _api.glyphProps.data(), - /* glyphCount */ actualGlyphCount, - /* fontFace */ mappedFontFace.get(), - /* fontEmSize */ _api.fontMetrics.fontSizeInDIP, - /* isSideways */ false, - /* isRightToLeft */ a.bidiLevel & 1, - /* scriptAnalysis */ &scriptAnalysis, - /* localeName */ nullptr, - /* features */ &features, - /* featureRangeLengths */ &featureRangeLengths, - /* featureRanges */ featureRanges, - /* glyphAdvances */ _api.glyphAdvances.data(), - /* glyphOffsets */ _api.glyphOffsets.data())); - - _api.textProps[a.textLength - 1].canBreakShapingAfter = 1; - - size_t beg = 0; - for (size_t i = 0; i < a.textLength; ++i) - { - if (_api.textProps[i].canBreakShapingAfter) - { - if (_emplaceGlyph(mappedFontFace.get(), a.textPosition + beg, a.textPosition + i + 1)) - { - beg = i + 1; - } - } - } - } - } +void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace** mappedFontFace) const +{ + TextAnalysisSource analysisSource{ text, textLength }; + const auto& textFormatAxis = _p.d.font.textFormatAxes[_api.attributes.italic][_api.attributes.bold]; + + if (textFormatAxis) + { + THROW_IF_FAILED(_p.systemFontFallback1->MapCharacters( + /* analysisSource */ &analysisSource, + /* textPosition */ 0, + /* textLength */ textLength, + /* baseFontCollection */ _p.s->font->fontCollection.get(), + /* baseFamilyName */ _p.s->font->fontName.c_str(), + /* fontAxisValues */ textFormatAxis.data(), + /* fontAxisValueCount */ 
gsl::narrow_cast(textFormatAxis.size()), + /* mappedLength */ mappedLength, + /* scale */ scale, + /* mappedFontFace */ reinterpret_cast(mappedFontFace))); + } + else + { + const auto baseWeight = _api.attributes.bold ? DWRITE_FONT_WEIGHT_BOLD : static_cast(_p.s->font->fontWeight); + const auto baseStyle = _api.attributes.italic ? DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; + wil::com_ptr font; + + THROW_IF_FAILED(_p.systemFontFallback->MapCharacters( + /* analysisSource */ &analysisSource, + /* textPosition */ 0, + /* textLength */ textLength, + /* baseFontCollection */ _p.s->font->fontCollection.get(), + /* baseFamilyName */ _p.s->font->fontName.c_str(), + /* baseWeight */ baseWeight, + /* baseStyle */ baseStyle, + /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, + /* mappedLength */ mappedLength, + /* mappedFont */ font.addressof(), + /* scale */ scale)); + + if (font) + { + THROW_IF_FAILED(font->CreateFontFace(mappedFontFace)); } } } -// ^^^ Look at that amazing 8-fold nesting level. Lovely. <3 -bool AtlasEngine::_emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2) +void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row) { - static constexpr auto replacement = L'\uFFFD'; + _api.analysisResults.clear(); - // This would seriously blow us up otherwise. - Expects(bufferPos1 < bufferPos2 && bufferPos2 <= _api.bufferLine.size()); - - // _flushBufferLine() ensures that bufferLineColumn.size() > bufferLine.size(). - const auto x1 = _api.bufferLineColumn[bufferPos1]; - const auto x2 = _api.bufferLineColumn[bufferPos2]; - - // x1 == x2, if our TextBuffer and DirectWrite disagree where glyph boundaries are. Example: - // Our line of text contains a wide glyph consisting of 2 surrogate pairs "xx" and "yy". - // If DirectWrite considers the first "xx" to be separate from the second "yy", we'll get: - // _api.bufferLine = "...xxyy..." 
- // _api.bufferLineColumn = {01233335678} - // ^ ^ - // / \ - // bufferPos1 bufferPos2 - // x1: _api.bufferLineColumn[bufferPos1] == 3 - // x1: _api.bufferLineColumn[bufferPos2] == 3 - // --> cellCount (which is x2 - x1) is now 0 (invalid). - // - // Assuming that the TextBuffer implementation doesn't have any bugs... - // I'm not entirely certain why this occurs, but to me, a layperson, it appears as if - // IDWriteFontFallback::MapCharacters() doesn't respect extended grapheme clusters. - // It could also possibly be due to a difference in the supported Unicode version. - if (x1 >= x2 || x2 > _api.cellCount.x) + TextAnalysisSource analysisSource{ _api.bufferLine.data(), gsl::narrow(_api.bufferLine.size()) }; + TextAnalysisSink analysisSink{ _api.analysisResults }; + THROW_IF_FAILED(_p.textAnalyzer->AnalyzeScript(&analysisSource, idx, length, &analysisSink)); + + for (const auto& a : _api.analysisResults) { - return false; - } + const DWRITE_SCRIPT_ANALYSIS scriptAnalysis{ a.script, static_cast(a.shapes) }; + u32 actualGlyphCount = 0; + +#pragma warning(push) +#pragma warning(disable : 26494) // Variable '...' is uninitialized. Always initialize an object (type.5). + // None of these variables need to be initialized. + // features/featureRangeLengths are marked _In_reads_opt_(featureRanges). + // featureRanges is only > 0 when we also initialize all these variables. + DWRITE_TYPOGRAPHIC_FEATURES feature; + const DWRITE_TYPOGRAPHIC_FEATURES* features; + u32 featureRangeLengths; +#pragma warning(pop) + u32 featureRanges = 0; + + if (!_p.s->font->fontFeatures.empty()) + { + // Direct2D, why is this mutable? Why? +#pragma warning(suppress : 26492) // Don't use const_cast to cast away const or volatile (type.3). 
+ feature.features = const_cast(_p.s->font->fontFeatures.data()); + feature.featureCount = gsl::narrow_cast(_p.s->font->fontFeatures.size()); + features = &feature; + featureRangeLengths = a.textLength; + featureRanges = 1; + } + + if (_api.clusterMap.size() <= a.textLength) + { + _api.clusterMap = Buffer{ static_cast(a.textLength) + 1 }; + _api.textProps = Buffer{ a.textLength }; + } + + for (auto retry = 0;;) + { + const auto hr = _p.textAnalyzer->GetGlyphs( + /* textString */ _api.bufferLine.data() + a.textPosition, + /* textLength */ a.textLength, + /* fontFace */ mappedFontFace, + /* isSideways */ false, + /* isRightToLeft */ a.bidiLevel & 1, + /* scriptAnalysis */ &scriptAnalysis, + /* localeName */ nullptr, + /* numberSubstitution */ nullptr, + /* features */ &features, + /* featureRangeLengths */ &featureRangeLengths, + /* featureRanges */ featureRanges, + /* maxGlyphCount */ gsl::narrow_cast(_api.glyphIndices.size()), + /* clusterMap */ _api.clusterMap.data(), + /* textProps */ _api.textProps.data(), + /* glyphIndices */ _api.glyphIndices.data(), + /* glyphProps */ _api.glyphProps.data(), + /* actualGlyphCount */ &actualGlyphCount); + + if (hr == HRESULT_FROM_WIN32(ERROR_INSUFFICIENT_BUFFER) && ++retry < 8) + { + // Grow factor 1.5x. + auto size = _api.glyphIndices.size(); + size = size + (size >> 1); + // Overflow check. + Expects(size > _api.glyphIndices.size()); + _api.glyphIndices = Buffer{ size }; + _api.glyphProps = Buffer{ size }; + continue; + } - const auto chars = fontFace ? &_api.bufferLine[bufferPos1] : &replacement; - const auto charCount = fontFace ? bufferPos2 - bufferPos1 : 1; - const u16 cellCount = x2 - x1; + THROW_IF_FAILED(hr); + break; + } + + if (_api.glyphAdvances.size() < actualGlyphCount) + { + // Grow the buffer by at least 1.5x and to at least `actualGlyphCount` items. + // The 1.5x growth ensures we don't reallocate every time we need 1 more slot. 
+ auto size = _api.glyphAdvances.size(); + size = size + (size >> 1); + size = std::max(size, actualGlyphCount); + _api.glyphAdvances = Buffer{ size }; + _api.glyphOffsets = Buffer{ size }; + } + + THROW_IF_FAILED(_p.textAnalyzer->GetGlyphPlacements( + /* textString */ _api.bufferLine.data() + a.textPosition, + /* clusterMap */ _api.clusterMap.data(), + /* textProps */ _api.textProps.data(), + /* textLength */ a.textLength, + /* glyphIndices */ _api.glyphIndices.data(), + /* glyphProps */ _api.glyphProps.data(), + /* glyphCount */ actualGlyphCount, + /* fontFace */ mappedFontFace, + /* fontEmSize */ _p.s->font->fontSizeInDIP, + /* isSideways */ false, + /* isRightToLeft */ a.bidiLevel & 1, + /* scriptAnalysis */ &scriptAnalysis, + /* localeName */ nullptr, + /* features */ &features, + /* featureRangeLengths */ &featureRangeLengths, + /* featureRanges */ featureRanges, + /* glyphAdvances */ _api.glyphAdvances.data(), + /* glyphOffsets */ _api.glyphOffsets.data())); + + _api.clusterMap[a.textLength] = gsl::narrow_cast(actualGlyphCount); + + auto prevCluster = _api.clusterMap[0]; + size_t beg = 0; + + for (size_t i = 1; i <= a.textLength; ++i) + { + const auto nextCluster = _api.clusterMap[i]; + if (prevCluster == nextCluster) + { + continue; + } - auto attributes = _api.attributes; - attributes.cellCount = cellCount; + const auto col1 = _api.bufferLineColumn[a.textPosition + beg]; + const auto col2 = _api.bufferLineColumn[a.textPosition + i]; + const auto fg = _api.colorsForeground[col1]; - AtlasKey key{ attributes, gsl::narrow(charCount), chars }; - auto it = _r.glyphs.find(key); + const auto expectedAdvance = (col2 - col1) * _p.d.font.cellSizeDIP.x; + f32 actualAdvance = 0; + for (auto j = prevCluster; j < nextCluster; ++j) + { + actualAdvance += _api.glyphAdvances[j]; + } + _api.glyphAdvances[nextCluster - 1] += expectedAdvance - actualAdvance; - if (it == _r.glyphs.end()) + row.colors.insert(row.colors.end(), nextCluster - prevCluster, fg); + + prevCluster = 
nextCluster; + beg = i; + } + + row.glyphIndices.insert(row.glyphIndices.end(), _api.glyphIndices.begin(), _api.glyphIndices.begin() + actualGlyphCount); + row.glyphAdvances.insert(row.glyphAdvances.end(), _api.glyphAdvances.begin(), _api.glyphAdvances.begin() + actualGlyphCount); + row.glyphOffsets.insert(row.glyphOffsets.end(), _api.glyphOffsets.begin(), _api.glyphOffsets.begin() + actualGlyphCount); + } +} + +void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) +{ + if (!_api.replacementCharacterLookedUp) { - // Do fonts exist *in practice* which contain both colored and uncolored glyphs? I'm pretty sure... - // However doing it properly means using either of: - // * IDWriteFactory2::TranslateColorGlyphRun - // * IDWriteFactory4::TranslateColorGlyphRun - // * IDWriteFontFace4::GetGlyphImageData - // - // For the first two I wonder how one is supposed to restitch the 27 required parameters from a - // partial (!) text range returned by GetGlyphs(). Our caller breaks the GetGlyphs() result up - // into runs of characters up until the first canBreakShapingAfter == true after all. - // There's no documentation for it and I'm sure I'd mess it up. - // For very obvious reasons I didn't want to write this code. - // - // IDWriteFontFace4::GetGlyphImageData seems like the best approach and DirectWrite uses the - // same code that GetGlyphImageData uses to implement TranslateColorGlyphRun (well partially). - // However, it's a heavy operation and parses the font file on disk on every call (TranslateColorGlyphRun doesn't). - // For less obvious reasons I didn't want to write this code either. - // - // So this is a job for future me/someone. - // Bonus points for doing it without impacting performance. 
- auto flags = CellFlags::None; - if (fontFace) + bool succeeded = false; + + u32 mappedLength = 0; + f32 scale = 1.0f; + _mapCharacters(L"\uFFFD", 1, &mappedLength, &scale, _api.replacementCharacterFontFace.put()); + + if (mappedLength == 1) { - const auto fontFace2 = wil::try_com_query(fontFace); - WI_SetFlagIf(flags, CellFlags::ColoredGlyph, fontFace2 && fontFace2->IsColorFont()); + static constexpr u32 codepoint = 0xFFFD; + succeeded = SUCCEEDED(_api.replacementCharacterFontFace->GetGlyphIndicesW(&codepoint, 1, &_api.replacementCharacterGlyphIndex)); } - // The AtlasValue constructor fills the `coords` variable with a pointer to an array - // of at least `cellCount` elements. I did this so that I don't have to type out - // `value.data()->coords` again, despite the constructor having all the data necessary. - u16x2* coords; - AtlasValue value{ flags, cellCount, &coords }; - - for (u16 i = 0; i < cellCount; ++i) + if (!succeeded) { - coords[i] = _r.tileAllocator.allocate(_r.glyphs); + _api.replacementCharacterFontFace.reset(); + _api.replacementCharacterGlyphIndex = 0; } - it = _r.glyphs.insert(std::move(key), std::move(value)); - _r.glyphQueue.emplace_back(it); + _api.replacementCharacterLookedUp = true; } - const auto valueData = it->second.data(); - const auto coords = &valueData->coords[0]; - const auto cells = _getCell(x1, _api.lastPaintBufferLineCoord.y); - const auto cellGlyphMappings = _getCellGlyphMapping(x1, _api.lastPaintBufferLineCoord.y); - - for (u32 i = 0; i < cellCount; ++i) + if (_api.replacementCharacterFontFace) { - cells[i].tileIndex = coords[i]; - // We should apply the column color and flags from each column (instead - // of copying them from the x1) so that ligatures can appear in multiple - // colors with different line styles. 
- cells[i].flags = valueData->flags | _api.bufferLineMetadata[static_cast(x1) + i].flags; - cells[i].color = _api.bufferLineMetadata[static_cast(x1) + i].colors; + const auto initialIndicesCount = row.glyphIndices.size(); + const auto col0 = _api.bufferLineColumn[from]; + const auto col1 = _api.bufferLineColumn[to]; + const auto cols = gsl::narrow_cast(col1 - col0); + row.glyphIndices.insert(row.glyphIndices.end(), cols, _api.replacementCharacterGlyphIndex); + row.glyphAdvances.insert(row.glyphAdvances.end(), cols, _p.d.font.cellSizeDIP.x); + row.glyphOffsets.insert(row.glyphOffsets.end(), cols, DWRITE_GLYPH_OFFSET{}); + row.colors.insert(row.colors.end(), _api.colorsForeground.begin() + col0, _api.colorsForeground.begin() + col1); + row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSizeInDIP * 0.5f, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size())); } - - std::fill_n(cellGlyphMappings, cellCount, it); - return true; } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 146d8053d1d..64b125c0b77 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -3,14 +3,10 @@ #pragma once -#include -#include -#include - -#include "../../renderer/inc/IRenderEngine.hpp" +#include "common.h" #include "DWriteTextAnalysis.h" -namespace Microsoft::Console::Render +namespace Microsoft::Console::Render::Atlas { struct TextAnalysisSinkResult; @@ -27,6 +23,7 @@ namespace Microsoft::Console::Render [[nodiscard]] HRESULT EndPaint() noexcept override; [[nodiscard]] bool RequiresContinuousRedraw() noexcept override; void WaitUntilCanRender() noexcept override; + void _recreateBackend(); [[nodiscard]] HRESULT Present() noexcept override; [[nodiscard]] HRESULT PrepareForTeardown(_Out_ bool* pForcePaint) noexcept override; [[nodiscard]] HRESULT ScrollFrame() noexcept override; @@ -80,883 +77,30 @@ namespace Microsoft::Console::Render [[nodiscard]] HRESULT 
UpdateFont(const FontInfoDesired& pfiFontInfoDesired, FontInfo& fiFontInfo, const std::unordered_map& features, const std::unordered_map& axes) noexcept override; void UpdateHyperlinkHoveredId(uint16_t hoveredId) noexcept override; - // Some helper classes for the implementation. - // public because I don't want to sprinkle the code with friends. - public: -#define ATLAS_POD_OPS(type) \ - constexpr bool operator==(const type& rhs) const noexcept \ - { \ - return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ - } \ - \ - constexpr bool operator!=(const type& rhs) const noexcept \ - { \ - return __builtin_memcmp(this, &rhs, sizeof(rhs)) != 0; \ - } - -#define ATLAS_FLAG_OPS(type, underlying) \ - friend constexpr type operator~(type v) noexcept { return static_cast(~static_cast(v)); } \ - friend constexpr type operator|(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) | static_cast(rhs)); } \ - friend constexpr type operator&(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) & static_cast(rhs)); } \ - friend constexpr type operator^(type lhs, type rhs) noexcept { return static_cast(static_cast(lhs) ^ static_cast(rhs)); } \ - friend constexpr void operator|=(type& lhs, type rhs) noexcept { lhs = lhs | rhs; } \ - friend constexpr void operator&=(type& lhs, type rhs) noexcept { lhs = lhs & rhs; } \ - friend constexpr void operator^=(type& lhs, type rhs) noexcept { lhs = lhs ^ rhs; } - - template - struct vec2 - { - T x{}; - T y{}; - - ATLAS_POD_OPS(vec2) - }; - - template - struct vec3 - { - T x{}; - T y{}; - T z{}; - - ATLAS_POD_OPS(vec3) - }; - - template - struct vec4 - { - T x{}; - T y{}; - T z{}; - T w{}; - - ATLAS_POD_OPS(vec4) - }; - - template - struct rect - { - T left{}; - T top{}; - T right{}; - T bottom{}; - - ATLAS_POD_OPS(rect) - - constexpr bool non_empty() const noexcept - { - return (left < right) & (top < bottom); - } - }; - - using u8 = uint8_t; - - using u16 = uint16_t; - using u16x2 = vec2; - using u16r = rect; - 
- using i16 = int16_t; - - using u32 = uint32_t; - using u32x2 = vec2; - - using i32 = int32_t; - - using f32 = float; - using f32x2 = vec2; - using f32x3 = vec3; - using f32x4 = vec4; - - struct TextAnalyzerResult - { - u32 textPosition = 0; - u32 textLength = 0; - - // These 2 fields represent DWRITE_SCRIPT_ANALYSIS. - // Not using DWRITE_SCRIPT_ANALYSIS drops the struct size from 20 down to 12 bytes. - u16 script = 0; - u8 shapes = 0; - - u8 bidiLevel = 0; - }; - private: - // I wrote `Buffer` instead of using `std::vector`, because I want to convey that these things - // explicitly _don't_ hold resizeable contents, but rather plain content of a fixed size. - // For instance I didn't want a resizeable vector with a `push_back` method for my fixed-size - // viewport arrays - that doesn't make sense after all. `Buffer` also doesn't initialize - // contents to zero, allowing rapid creation/destruction and you can easily specify a custom - // (over-)alignment which can improve rendering perf by up to ~20% over `std::vector`. - template - struct Buffer - { - constexpr Buffer() noexcept = default; - - explicit Buffer(size_t size) : - _data{ allocate(size) }, - _size{ size } - { - std::uninitialized_default_construct_n(_data, size); - } - - Buffer(const T* data, size_t size) : - _data{ allocate(size) }, - _size{ size } - { - // Changing the constructor arguments to accept std::span might - // be a good future extension, but not to improve security here. - // You can trivially construct std::span's from invalid ranges. - // Until then the raw-pointer style is more practical. -#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '3' that may be unsafe [...]. 
- std::uninitialized_copy_n(data, size, _data); - } - - ~Buffer() - { - destroy(); - } - - Buffer(Buffer&& other) noexcept : - _data{ std::exchange(other._data, nullptr) }, - _size{ std::exchange(other._size, 0) } - { - } - -#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). - Buffer& operator=(Buffer&& other) noexcept - { - destroy(); - _data = std::exchange(other._data, nullptr); - _size = std::exchange(other._size, 0); - return *this; - } - - explicit operator bool() const noexcept - { - return _data != nullptr; - } - - T& operator[](size_t index) noexcept - { - assert(index < _size); - return _data[index]; - } - - const T& operator[](size_t index) const noexcept - { - assert(index < _size); - return _data[index]; - } - - T* data() noexcept - { - return _data; - } - - const T* data() const noexcept - { - return _data; - } - - size_t size() const noexcept - { - return _size; - } - - T* begin() noexcept - { - return _data; - } - - T* begin() const noexcept - { - return _data; - } - - T* end() noexcept - { - return _data + _size; - } - - T* end() const noexcept - { - return _data + _size; - } - - private: - // These two functions don't need to use scoped objects or standard allocators, - // since this class is in fact an scoped allocator object itself. -#pragma warning(push) -#pragma warning(disable : 26402) // Return a scoped object instead of a heap-allocated if it has a move constructor (r.3). -#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique instead (r.11). 
- static T* allocate(size_t size) - { - if constexpr (Alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) - { - return static_cast(::operator new(size * sizeof(T))); - } - else - { - return static_cast(::operator new(size * sizeof(T), static_cast(Alignment))); - } - } - - static void deallocate(T* data) noexcept - { - if constexpr (Alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) - { - ::operator delete(data); - } - else - { - ::operator delete(data, static_cast(Alignment)); - } - } -#pragma warning(pop) - - void destroy() noexcept - { - std::destroy_n(_data, _size); - deallocate(_data); - } - - T* _data = nullptr; - size_t _size = 0; - }; - - // This structure works similar to how std::string works: - // You can think of a std::string as a structure consisting of: - // char* data; - // size_t size; - // size_t capacity; - // where data is some backing memory allocated on the heap. - // - // But std::string employs an optimization called "small string optimization" (SSO). - // To simplify things it could be explained as: - // If the string capacity is small, then the characters are stored inside the "data" - // pointer and you make sure to set the lowest bit in the pointer one way or another. - // Heap allocations are always aligned by at least 4-8 bytes on any platform. - // If the address of the "data" pointer is not even you know data is stored inline. 
- template - union SmallObjectOptimizer - { - static_assert(std::is_trivially_copyable_v); - static_assert(std::has_unique_object_representations_v); - - T* allocated = nullptr; - T inlined; - - constexpr SmallObjectOptimizer() = default; - - SmallObjectOptimizer(const SmallObjectOptimizer& other) = delete; - SmallObjectOptimizer& operator=(const SmallObjectOptimizer& other) = delete; - - SmallObjectOptimizer(SmallObjectOptimizer&& other) noexcept - { - memcpy(this, &other, std::max(sizeof(allocated), sizeof(inlined))); - other.allocated = nullptr; - } - - SmallObjectOptimizer& operator=(SmallObjectOptimizer&& other) noexcept - { - std::destroy_at(this); - return *std::construct_at(this, std::move(other)); - } - - ~SmallObjectOptimizer() - { - if (!is_inline()) - { -#pragma warning(suppress : 26408) // Avoid malloc() and free(), prefer the nothrow version of new with delete (r.10). - free(allocated); - } - } - - T* initialize(size_t byteSize) - { - if (would_inline(byteSize)) - { - return &inlined; - } - -#pragma warning(suppress : 26408) // Avoid malloc() and free(), prefer the nothrow version of new with delete (r.10). - allocated = THROW_IF_NULL_ALLOC(static_cast(malloc(byteSize))); - return allocated; - } - - constexpr bool would_inline(size_t byteSize) const noexcept - { - return byteSize <= sizeof(T); - } - - bool is_inline() const noexcept - { - // VSO-1430353: __builtin_bitcast crashes the compiler under /permissive-. (BODGY) -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - return (reinterpret_cast(allocated) & 1) != 0; - } - - const T* data() const noexcept - { - return is_inline() ? &inlined : allocated; - } - - size_t size() const noexcept - { - return is_inline() ? 
sizeof(inlined) : _msize(allocated); - } - }; - - struct FontMetrics - { - wil::com_ptr fontCollection; - wil::com_ptr fontFamily; - std::wstring fontName; - f32 baselineInDIP = 0.0f; - f32 fontSizeInDIP = 0.0f; - f32 advanceScale = 0; - u16x2 cellSize; - u16 fontWeight = 0; - u16 underlinePos = 0; - u16 underlineWidth = 0; - u16 strikethroughPos = 0; - u16 strikethroughWidth = 0; - u16x2 doubleUnderlinePos; - u16 thinLineWidth = 0; - }; - - // These flags are shared with shader_ps.hlsl. - // If you change this be sure to copy it over to shader_ps.hlsl. - // - // clang-format off - enum class CellFlags : u32 - { - None = 0x00000000, - Inlined = 0x00000001, - - ColoredGlyph = 0x00000002, - - Cursor = 0x00000008, - Selected = 0x00000010, - - BorderLeft = 0x00000020, - BorderTop = 0x00000040, - BorderRight = 0x00000080, - BorderBottom = 0x00000100, - Underline = 0x00000200, - UnderlineDotted = 0x00000400, - UnderlineDouble = 0x00000800, - Strikethrough = 0x00001000, - }; - // clang-format on - ATLAS_FLAG_OPS(CellFlags, u32) - - // This structure is shared with the GPU shader and needs to follow certain alignment rules. - // You can generally assume that only u32 or types of that alignment are allowed. 
- struct Cell - { - alignas(u32) u16x2 tileIndex; - alignas(u32) CellFlags flags = CellFlags::None; - u32x2 color; - }; - struct AtlasKeyAttributes { - u16 inlined : 1; - u16 bold : 1; - u16 italic : 1; - u16 cellCount : 13; + bool bold = false; + bool italic = false; ATLAS_POD_OPS(AtlasKeyAttributes) }; - struct AtlasKeyData - { - AtlasKeyAttributes attributes; - u16 charCount; - wchar_t chars[14]; - }; - - struct AtlasKey - { - AtlasKey(AtlasKeyAttributes attributes, u16 charCount, const wchar_t* chars) - { - const auto size = dataSize(charCount); - const auto data = _data.initialize(size); - attributes.inlined = _data.would_inline(size); - data->attributes = attributes; - data->charCount = charCount; - memcpy(&data->chars[0], chars, static_cast(charCount) * sizeof(AtlasKeyData::chars[0])); - } - - const AtlasKeyData* data() const noexcept - { - return _data.data(); - } - - size_t hash() const noexcept - { - const auto d = data(); -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). - return std::_Fnv1a_append_bytes(std::_FNV_offset_basis, reinterpret_cast(d), dataSize(d->charCount)); - } - - bool operator==(const AtlasKey& rhs) const noexcept - { - const auto a = data(); - const auto b = rhs.data(); - return a->charCount == b->charCount && memcmp(a, b, dataSize(a->charCount)) == 0; - } - - private: - SmallObjectOptimizer _data; - - static constexpr size_t dataSize(u16 charCount) noexcept - { - // This returns the actual byte size of a AtlasKeyData struct for the given charCount. - // The `wchar_t chars[2]` is only a buffer for the inlined variant after - // all and the actual charCount can be smaller or larger. Due to this we - // remove the size of the `chars` array and add its true length on top. 
- return sizeof(AtlasKeyData) - sizeof(AtlasKeyData::chars) + static_cast(charCount) * sizeof(AtlasKeyData::chars[0]); - } - }; - - struct CachedGlyphLayout - { - wil::com_ptr textLayout; - f32x2 offset; - f32x2 scale; - f32x2 scaleCenter; - D2D1_DRAW_TEXT_OPTIONS options = D2D1_DRAW_TEXT_OPTIONS_NONE; - bool scalingRequired = false; - - explicit operator bool() const noexcept; - void reset() noexcept; - void applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept; - void undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept; - }; - - struct AtlasValueData - { - CellFlags flags = CellFlags::None; - u16x2 coords[7]; - }; - - struct AtlasValue - { - AtlasValue(CellFlags flags, u16 cellCount, u16x2** coords) - { - __assume(coords != nullptr); - const auto size = dataSize(cellCount); - const auto data = _data.initialize(size); - WI_SetFlagIf(flags, CellFlags::Inlined, _data.would_inline(size)); - data->flags = flags; - *coords = &data->coords[0]; - } - - const AtlasValueData* data() const noexcept - { - return _data.data(); - } - - CachedGlyphLayout cachedLayout; - - private: - SmallObjectOptimizer _data; - - static constexpr size_t dataSize(u16 coordCount) noexcept - { - return sizeof(AtlasValueData) - sizeof(AtlasValueData::coords) + static_cast(coordCount) * sizeof(AtlasValueData::coords[0]); - } - }; - - struct AtlasKeyHasher - { - using is_transparent = int; - - size_t operator()(const AtlasKey& v) const noexcept - { - return v.hash(); - } - - size_t operator()(const std::list>::iterator& v) const noexcept - { - return operator()(v->first); - } - }; - - struct AtlasKeyEq - { - using is_transparent = int; - - bool operator()(const AtlasKey& a, const std::list>::iterator& b) const noexcept - { - return a == b->first; - } - - bool operator()(const std::list>::iterator& a, const std::list>::iterator& b) const noexcept - { - return operator()(a->first, b); - } - }; - - struct TileHashMap - { - using iterator = 
std::list>::iterator; - - TileHashMap() noexcept = default; - - iterator end() noexcept - { - return _lru.end(); - } - - iterator find(const AtlasKey& key) - { - const auto it = _map.find(key); - if (it != _map.end()) - { - // Move the key to the head of the LRU queue. - makeNewest(*it); - return *it; - } - return end(); - } - - iterator insert(AtlasKey&& key, AtlasValue&& value) - { - // Insert the key/value right at the head of the LRU queue, just like find(). - // - // && decays to & if the argument is named, because C++ is a simple language - // and so you have to std::move it again, because C++ is a simple language. - _lru.emplace_front(std::move(key), std::move(value)); - auto it = _lru.begin(); - _map.emplace(it); - return it; - } - - void makeNewest(const iterator& it) - { - _lru.splice(_lru.begin(), _lru, it); - } - - void popOldestTiles(std::vector& out) noexcept - { - Expects(!_lru.empty()); - const auto it = --_lru.end(); - - const auto key = it->first.data(); - const auto value = it->second.data(); - const auto beg = &value->coords[0]; - const auto cellCount = key->attributes.cellCount; - - const auto offset = out.size(); - out.resize(offset + cellCount); - std::copy_n(beg, cellCount, out.begin() + offset); - - _map.erase(it); - _lru.pop_back(); - } - - private: - // Please don't copy this code. It's a proof-of-concept. - // If you need a LRU hash-map, write a custom one with an intrusive - // prev/next linked list (it's easier than you might think!). - std::list> _lru; - std::unordered_set _map; - }; - - // TileAllocator yields `tileSize`-sized tiles for our texture atlas. - // While doing so it'll grow the atlas size() by a factor of 2 if needed. - // Once the setMaxArea() is exceeded it'll stop growing and instead - // snatch tiles back from the oldest TileHashMap entries. 
- // - // The quadratic growth works by alternating the size() - // between an 1:1 and 2:1 aspect ratio, like so: - // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) - // These initial tile positions allocate() returns are in a Z - // pattern over the available space in the atlas texture. - // You can log the `return _pos;` in allocate() using "Tracepoint"s - // in Visual Studio if you'd like to understand the Z pattern better. - struct TileAllocator - { - TileAllocator() = default; - - explicit TileAllocator(u16x2 tileSize, u16x2 windowSize) noexcept : - _tileSize{ tileSize } - { - const auto initialSize = std::max(u16{ _absoluteMinSize }, std::bit_ceil(std::max(tileSize.x, tileSize.y))); - _size = { initialSize, initialSize }; - _limit = { gsl::narrow_cast(initialSize - _tileSize.x), gsl::narrow_cast(initialSize - _tileSize.y) }; - setMaxArea(windowSize); - } - - u16x2 size() const noexcept - { - return _size; - } - - void setMaxArea(u16x2 windowSize) noexcept - { - // _generate() uses a quadratic growth factor for _size's area. - // Once it exceeds the _maxArea, it'll start snatching tiles back from the - // TileHashMap using its LRU queue. Since _size will at least reach half - // of _maxSize (because otherwise it could still grow by a factor of 2) - // and by ensuring that _maxArea is at least twice the window size - // we make it impossible* for _generate() to return false before - // TileHashMap contains at least as many tiles as the window contains. - // If that wasn't the case we'd snatch and reuse tiles that are still in use. - // * lhecker's legal department: - // No responsibility is taken for the correctness of this information. - setMaxArea(static_cast(windowSize.x) * static_cast(windowSize.y) * 2); - } - - void setMaxArea(size_t max) noexcept - { - // We need to reserve at least 1 extra `tileArea`, because the tile - // at position {0,0} is already reserved for the cursor texture. 
- const auto tileArea = static_cast(_tileSize.x) * static_cast(_tileSize.y); - _maxArea = clamp(max + tileArea, _absoluteMinArea, _absoluteMaxArea); - _updateCanGenerate(); - } - - u16x2 allocate(TileHashMap& map) noexcept - { - if (_generate()) - { - return _pos; - } - - if (_cache.empty()) - { - map.popOldestTiles(_cache); - } - - const auto pos = _cache.back(); - _cache.pop_back(); - return pos; - } - - private: - // This method generates the Z pattern coordinates - // described above in the TileAllocator comment. - bool _generate() noexcept - { - if (!_canGenerate) - { - return false; - } - - // We need to backup _pos/_size in case our resize below exceeds _maxArea. - // In that case we have to restore _pos/_size so that if _maxArea is increased - // (window resize for instance), we can pick up where we previously left off. - const auto pos = _pos; - - _pos.x += _tileSize.x; - if (_pos.x <= _limit.x) - { - return true; - } - - _pos.y += _tileSize.y; - if (_pos.y <= _limit.y) - { - _pos.x = _originX; - return true; - } - - // Same as for pos. - const auto size = _size; - - // This implements a quadratic growth factor for _size, by - // alternating between an 1:1 and 2:1 aspect ratio, like so: - // (64,64) -> (128,64) -> (128,128) -> (256,128) -> (256,256) - // This behavior is strictly dependent on setMaxArea(u16x2)'s - // behavior. See its comment for an explanation. 
- if (_size.x == _size.y) - { - _size.x *= 2; - _pos.y = 0; - } - else - { - _size.y *= 2; - _pos.x = 0; - } - - _updateCanGenerate(); - if (_canGenerate) - { - _limit = { gsl::narrow_cast(_size.x - _tileSize.x), gsl::narrow_cast(_size.y - _tileSize.y) }; - _originX = _pos.x; - } - else - { - _size = size; - _pos = pos; - } - - return _canGenerate; - } - - void _updateCanGenerate() noexcept - { - _canGenerate = static_cast(_size.x) * static_cast(_size.y) <= _maxArea; - } - - static constexpr u16 _absoluteMinSize = 256; - static constexpr size_t _absoluteMinArea = _absoluteMinSize * _absoluteMinSize; - // TODO: Consider using IDXGIAdapter3::QueryVideoMemoryInfo() and IDXGIAdapter3::RegisterVideoMemoryBudgetChangeNotificationEvent() - // That way we can make better to use of a user's available video memory. - static constexpr size_t _absoluteMaxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; - - std::vector _cache; - size_t _maxArea = _absoluteMaxArea; - u16x2 _tileSize; - u16x2 _size; - u16x2 _limit; - // Since _pos starts at {0, 0}, it'll result in the first allocate()d tile to be at {_tileSize.x, 0}. - // Coincidentally that's exactly what we want as the cursor texture lives at {0, 0}. - u16x2 _pos; - u16 _originX = 0; - // Indicates whether we've exhausted our Z pattern across the atlas texture. - // If this is false, we have to snatch tiles back from TileHashMap. - bool _canGenerate = true; - }; - - struct CachedCursorOptions - { - u32 cursorColor = INVALID_COLOR; - u16 cursorType = gsl::narrow_cast(CursorType::Legacy); - u8 heightPercentage = 20; - u8 _padding = 0; - - ATLAS_POD_OPS(CachedCursorOptions) - }; - - struct BufferLineMetadata - { - u32x2 colors; - CellFlags flags = CellFlags::None; - }; - - // NOTE: D3D constant buffers sizes must be a multiple of 16 bytes. - struct alignas(16) ConstBuffer - { - // WARNING: Modify this carefully after understanding how HLSL struct packing works. 
- // The gist is: - // * Minimum alignment is 4 bytes (like `#pragma pack 4`) - // * Members cannot straddle 16 byte boundaries - // This means a structure like {u32; u32; u32; u32x2} would require - // padding so that it is {u32; u32; u32; <4 byte padding>; u32x2}. - // * bool will probably not work the way you want it to, - // because HLSL uses 32-bit bools and C++ doesn't. - alignas(sizeof(f32x4)) f32x4 viewport; - alignas(sizeof(f32x4)) f32 gammaRatios[4]{}; - alignas(sizeof(f32)) f32 enhancedContrast = 0; - alignas(sizeof(u32)) u32 cellCountX = 0; - alignas(sizeof(u32x2)) u32x2 cellSize; - alignas(sizeof(u32)) u32 underlinePos = 0; - alignas(sizeof(u32)) u32 underlineWidth = 0; - alignas(sizeof(u32)) u32 strikethroughPos = 0; - alignas(sizeof(u32)) u32 strikethroughWidth = 0; - alignas(sizeof(u32x2)) u32x2 doubleUnderlinePos; - alignas(sizeof(u32)) u32 thinLineWidth = 0; - alignas(sizeof(u32)) u32 backgroundColor = 0; - alignas(sizeof(u32)) u32 cursorColor = 0; - alignas(sizeof(u32)) u32 selectionColor = 0; - alignas(sizeof(u32)) u32 useClearType = 0; -#pragma warning(suppress : 4324) // 'ConstBuffer': structure was padded due to alignment specifier - }; - - struct alignas(16) CustomConstBuffer - { - // WARNING: Same rules as for ConstBuffer above apply. 
- alignas(sizeof(f32)) f32 time = 0; - alignas(sizeof(f32)) f32 scale = 0; - alignas(sizeof(f32x2)) f32x2 resolution; - alignas(sizeof(f32x4)) f32x4 background; -#pragma warning(suppress : 4324) // 'CustomConstBuffer': structure was padded due to alignment specifier - }; - - // Handled in BeginPaint() - enum class ApiInvalidations : u8 - { - None = 0, - Title = 1 << 0, - Device = 1 << 1, - SwapChain = 1 << 2, - Size = 1 << 3, - Font = 1 << 4, - Settings = 1 << 5, - }; - ATLAS_FLAG_OPS(ApiInvalidations, u8) - - // Handled in Present() - enum class RenderInvalidations : u8 - { - None = 0, - Cursor = 1 << 0, - ConstBuffer = 1 << 1, - }; - ATLAS_FLAG_OPS(RenderInvalidations, u8) - - // MSVC STL (version 22000) implements std::clamp(T, T, T) in terms of the generic - // std::clamp(T, T, T, Predicate) with std::less{} as the argument, - // which introduces branching. While not perfect, this is still better than std::clamp. - template - static constexpr T clamp(T val, T min, T max) - { - return std::max(min, std::min(max, val)); - } - // AtlasEngine.cpp - [[nodiscard]] HRESULT _handleException(const wil::ResultException& exception) noexcept; - __declspec(noinline) void _createResources(); - void _releaseSwapChain(); - __declspec(noinline) void _createSwapChain(); - __declspec(noinline) void _recreateSizeDependentResources(); - __declspec(noinline) void _recreateFontDependentResources(); - IDWriteTextFormat* _getTextFormat(bool bold, bool italic) const noexcept; - const Buffer& _getTextFormatAxis(bool bold, bool italic) const noexcept; - Cell* _getCell(u16 x, u16 y) noexcept; - TileHashMap::iterator* _getCellGlyphMapping(u16 x, u16 y) noexcept; - void _setCellFlags(u16r coords, CellFlags mask, CellFlags bits) noexcept; + __declspec(noinline) void _handleSettingsUpdate(); + void _recreateFontDependentResources(); + void _recreateCellCountDependentResources(); void _flushBufferLine(); - bool _emplaceGlyph(IDWriteFontFace* fontFace, size_t bufferPos1, size_t bufferPos2); + 
void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace** mappedFontFace) const; + void _mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row); + __declspec(noinline) void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); // AtlasEngine.api.cpp void _resolveTransparencySettings() noexcept; void _updateFont(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, const std::unordered_map& features, const std::unordered_map& axes); - void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontMetrics* fontMetrics = nullptr) const; + void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontSettings* fontMetrics = nullptr) const; // AtlasEngine.r.cpp - void _renderWithCustomShader() const; - void _setShaderResources() const; - void _updateConstantBuffer() const noexcept; - void _adjustAtlasSize(); - void _processGlyphQueue(); - void _drawGlyph(const TileHashMap::iterator& it) const; - CachedGlyphLayout _getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* textFormat, bool coloredGlyph) const; - void _drawCursor(u16r rect, u32 color, bool clear); - ID2D1Brush* _brushWithColor(u32 color); - void _d2dPresent(); - void _d2dCreateRenderTarget(); - void _d2dDrawDirtyArea(); - u16 _d2dDrawGlyph(const TileHashMap::iterator& it, u16x2 coord, u32 color); - void _d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle = nullptr); - void _d2dFillRectangle(u16r rect, u32 color); - void _d2dCellFlagRendererCursor(u16r rect, u32 color); - void _d2dCellFlagRendererSelected(u16r rect, u32 color); - void _d2dCellFlagRendererUnderline(u16r rect, u32 color); - void _d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color); - void _d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color); - void 
_d2dCellFlagRendererStrikethrough(u16r rect, u32 color); - - static constexpr bool debugForceD2DMode = false; - static constexpr bool debugGlyphGenerationPerformance = false; - static constexpr bool debugTextParsingPerformance = false || debugGlyphGenerationPerformance; - static constexpr bool debugGeneralPerformance = false || debugTextParsingPerformance; static constexpr u16 u16min = 0x0000; static constexpr u16 u16max = 0xffff; @@ -966,102 +110,26 @@ namespace Microsoft::Console::Render static constexpr u16x2 invalidatedRowsNone{ u16max, u16min }; static constexpr u16x2 invalidatedRowsAll{ u16min, u16max }; - struct StaticResources - { - wil::com_ptr d2dFactory; - wil::com_ptr dwriteFactory; - wil::com_ptr systemFontFallback; - wil::com_ptr textAnalyzer; - bool isWindows10OrGreater = true; - -#ifndef NDEBUG - std::filesystem::path sourceDirectory; - wil::unique_folder_change_reader_nothrow sourceCodeWatcher; - std::atomic sourceCodeInvalidationTime{ INT64_MAX }; -#endif - } _sr; - - struct Resources - { - // DXGI resources - wil::com_ptr dxgiFactory; - - // D3D resources - wil::com_ptr device; - wil::com_ptr deviceContext; - wil::com_ptr swapChain; - wil::unique_handle frameLatencyWaitableObject; - wil::com_ptr renderTargetView; - wil::com_ptr vertexShader; - wil::com_ptr pixelShader; - wil::com_ptr constantBuffer; - wil::com_ptr cellBuffer; - wil::com_ptr cellView; - wil::com_ptr customOffscreenTexture; - wil::com_ptr customOffscreenTextureView; - wil::com_ptr customOffscreenTextureTargetView; - wil::com_ptr customVertexShader; - wil::com_ptr customPixelShader; - wil::com_ptr customShaderConstantBuffer; - wil::com_ptr customShaderSamplerState; - std::chrono::steady_clock::time_point customShaderStartTime; - - // D2D resources - wil::com_ptr atlasBuffer; - wil::com_ptr atlasView; - wil::com_ptr d2dRenderTarget; - wil::com_ptr brush; - wil::com_ptr fontFaces[4]; - wil::com_ptr textFormats[2][2]; - Buffer textFormatAxes[2][2]; - wil::com_ptr typography; - 
wil::com_ptr dottedStrokeStyle; - - Buffer cells; // invalidated by ApiInvalidations::Size - Buffer cellGlyphMapping; // invalidated by ApiInvalidations::Size - f32x2 cellSizeDIP; // invalidated by ApiInvalidations::Font, caches _api.cellSize but in DIP - u16x2 cellCount; // invalidated by ApiInvalidations::Font|Size, caches _api.cellCount - u16 dpi = USER_DEFAULT_SCREEN_DPI; // invalidated by ApiInvalidations::Font, caches _api.dpi - FontMetrics fontMetrics; // invalidated by ApiInvalidations::Font, cached _api.fontMetrics - f32 dipPerPixel = 1.0f; // invalidated by ApiInvalidations::Font, caches USER_DEFAULT_SCREEN_DPI / _api.dpi - f32 pixelPerDIP = 1.0f; // invalidated by ApiInvalidations::Font, caches _api.dpi / USER_DEFAULT_SCREEN_DPI - u16x2 atlasSizeInPixel; // invalidated by ApiInvalidations::Font - TileHashMap glyphs; - TileAllocator tileAllocator; - std::vector glyphQueue; - - f32 gamma = 0; - f32 cleartypeEnhancedContrast = 0; - f32 grayscaleEnhancedContrast = 0; - u32 backgroundColor = 0xff000000; - u32 selectionColor = 0x7fffffff; - u32 brushColor = 0xffffffff; - - CachedCursorOptions cursorOptions; - RenderInvalidations invalidations = RenderInvalidations::None; - - til::rect dirtyRect; - i16 scrollOffset = 0; - bool d2dMode = false; - bool waitForPresentation = false; - bool requiresContinuousRedraw = false; - -#ifndef NDEBUG - // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: - // > For every frame it renders, the app should wait on this handle before starting any rendering operations. - // > Note that this requirement includes the first frame the app renders with the swap chain. - bool frameLatencyWaitableObjectUsed = false; -#endif - } _r; + std::unique_ptr _b; + RenderingPayload _p; struct ApiState { + til::generational s = Settings::invalidated(); + // This structure is loosely sorted in chunks from "very often accessed together" // to seldom accessed and/or usually not together. 
+ bool invalidatedTitle = false; + // These two are redundant with TargetSettings/MiscellaneousSettings, but that's because _resolveTransparencySettings() + // turns the given settings into potentially different actual settings (which are then written into the Settings). + bool enableTransparentBackground = false; + u8 antialiasingMode = DefaultAntialiasingMode; + std::vector bufferLine; std::vector bufferLineColumn; - Buffer bufferLineMetadata; + Buffer colorsForeground; + std::vector analysisResults; Buffer clusterMap; Buffer textProps; @@ -1069,24 +137,18 @@ namespace Microsoft::Console::Render Buffer glyphProps; Buffer glyphAdvances; Buffer glyphOffsets; - std::vector fontFeatures; // changes are flagged as ApiInvalidations::Font|Size - std::vector fontAxisValues; // changes are flagged as ApiInvalidations::Font|Size - FontMetrics fontMetrics; // changes are flagged as ApiInvalidations::Font|Size - u16x2 cellCount; // caches `sizeInPixel / cellSize` - u16x2 sizeInPixel; // changes are flagged as ApiInvalidations::Size + wil::com_ptr replacementCharacterFontFace; + u16 replacementCharacterGlyphIndex = 0; + bool replacementCharacterLookedUp = false; // UpdateDrawingBrushes() - u32 backgroundOpaqueMixin = 0xff000000; // changes are flagged as ApiInvalidations::SwapChain + u32 backgroundOpaqueMixin = 0xff000000; u32x2 currentColor; AtlasKeyAttributes attributes{}; u16x2 lastPaintBufferLineCoord; - CellFlags flags = CellFlags::None; - // SetSelectionBackground() - u32 selectionColor = 0x7fffffff; // UpdateHyperlinkHoveredId() u16 hyperlinkHoveredId = 0; - bool bufferLineWasHyperlinked = false; // dirtyRect is a computed value based on invalidatedRows. 
til::rect dirtyRect; @@ -1094,24 +156,14 @@ namespace Microsoft::Console::Render u16r invalidatedCursorArea = invalidatedAreaNone; u16x2 invalidatedRows = invalidatedRowsNone; // x is treated as "top" and y as "bottom" i16 scrollOffset = 0; - - std::function warningCallback; - std::function swapChainChangedCallback; - wil::unique_handle swapChainHandle; - HWND hwnd = nullptr; - u16 dpi = USER_DEFAULT_SCREEN_DPI; // changes are flagged as ApiInvalidations::Font|Size - u8 antialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // changes are flagged as ApiInvalidations::Font - u8 realizedAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; // caches antialiasingMode, depends on antialiasingMode and backgroundOpaqueMixin, see _resolveTransparencySettings - bool enableTransparentBackground = false; - - std::wstring customPixelShaderPath; // changes are flagged as ApiInvalidations::Device - bool useRetroTerminalEffect = false; // changes are flagged as ApiInvalidations::Device - bool useSoftwareRendering = false; // changes are flagged as ApiInvalidations::Device - - ApiInvalidations invalidations = ApiInvalidations::Device; } _api; #undef ATLAS_POD_OPS #undef ATLAS_FLAG_OPS }; } + +namespace Microsoft::Console::Render +{ + using AtlasEngine = Atlas::AtlasEngine; +} diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index a8bc092c0b8..78137ae0b58 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -4,7 +4,8 @@ #include "pch.h" #include "AtlasEngine.h" -#include "dwrite.h" +#include "BackendD2D.h" +#include "BackendD3D11.h" // #### NOTE #### // If you see any code in here that contains "_api." you might be seeing a race condition. @@ -22,39 +23,7 @@ #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). #pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). 
-// https://en.wikipedia.org/wiki/Inversion_list -template -constexpr bool isInInversionList(const std::array& ranges, wchar_t needle) -{ - const auto beg = ranges.begin(); - const auto end = ranges.end(); - decltype(ranges.begin()) it; - - // Linear search is faster than binary search for short inputs. - if constexpr (N < 16) - { - it = std::find_if(beg, end, [=](wchar_t v) { return needle < v; }); - } - else - { - it = std::upper_bound(beg, end, needle); - } - - const auto idx = it - beg; - return (idx & 1) != 0; -} - -template -constexpr T colorFromU32(uint32_t rgba) -{ - const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; - const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; - const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; - const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; - return { r, g, b, a }; -} - -using namespace Microsoft::Console::Render; +using namespace Microsoft::Console::Render::Atlas; #pragma region IRenderEngine @@ -63,1044 +32,197 @@ using namespace Microsoft::Console::Render; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - const til::rect fullRect{ 0, 0, _r.cellCount.x, _r.cellCount.y }; - - // A change in the selection or background color (etc.) forces a full redraw. - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer) || _r.customPixelShader) - { - _r.dirtyRect = fullRect; - } - - if (!_r.dirtyRect) + if (!_p.dirtyRect) { return S_OK; } - // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: - // > For every frame it renders, the app should wait on this handle before starting any rendering operations. - // > Note that this requirement includes the first frame the app renders with the swap chain. 
- assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); - - if (_r.d2dMode) [[unlikely]] - { - _d2dPresent(); - } - else + if (_p.dxgiFactory && !_p.dxgiFactory->IsCurrent()) { - _adjustAtlasSize(); - _processGlyphQueue(); - - // The values the constant buffer depends on are potentially updated after BeginPaint(). - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) - { - _updateConstantBuffer(); - WI_ClearFlag(_r.invalidations, RenderInvalidations::ConstBuffer); - } - - { -#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). - D3D11_MAPPED_SUBRESOURCE mapped; - THROW_IF_FAILED(_r.deviceContext->Map(_r.cellBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - assert(mapped.RowPitch >= _r.cells.size() * sizeof(Cell)); - memcpy(mapped.pData, _r.cells.data(), _r.cells.size() * sizeof(Cell)); - _r.deviceContext->Unmap(_r.cellBuffer.get(), 0); - } - - if (_r.customPixelShader) [[unlikely]] - { - _renderWithCustomShader(); - } - else - { - _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); - _r.deviceContext->Draw(3, 0); - } + _b.reset(); } - // See documentation for IDXGISwapChain2::GetFrameLatencyWaitableObject method: - // > For every frame it renders, the app should wait on this handle before starting any rendering operations. - // > Note that this requirement includes the first frame the app renders with the swap chain. 
- assert(debugGeneralPerformance || _r.frameLatencyWaitableObjectUsed); - - if (_r.dirtyRect != fullRect) + if (!_b) { - auto dirtyRectInPx = _r.dirtyRect; - dirtyRectInPx.left *= _r.fontMetrics.cellSize.x; - dirtyRectInPx.top *= _r.fontMetrics.cellSize.y; - dirtyRectInPx.right *= _r.fontMetrics.cellSize.x; - dirtyRectInPx.bottom *= _r.fontMetrics.cellSize.y; - - RECT scrollRect{}; - POINT scrollOffset{}; - DXGI_PRESENT_PARAMETERS params{ - .DirtyRectsCount = 1, - .pDirtyRects = dirtyRectInPx.as_win32_rect(), - }; - - if (_r.scrollOffset) - { - scrollRect = { - 0, - std::max(0, _r.scrollOffset), - _r.cellCount.x, - _r.cellCount.y + std::min(0, _r.scrollOffset), - }; - scrollOffset = { - 0, - _r.scrollOffset, - }; - - scrollRect.top *= _r.fontMetrics.cellSize.y; - scrollRect.right *= _r.fontMetrics.cellSize.x; - scrollRect.bottom *= _r.fontMetrics.cellSize.y; - - scrollOffset.y *= _r.fontMetrics.cellSize.y; - - params.pScrollRect = &scrollRect; - params.pScrollOffset = &scrollOffset; - } - - THROW_IF_FAILED(_r.swapChain->Present1(1, 0, &params)); - } - else - { - THROW_IF_FAILED(_r.swapChain->Present(1, 0)); - } - - _r.waitForPresentation = true; - - if (!_r.dxgiFactory->IsCurrent()) - { - WI_SetFlag(_api.invalidations, ApiInvalidations::Device); + _recreateBackend(); } + _b->Render(_p); return S_OK; } catch (const wil::ResultException& exception) { - // TODO: this writes to _api. 
- return _handleException(exception); + if (_p.warningCallback) + { + try + { + _p.warningCallback(exception.GetErrorCode()); + } + CATCH_LOG() + } + + _p.dxgiFactory.reset(); + _b.reset(); + return E_PENDING; // Indicate a retry to the renderer } CATCH_RETURN() [[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept { - return debugGeneralPerformance || _r.requiresContinuousRedraw; + return debugContinuousRedraw || (_b && _b->RequiresContinuousRedraw()); } void AtlasEngine::WaitUntilCanRender() noexcept { - // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. - // Once we've waited on the event, waiting on it again will block until the timeout elapses. - // _r.waitForPresentation guards against this. - if (!debugGeneralPerformance && std::exchange(_r.waitForPresentation, false)) + if (_b) { - WaitForSingleObjectEx(_r.frameLatencyWaitableObject.get(), 100, true); -#ifndef NDEBUG - _r.frameLatencyWaitableObjectUsed = true; -#endif + _b->WaitUntilCanRender(); } } #pragma endregion -void AtlasEngine::_renderWithCustomShader() const -{ - // Render with our main shader just like Present(). - { - // OM: Output Merger - _r.deviceContext->OMSetRenderTargets(1, _r.customOffscreenTextureTargetView.addressof(), nullptr); - _r.deviceContext->Draw(3, 0); - } - - // Update the custom shader's constant buffer. - { - CustomConstBuffer data; - data.time = std::chrono::duration(std::chrono::steady_clock::now() - _r.customShaderStartTime).count(); - data.scale = _r.pixelPerDIP; - data.resolution.x = static_cast(_r.cellCount.x * _r.fontMetrics.cellSize.x); - data.resolution.y = static_cast(_r.cellCount.y * _r.fontMetrics.cellSize.y); - data.background = colorFromU32(_r.backgroundColor); - -#pragma warning(suppress : 26494) // Variable 'mapped' is uninitialized. Always initialize an object (type.5). 
- D3D11_MAPPED_SUBRESOURCE mapped; - THROW_IF_FAILED(_r.deviceContext->Map(_r.customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - assert(mapped.RowPitch >= sizeof(data)); - memcpy(mapped.pData, &data, sizeof(data)); - _r.deviceContext->Unmap(_r.customShaderConstantBuffer.get(), 0); - } - - // Render with the custom shader. - { - // OM: Output Merger - // customOffscreenTextureView was just rendered to via customOffscreenTextureTargetView and is - // set as the output target. Before we can use it as an input we have to remove it as an output. - _r.deviceContext->OMSetRenderTargets(1, _r.renderTargetView.addressof(), nullptr); - - // VS: Vertex Shader - _r.deviceContext->VSSetShader(_r.customVertexShader.get(), nullptr, 0); - - // PS: Pixel Shader - _r.deviceContext->PSSetShader(_r.customPixelShader.get(), nullptr, 0); - _r.deviceContext->PSSetConstantBuffers(0, 1, _r.customShaderConstantBuffer.addressof()); - _r.deviceContext->PSSetShaderResources(0, 1, _r.customOffscreenTextureView.addressof()); - _r.deviceContext->PSSetSamplers(0, 1, _r.customShaderSamplerState.addressof()); - - _r.deviceContext->Draw(4, 0); - } - - // For the next frame we need to restore our context state. - { - // VS: Vertex Shader - _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); - - // PS: Pixel Shader - _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); - _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); - const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; - _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); - _r.deviceContext->PSSetSamplers(0, 0, nullptr); - } -} - -void AtlasEngine::_setShaderResources() const -{ - // IA: Input Assembler - // Our vertex shader uses a trick from Bill Bilodeau published in - // "Vertex Shader Tricks" at GDC14 to draw a fullscreen triangle - // without vertex/index buffers. This prepares our context for this. 
- _r.deviceContext->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); - _r.deviceContext->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); - _r.deviceContext->IASetInputLayout(nullptr); - _r.deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - - // VS: Vertex Shader - _r.deviceContext->VSSetShader(_r.vertexShader.get(), nullptr, 0); - - // PS: Pixel Shader - _r.deviceContext->PSSetShader(_r.pixelShader.get(), nullptr, 0); - _r.deviceContext->PSSetConstantBuffers(0, 1, _r.constantBuffer.addressof()); - const std::array resources{ _r.cellView.get(), _r.atlasView.get() }; - _r.deviceContext->PSSetShaderResources(0, gsl::narrow_cast(resources.size()), resources.data()); -} - -void AtlasEngine::_updateConstantBuffer() const noexcept +void AtlasEngine::_recreateBackend() { - const auto useClearType = _api.realizedAntialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; - - ConstBuffer data; - data.viewport.x = 0; - data.viewport.y = 0; - data.viewport.z = static_cast(_r.cellCount.x * _r.fontMetrics.cellSize.x); - data.viewport.w = static_cast(_r.cellCount.y * _r.fontMetrics.cellSize.y); - DWrite_GetGammaRatios(_r.gamma, data.gammaRatios); - data.enhancedContrast = useClearType ? 
_r.cleartypeEnhancedContrast : _r.grayscaleEnhancedContrast; - data.cellCountX = _r.cellCount.x; - data.cellSize.x = _r.fontMetrics.cellSize.x; - data.cellSize.y = _r.fontMetrics.cellSize.y; - data.underlinePos = _r.fontMetrics.underlinePos; - data.underlineWidth = _r.fontMetrics.underlineWidth; - data.strikethroughPos = _r.fontMetrics.strikethroughPos; - data.strikethroughWidth = _r.fontMetrics.strikethroughWidth; - data.doubleUnderlinePos.x = _r.fontMetrics.doubleUnderlinePos.x; - data.doubleUnderlinePos.y = _r.fontMetrics.doubleUnderlinePos.y; - data.thinLineWidth = _r.fontMetrics.thinLineWidth; - data.backgroundColor = _r.backgroundColor; - data.cursorColor = _r.cursorOptions.cursorColor; - data.selectionColor = _r.selectionColor; - data.useClearType = useClearType; -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6). - _r.deviceContext->UpdateSubresource(_r.constantBuffer.get(), 0, nullptr, &data, 0, 0); -} - -void AtlasEngine::_adjustAtlasSize() -{ - // Only grow the atlas texture if our tileAllocator needs it to be larger. - // We have no way of shrinking our tileAllocator at the moment, - // so technically a `requiredSize != _r.atlasSizeInPixel` - // comparison would be sufficient, but better safe than sorry. 
- const auto requiredSize = _r.tileAllocator.size(); - if (requiredSize.y <= _r.atlasSizeInPixel.y && requiredSize.x <= _r.atlasSizeInPixel.x) - { - return; - } - - wil::com_ptr atlasBuffer; - wil::com_ptr atlasView; - { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = requiredSize.x; - desc.Height = requiredSize.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; - THROW_IF_FAILED(_r.device->CreateTexture2D(&desc, nullptr, atlasBuffer.addressof())); - THROW_IF_FAILED(_r.device->CreateShaderResourceView(atlasBuffer.get(), nullptr, atlasView.addressof())); - } - - // If a _r.atlasBuffer already existed, we can copy its glyphs - // over to the new texture without re-rendering everything. - const auto copyFromExisting = _r.atlasSizeInPixel != u16x2{}; - if (copyFromExisting) - { - D3D11_BOX box; - box.left = 0; - box.top = 0; - box.front = 0; - box.right = _r.atlasSizeInPixel.x; - box.bottom = _r.atlasSizeInPixel.y; - box.back = 1; - _r.deviceContext->CopySubresourceRegion1(atlasBuffer.get(), 0, 0, 0, 0, _r.atlasBuffer.get(), 0, &box, D3D11_COPY_NO_OVERWRITE); - } - - { - const auto surface = atlasBuffer.query(); - - wil::com_ptr renderingParams; - DWrite_GetRenderParams(_sr.dwriteFactory.get(), &_r.gamma, &_r.cleartypeEnhancedContrast, &_r.grayscaleEnhancedContrast, renderingParams.addressof()); - - D2D1_RENDER_TARGET_PROPERTIES props{}; - props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; - props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; - props.dpiX = static_cast(_r.dpi); - props.dpiY = static_cast(_r.dpi); - wil::com_ptr renderTarget; - THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); - _r.d2dRenderTarget = renderTarget.query(); - - // We don't really use D2D for anything except DWrite, but it - // can't hurt to ensure that everything 
it does is pixel aligned. - _r.d2dRenderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); - // In case _api.realizedAntialiasingMode is D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE we'll - // continuously adjust it in AtlasEngine::_drawGlyph. See _drawGlyph. - _r.d2dRenderTarget->SetTextAntialiasMode(static_cast(_api.realizedAntialiasingMode)); - // Ensure that D2D uses the exact same gamma as our shader uses. - _r.d2dRenderTarget->SetTextRenderingParams(renderingParams.get()); - } - { - static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; - THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); - _r.brushColor = 0xffffffff; - } - - _r.atlasSizeInPixel = requiredSize; - _r.atlasBuffer = std::move(atlasBuffer); - _r.atlasView = std::move(atlasView); - _setShaderResources(); - - WI_SetAllFlags(_r.invalidations, RenderInvalidations::ConstBuffer); - WI_SetFlagIf(_r.invalidations, RenderInvalidations::Cursor, !copyFromExisting); -} - -void AtlasEngine::_processGlyphQueue() -{ - if (_r.glyphQueue.empty() && WI_IsFlagClear(_r.invalidations, RenderInvalidations::Cursor)) - { - return; - } - - _r.d2dRenderTarget->BeginDraw(); - - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::Cursor)) - { - _drawCursor({ 0, 0, 1, 1 }, 0xffffffff, true); - WI_ClearFlag(_r.invalidations, RenderInvalidations::Cursor); - } - - for (const auto& it : _r.glyphQueue) - { - _drawGlyph(it); - } - _r.glyphQueue.clear(); - - THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); -} - -void AtlasEngine::_drawGlyph(const TileHashMap::iterator& it) const -{ - const auto key = it->first.data(); - const auto value = it->second.data(); - const auto coords = &value->coords[0]; - const auto charsLength = key->charCount; - const auto cellCount = key->attributes.cellCount; - const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); - const auto coloredGlyph = WI_IsFlagSet(value->flags, CellFlags::ColoredGlyph); - const auto cachedLayout = 
_getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); - - // Colored glyphs cannot be drawn in linear gamma. - // That's why we're simply alpha-blending them in the shader. - // In order for this to work correctly we have to prevent them from being drawn - // with ClearType, because we would then lack the alpha channel for the glyphs. - if (_api.realizedAntialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE) - { - _r.d2dRenderTarget->SetTextAntialiasMode(coloredGlyph ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE); - } - - for (u16 i = 0; i < cellCount; ++i) - { - const auto coord = coords[i]; - - D2D1_RECT_F rect; - rect.left = static_cast(coord.x) * _r.dipPerPixel; - rect.top = static_cast(coord.y) * _r.dipPerPixel; - rect.right = rect.left + _r.cellSizeDIP.x; - rect.bottom = rect.top + _r.cellSizeDIP.y; - - D2D1_POINT_2F origin; - origin.x = rect.left - i * _r.cellSizeDIP.x; - origin.y = rect.top; - - _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); - _r.d2dRenderTarget->Clear(); - - cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); - - // Now that we're done using origin to calculate the center point for our transformation - // we can use it for its intended purpose to slightly shift the glyph around. 
- origin.x += cachedLayout.offset.x; - origin.y += cachedLayout.offset.y; - _r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _r.brush.get(), cachedLayout.options); - - cachedLayout.undoScaling(_r.d2dRenderTarget.get()); - - _r.d2dRenderTarget->PopAxisAlignedClip(); - } -} - -AtlasEngine::CachedGlyphLayout AtlasEngine::_getCachedGlyphLayout(const wchar_t* chars, u16 charsLength, u16 cellCount, IDWriteTextFormat* textFormat, bool coloredGlyph) const -{ - const f32x2 layoutBox{ cellCount * _r.cellSizeDIP.x, _r.cellSizeDIP.y }; - bool scalingRequired = false; - f32x2 offset{ 0, 0 }; - f32x2 scale{ 1, 1 }; - f32x2 scaleCenter; - - // See D2DFactory::DrawText - wil::com_ptr textLayout; - THROW_IF_FAILED(_sr.dwriteFactory->CreateTextLayout(chars, charsLength, textFormat, layoutBox.x, layoutBox.y, textLayout.addressof())); - if (_r.typography) - { - textLayout->SetTypography(_r.typography.get(), { 0, charsLength }); - } - - // Block Element and Box Drawing characters need to be handled separately, - // because unlike regular ones they're supposed to fill the entire layout box. - // - // Ranges: - // * 0x2500-0x257F: Box Drawing - // * 0x2580-0x259F: Block Elements - // * 0xE0A0-0xE0A3,0xE0B0-0xE0C8,0xE0CA-0xE0CA,0xE0CC-0xE0D4: PowerLine - // (https://github.com/ryanoasis/nerd-fonts/wiki/Glyph-Sets-and-Code-Points#powerline-symbols) - // - // The following `blockCharacters` forms a so called "inversion list". 
- static constexpr std::array blockCharacters{ - // clang-format off - L'\u2500', L'\u2580', - L'\u2580', L'\u25A0', - L'\uE0A0', L'\uE0A4', - L'\uE0B0', L'\uE0C9', - L'\uE0CA', L'\uE0CB', - L'\uE0CC', L'\uE0D5', - // clang-format on - }; - - if (charsLength == 1 && isInInversionList(blockCharacters, chars[0])) +#ifndef NDEBUG + if (IsDebuggerPresent()) { - wil::com_ptr fontCollection; - THROW_IF_FAILED(textFormat->GetFontCollection(fontCollection.addressof())); - const auto baseWeight = textFormat->GetFontWeight(); - const auto baseStyle = textFormat->GetFontStyle(); - - TextAnalysisSource analysisSource{ chars, 1 }; - UINT32 mappedLength = 0; - wil::com_ptr mappedFont; - FLOAT mappedScale = 0; - THROW_IF_FAILED(_sr.systemFontFallback->MapCharacters( - /* analysisSource */ &analysisSource, - /* textPosition */ 0, - /* textLength */ 1, - /* baseFontCollection */ fontCollection.get(), - /* baseFamilyName */ _r.fontMetrics.fontName.data(), - /* baseWeight */ baseWeight, - /* baseStyle */ baseStyle, - /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, - /* mappedLength */ &mappedLength, - /* mappedFont */ mappedFont.addressof(), - /* scale */ &mappedScale)); - - if (mappedFont) + // DXGIGetDebugInterface1 returns E_NOINTERFACE on systems without the Windows SDK installed. + if (wil::com_ptr infoQueue; SUCCEEDED_LOG(DXGIGetDebugInterface1(0, IID_PPV_ARGS(infoQueue.addressof())))) { - wil::com_ptr fontFace; - THROW_IF_FAILED(mappedFont->CreateFontFace(fontFace.addressof())); - - // Don't adjust the size of block glyphs that are part of the user's chosen font. - if (std::ranges::find(_r.fontFaces, fontFace) == std::end(_r.fontFaces)) + // I didn't want to link with dxguid.lib just for getting DXGI_DEBUG_ALL. This GUID is publicly documented. 
+ static constexpr GUID dxgiDebugAll{ 0xe48ae283, 0xda80, 0x490b, { 0x87, 0xe6, 0x43, 0xe9, 0xa9, 0xcf, 0xda, 0x8 } }; + for (const auto severity : { DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO }) { - DWRITE_FONT_METRICS metrics; - fontFace->GetMetrics(&metrics); - - static constexpr u32 codePoint = L'\u2588'; // Full Block character - u16 glyphIndex; - THROW_IF_FAILED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex)); - - if (glyphIndex) - { - DWRITE_GLYPH_METRICS glyphMetrics; - THROW_IF_FAILED(fontFace->GetDesignGlyphMetrics(&glyphIndex, 1, &glyphMetrics)); - - const auto fontScale = _r.fontMetrics.fontSizeInDIP / metrics.designUnitsPerEm; - - // How-to-DWRITE_OVERHANG_METRICS given a single glyph: - DWRITE_OVERHANG_METRICS overhang; - overhang.left = static_cast(glyphMetrics.leftSideBearing) * fontScale; - overhang.top = static_cast(glyphMetrics.verticalOriginY - glyphMetrics.topSideBearing) * fontScale - _r.fontMetrics.baselineInDIP; - overhang.right = static_cast(gsl::narrow_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale - layoutBox.x; - overhang.bottom = static_cast(gsl::narrow_cast(glyphMetrics.advanceHeight) - glyphMetrics.verticalOriginY - glyphMetrics.bottomSideBearing) * fontScale + _r.fontMetrics.baselineInDIP - layoutBox.y; - - scalingRequired = true; - // Center glyphs. - offset.x = (overhang.left - overhang.right) * 0.5f; - offset.y = (overhang.top - overhang.bottom) * 0.5f; - // We always want box drawing glyphs to exactly match the size of a terminal cell. - // But add 1px to the destination size, so that we don't end up with fractional pixels. 
- scale.x = (layoutBox.x + _r.pixelPerDIP) / (layoutBox.x + overhang.left + overhang.right); - scale.y = (layoutBox.y + _r.pixelPerDIP) / (layoutBox.y + overhang.top + overhang.bottom); - // Now that the glyph is in the center of the cell thanks - // to the offset, the scaleCenter is center of the cell. - scaleCenter.x = layoutBox.x * 0.5f; - scaleCenter.y = layoutBox.y * 0.5f; - } + infoQueue->SetBreakOnSeverity(dxgiDebugAll, severity, true); } } } - else - { - DWRITE_OVERHANG_METRICS overhang; - THROW_IF_FAILED(textLayout->GetOverhangMetrics(&overhang)); - - auto actualSizeX = layoutBox.x + overhang.left + overhang.right; - - // Long glyphs should be drawn with their proper design size, even if that makes them a bit blurry, - // because otherwise we fail to support "pseudo" block characters like the "===" ligature in Cascadia Code. - // If we didn't force upscale that ligatures it would seemingly shrink shorter and shorter, as its - // glyph advance is often slightly shorter by a fractional pixel or two compared to our terminal's cells. - // It's a trade off that keeps most glyphs "crisp" while retaining support for things like "===". - // At least I can't think of any better heuristic for this at the moment... - if (cellCount > 2) - { - const auto advanceScale = _r.fontMetrics.advanceScale; - scalingRequired = true; - scale = { advanceScale, advanceScale }; - actualSizeX *= advanceScale; - } - - // We need to offset glyphs that are simply outside of our layout box (layoutBox.x/.y) - // and additionally downsize glyphs that are entirely too large to fit in. - // The DWRITE_OVERHANG_METRICS will tell us how many DIPs the layout box is too large/small. - // It contains a positive number if the glyph is outside and a negative one if it's inside - // the layout box. 
For example, given a layoutBox.x/.y (and cell size) of 20/30: - // * "M" is the "largest" ASCII character and might be: - // left: -0.6f - // right: -0.6f - // top: -7.6f - // bottom: -7.4f - // "M" doesn't fill the layout box at all! - // This is because we've rounded up the Terminal's cell size to whole pixels in - // _resolveFontMetrics. top/bottom margins are fairly large because we added the - // chosen font's ascender, descender and line gap metrics to get our line height. - // --> offsetX = 0 - // --> offsetY = 0 - // --> scale = 1 - // * The bar diacritic (U+0336 combining long stroke overlay) - // left: -9.0f - // top: -16.3f - // right: 5.6f - // bottom: -11.7f - // right is positive! Our glyph is 5.6 DIPs outside of the layout box and would - // appear cut off during rendering. left is negative at -9, which indicates that - // we can simply shift the glyph by 5.6 DIPs to the left to fit it into our bounds. - // --> offsetX = -5.6f - // --> offsetY = 0 - // --> scale = 1 - // * Any wide emoji in a narrow cell (U+26A0 warning sign) - // left: 6.7f - // top: -4.1f - // right: 6.7f - // bottom: -3.0f - // Our emoji is outside the bounds on both the left and right side and we need to shrink it. - // --> offsetX = 0 - // --> offsetY = 0 - // --> scale = layoutBox.y / (layoutBox.y + left + right) - // = 0.69f - offset.x = std::max(0.0f, overhang.left) - std::max(0.0f, overhang.right); - scaleCenter.x = offset.x; - scaleCenter.y = _r.fontMetrics.baselineInDIP; +#endif - if ((actualSizeX - layoutBox.x) > _r.dipPerPixel) - { - scalingRequired = true; - offset.x = (overhang.left - overhang.right) * 0.5f; - scale.x = layoutBox.x / actualSizeX; - scale.y = scale.x; - scaleCenter.x = layoutBox.x * 0.5f; - } - if (overhang.top > _r.dipPerPixel || overhang.bottom > _r.dipPerPixel) + // Tell the OS that we're resilient to graphics device removal. Docs say: + // > This function should be called once per process and before any device creation. 
+ if (const auto module = GetModuleHandleW(L"dxgi.dll")) + { + if (const auto func = GetProcAddressByFunctionDeclaration(module, DXGIDeclareAdapterRemovalSupport)) { - const auto descend = _r.cellSizeDIP.y - _r.fontMetrics.baselineInDIP; - const auto scaleTop = _r.fontMetrics.baselineInDIP / (_r.fontMetrics.baselineInDIP + overhang.top); - const auto scaleBottom = descend / (descend + overhang.bottom); - scalingRequired = true; - scale.x = std::min(scale.x, std::min(scaleTop, scaleBottom)); - scale.y = scale.x; + func(); } } - auto options = D2D1_DRAW_TEXT_OPTIONS_NONE; - // D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT enables a bunch of internal machinery - // which doesn't have to run if we know we can't use it anyways in the shader. - WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_ENABLE_COLOR_FONT, coloredGlyph); - // !!! IMPORTANT !!! - // DirectWrite/2D snaps the baseline to whole pixels, which is something we technically - // want (it makes text look crisp), but fails in weird ways if `scalingRequired` is true. - // As our scaling matrix's dx/dy (center point) is based on the `origin` coordinates - // each cell we draw gets a unique, fractional baseline which gets rounded differently. - // I'm not 100% sure why that happens, since `origin` is always in full pixels... - // But this causes wide glyphs to draw as tiles that are potentially misaligned vertically by a pixel. - // The resulting text rendering looks especially bad for ligatures like "====" in Cascadia Code, - // where every single "=" might be blatantly misaligned vertically (same for any box drawings). - WI_SetFlagIf(options, D2D1_DRAW_TEXT_OPTIONS_NO_SNAP, scalingRequired); - - // ClearType basically has a 3x higher horizontal resolution. To make our glyphs render the same everywhere, - // it's probably for the best to ensure we initially rasterize them on a whole pixel boundary. 
- // (https://en.wikipedia.org/wiki/ClearType#How_ClearType_works) - offset.x = roundf(offset.x * _r.pixelPerDIP) * _r.dipPerPixel; - // As explained below, we use D2D1_DRAW_TEXT_OPTIONS_NO_SNAP to prevent a weird issue with baseline snapping. - // But we do want it technically, so this re-implements baseline snapping... I think? - offset.y = roundf(offset.y * _r.pixelPerDIP) * _r.dipPerPixel; - - return CachedGlyphLayout{ - .textLayout = textLayout, - .offset = offset, - .scale = scale, - .scaleCenter = scaleCenter, - .options = options, - .scalingRequired = scalingRequired, - }; -} - -void AtlasEngine::_drawCursor(u16r rect, u32 color, bool clear) -{ - // lineWidth is in D2D's DIPs. For instance if we have a 150-200% zoom scale we want to draw a 2px wide line. - // At 150% scale lineWidth thus needs to be 1.33333... because at a zoom scale of 1.5 this results in a 2px wide line. - const auto lineWidth = std::max(1.0f, static_cast((_r.dpi + USER_DEFAULT_SCREEN_DPI / 2) / USER_DEFAULT_SCREEN_DPI * USER_DEFAULT_SCREEN_DPI) / static_cast(_r.dpi)); - const auto cursorType = static_cast(_r.cursorOptions.cursorType); - - // `clip` is the rectangle within our texture atlas that's reserved for our cursor texture, ... - D2D1_RECT_F clip; - clip.left = static_cast(rect.left) * _r.cellSizeDIP.x; - clip.top = static_cast(rect.top) * _r.cellSizeDIP.y; - clip.right = static_cast(rect.right) * _r.cellSizeDIP.x; - clip.bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y; - - // ... whereas `rect` is just the visible (= usually white) portion of our cursor. 
- auto box = clip; - - switch (cursorType) - { - case CursorType::Legacy: - box.top = box.bottom - _r.cellSizeDIP.y * static_cast(_r.cursorOptions.heightPercentage) / 100.0f; - break; - case CursorType::VerticalBar: - box.right = box.left + lineWidth; - break; - case CursorType::EmptyBox: - { - // EmptyBox is drawn as a line and unlike filled rectangles those are drawn centered on their - // coordinates in such a way that the line border extends half the width to each side. - // --> Our coordinates have to be 0.5 DIP off in order to draw a 2px line on a 200% scaling. - const auto halfWidth = lineWidth / 2.0f; - box.left += halfWidth; - box.top += halfWidth; - box.right -= halfWidth; - box.bottom -= halfWidth; - break; - } - case CursorType::Underscore: - case CursorType::DoubleUnderscore: - box.top = box.bottom - lineWidth; - break; - default: - break; - } - - const auto brush = _brushWithColor(color); - - // We need to clip the area we draw in to ensure we don't - // accidentally draw into any neighboring texture atlas tiles. 
- _r.d2dRenderTarget->PushAxisAlignedClip(&clip, D2D1_ANTIALIAS_MODE_ALIASED); - - if (clear) - { - _r.d2dRenderTarget->Clear(); - } - - if (cursorType == CursorType::EmptyBox) - { - _r.d2dRenderTarget->DrawRectangle(&box, brush, lineWidth); - } - else - { - _r.d2dRenderTarget->FillRectangle(&box, brush); - } - - if (cursorType == CursorType::DoubleUnderscore) - { - const auto offset = lineWidth * 2.0f; - box.top -= offset; - box.bottom -= offset; - _r.d2dRenderTarget->FillRectangle(&box, brush); - } - - _r.d2dRenderTarget->PopAxisAlignedClip(); -} - -ID2D1Brush* AtlasEngine::_brushWithColor(u32 color) -{ - if (_r.brushColor != color) - { - const auto d2dColor = colorFromU32(color); - THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, _r.brush.put())); - _r.brushColor = color; - } - return _r.brush.get(); -} - -AtlasEngine::CachedGlyphLayout::operator bool() const noexcept -{ - return static_cast(textLayout); -} - -void AtlasEngine::CachedGlyphLayout::reset() noexcept -{ - textLayout.reset(); -} - -void AtlasEngine::CachedGlyphLayout::applyScaling(ID2D1RenderTarget* d2dRenderTarget, D2D1_POINT_2F origin) const noexcept -{ - __assume(d2dRenderTarget != nullptr); - - if (scalingRequired) - { - const D2D1_MATRIX_3X2_F transform{ - scale.x, - 0, - 0, - scale.y, - (origin.x + scaleCenter.x) * (1.0f - scale.x), - (origin.y + scaleCenter.y) * (1.0f - scale.y), - }; - d2dRenderTarget->SetTransform(&transform); - } -} - -void AtlasEngine::CachedGlyphLayout::undoScaling(ID2D1RenderTarget* d2dRenderTarget) const noexcept -{ - __assume(d2dRenderTarget != nullptr); - - if (scalingRequired) - { - static constexpr D2D1_MATRIX_3X2_F identity{ 1, 0, 0, 1, 0, 0 }; - d2dRenderTarget->SetTransform(&identity); - } -} - -void AtlasEngine::_d2dPresent() -{ - if (!_r.d2dRenderTarget) - { - _d2dCreateRenderTarget(); - } - - _d2dDrawDirtyArea(); - - _r.glyphQueue.clear(); - WI_ClearAllFlags(_r.invalidations, RenderInvalidations::Cursor | 
RenderInvalidations::ConstBuffer); -} - -void AtlasEngine::_d2dCreateRenderTarget() -{ - { - wil::com_ptr buffer; - THROW_IF_FAILED(_r.swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); - - const auto surface = buffer.query(); - - D2D1_RENDER_TARGET_PROPERTIES props{}; - props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; - props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; - props.dpiX = static_cast(_r.dpi); - props.dpiY = static_cast(_r.dpi); - wil::com_ptr renderTarget; - THROW_IF_FAILED(_sr.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); - _r.d2dRenderTarget = renderTarget.query(); - - // In case _api.realizedAntialiasingMode is D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE we'll - // continuously adjust it in AtlasEngine::_drawGlyph. See _drawGlyph. - _r.d2dRenderTarget->SetTextAntialiasMode(static_cast(_api.realizedAntialiasingMode)); - } - { - static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; - THROW_IF_FAILED(_r.d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _r.brush.put())); - _r.brushColor = 0xffffffff; - } -} - -void AtlasEngine::_d2dDrawDirtyArea() -{ - struct CellFlagHandler - { - CellFlags filter; - decltype(&AtlasEngine::_d2dCellFlagRendererCursor) func; - }; - - static constexpr std::array cellFlagHandlers{ - // Ordered by lowest to highest "layer". - // The selection for instance is drawn on top of underlines, not under them. 
- CellFlagHandler{ CellFlags::Underline, &AtlasEngine::_d2dCellFlagRendererUnderline }, - CellFlagHandler{ CellFlags::UnderlineDotted, &AtlasEngine::_d2dCellFlagRendererUnderlineDotted }, - CellFlagHandler{ CellFlags::UnderlineDouble, &AtlasEngine::_d2dCellFlagRendererUnderlineDouble }, - CellFlagHandler{ CellFlags::Strikethrough, &AtlasEngine::_d2dCellFlagRendererStrikethrough }, - CellFlagHandler{ CellFlags::Cursor, &AtlasEngine::_d2dCellFlagRendererCursor }, - CellFlagHandler{ CellFlags::Selected, &AtlasEngine::_d2dCellFlagRendererSelected }, - }; +#ifndef NDEBUG + static constexpr UINT flags = DXGI_CREATE_FACTORY_DEBUG; +#else + static constexpr UINT flags = 0; +#endif - auto left = gsl::narrow(_r.dirtyRect.left); - auto top = gsl::narrow(_r.dirtyRect.top); - auto right = gsl::narrow(_r.dirtyRect.right); - auto bottom = gsl::narrow(_r.dirtyRect.bottom); - if constexpr (debugGlyphGenerationPerformance) - { - left = 0; - top = 0; - right = _r.cellCount.x; - bottom = _r.cellCount.y; - } + THROW_IF_FAILED(CreateDXGIFactory2(flags, __uuidof(_p.dxgiFactory), _p.dxgiFactory.put_void())); - _r.d2dRenderTarget->BeginDraw(); + auto d2dMode = debugForceD2DMode; + auto deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED +#ifndef NDEBUG + | D3D11_CREATE_DEVICE_DEBUG +#endif + // This flag prevents the driver from creating a large thread pool for things like shader computations + // that would be advantageous for games. For us this has only a minimal performance benefit, + // but comes with a large memory usage overhead. At the time of writing the Nvidia + // driver launches $cpu_thread_count more worker threads without this flag. + | D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS + // Direct2D support. 
+ | D3D11_CREATE_DEVICE_BGRA_SUPPORT; - if (WI_IsFlagSet(_r.invalidations, RenderInvalidations::ConstBuffer)) - { - _r.d2dRenderTarget->Clear(colorFromU32(_r.backgroundColor)); - } + wil::com_ptr dxgiAdapter; + THROW_IF_FAILED(_p.dxgiFactory->EnumAdapters1(0, dxgiAdapter.addressof())); - for (u16 y = top; y < bottom; ++y) { - const Cell* cells = _getCell(0, y); - const TileHashMap::iterator* cellGlyphMappings = _getCellGlyphMapping(0, y); + auto findSoftwareAdapter = _p.s->target->useSoftwareRendering; + auto adapter = dxgiAdapter; + UINT i = 0; - // left/right might intersect a wide glyph. We have to extend left/right - // to include the entire glyph so that we can properly render it. - // Since a series of identical narrow glyphs (2 spaces for instance) are stored in cellGlyphMappings - // just like a single wide glyph (2 references to the same glyph in a row), the only way for us to - // know where wide glyphs begin and end is to iterate the entire row and use the stored `cellCount`. - u16 beg = 0; for (;;) { - const auto cellCount = cellGlyphMappings[beg]->first.data()->attributes.cellCount; - const auto begNext = gsl::narrow_cast(beg + cellCount); + DXGI_ADAPTER_DESC1 desc; + THROW_IF_FAILED(adapter->GetDesc1(&desc)); + + // Switch to D2D mode if any adapter is a remote adapter (RDP). + d2dMode |= WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_REMOTE); - if (begNext > left) + // If useSoftwareRendering is true we search for the first WARP adapter. 
+ if (findSoftwareAdapter && WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)) { - break; + WI_ClearFlag(deviceFlags, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS); + dxgiAdapter = std::move(adapter); + findSoftwareAdapter = false; } - beg = begNext; - } - auto end = beg; - for (;;) - { - const auto cellCount = cellGlyphMappings[end]->first.data()->attributes.cellCount; - end += cellCount; - - if (end >= right) + ++i; + if (_p.dxgiFactory->EnumAdapters1(i, adapter.put()) == DXGI_ERROR_NOT_FOUND) { break; } } + } - // Draw background. - { - _r.d2dRenderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_COPY); - - auto x1 = beg; - auto x2 = gsl::narrow_cast(x1 + 1); - auto currentColor = cells[x1].color.y; - - for (; x2 < end; ++x2) - { - const auto color = cells[x2].color.y; - - if (currentColor != color) - { - const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; - _d2dFillRectangle(rect, currentColor); - x1 = x2; - currentColor = color; - } - } - - { - const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; - _d2dFillRectangle(rect, currentColor); - } + wil::com_ptr device0; + wil::com_ptr deviceContext0; + D3D_FEATURE_LEVEL featureLevel{}; - _r.d2dRenderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_SOURCE_OVER); - } + static constexpr std::array featureLevels{ + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, + }; - // Draw text. 
- for (auto x = beg; x < end;) - { - const auto& it = cellGlyphMappings[x]; - const u16x2 coord{ x, y }; - const auto color = cells[x].color.x; - x += _d2dDrawGlyph(it, coord, color); - } + THROW_IF_FAILED(D3D11CreateDevice( + /* pAdapter */ dxgiAdapter.get(), + /* DriverType */ D3D_DRIVER_TYPE_UNKNOWN, + /* Software */ nullptr, + /* Flags */ deviceFlags, + /* pFeatureLevels */ featureLevels.data(), + /* FeatureLevels */ gsl::narrow_cast(featureLevels.size()), + /* SDKVersion */ D3D11_SDK_VERSION, + /* ppDevice */ device0.put(), + /* pFeatureLevel */ &featureLevel, + /* ppImmediateContext */ deviceContext0.put())); + + auto device = device0.query(); + auto deviceContext = deviceContext0.query(); - // Draw underlines, cursors, selections, etc. - for (const auto& handler : cellFlagHandlers) +#ifndef NDEBUG + if (IsDebuggerPresent()) + { + if (const auto d3dInfoQueue = device.try_query()) { - auto x1 = beg; - auto currentFlags = CellFlags::None; - - for (auto x2 = beg; x2 < end; ++x2) + for (const auto severity : { D3D11_MESSAGE_SEVERITY_CORRUPTION, D3D11_MESSAGE_SEVERITY_ERROR, D3D11_MESSAGE_SEVERITY_WARNING, D3D11_MESSAGE_SEVERITY_INFO }) { - const auto flags = cells[x2].flags & handler.filter; - - if (currentFlags != flags) - { - if (currentFlags != CellFlags::None) - { - const u16r rect{ x1, y, x2, gsl::narrow_cast(y + 1) }; - const auto color = cells[x1].color.x; - (this->*handler.func)(rect, color); - } - - x1 = x2; - currentFlags = flags; - } - } - - if (currentFlags != CellFlags::None) - { - const u16r rect{ x1, y, right, gsl::narrow_cast(y + 1) }; - const auto color = cells[x1].color.x; - (this->*handler.func)(rect, color); + d3dInfoQueue->SetBreakOnSeverity(severity, true); } } } +#endif - THROW_IF_FAILED(_r.d2dRenderTarget->EndDraw()); -} - -// See _drawGlyph() for reference. 
-AtlasEngine::u16 AtlasEngine::_d2dDrawGlyph(const TileHashMap::iterator& it, const u16x2 coord, const u32 color) -{ - const auto key = it->first.data(); - const auto value = it->second.data(); - const auto charsLength = key->charCount; - const auto cellCount = key->attributes.cellCount; - const auto textFormat = _getTextFormat(key->attributes.bold, key->attributes.italic); - const auto coloredGlyph = WI_IsFlagSet(value->flags, CellFlags::ColoredGlyph); - - auto& cachedLayout = it->second.cachedLayout; - if (!cachedLayout) + if (featureLevel < D3D_FEATURE_LEVEL_10_0) { - cachedLayout = _getCachedGlyphLayout(&key->chars[0], charsLength, cellCount, textFormat, coloredGlyph); + d2dMode = true; } - - D2D1_RECT_F rect; - rect.left = static_cast(coord.x) * _r.cellSizeDIP.x; - rect.top = static_cast(coord.y) * _r.cellSizeDIP.y; - rect.right = static_cast(coord.x + cellCount) * _r.cellSizeDIP.x; - rect.bottom = rect.top + _r.cellSizeDIP.y; - - D2D1_POINT_2F origin; - origin.x = rect.left; - origin.y = rect.top; - - _r.d2dRenderTarget->PushAxisAlignedClip(&rect, D2D1_ANTIALIAS_MODE_ALIASED); - - cachedLayout.applyScaling(_r.d2dRenderTarget.get(), origin); - - origin.x += cachedLayout.offset.x; - origin.y += cachedLayout.offset.y; - _r.d2dRenderTarget->DrawTextLayout(origin, cachedLayout.textLayout.get(), _brushWithColor(color), cachedLayout.options); - - cachedLayout.undoScaling(_r.d2dRenderTarget.get()); - - _r.d2dRenderTarget->PopAxisAlignedClip(); - - return cellCount; -} - -void AtlasEngine::_d2dDrawLine(u16r rect, u16 pos, u16 width, u32 color, ID2D1StrokeStyle* strokeStyle) -{ - const auto w = static_cast(width) * _r.dipPerPixel; - const auto y1 = static_cast(rect.top) * _r.cellSizeDIP.y + static_cast(pos) * _r.dipPerPixel + w * 0.5f; - const auto x1 = static_cast(rect.left) * _r.cellSizeDIP.x; - const auto x2 = static_cast(rect.right) * _r.cellSizeDIP.x; - const auto brush = _brushWithColor(color); - _r.d2dRenderTarget->DrawLine({ x1, y1 }, { x2, y1 }, brush, w, 
strokeStyle); -} - -void AtlasEngine::_d2dFillRectangle(u16r rect, u32 color) -{ - const D2D1_RECT_F r{ - .left = static_cast(rect.left) * _r.cellSizeDIP.x, - .top = static_cast(rect.top) * _r.cellSizeDIP.y, - .right = static_cast(rect.right) * _r.cellSizeDIP.x, - .bottom = static_cast(rect.bottom) * _r.cellSizeDIP.y, - }; - const auto brush = _brushWithColor(color); - _r.d2dRenderTarget->FillRectangle(r, brush); -} - -void AtlasEngine::_d2dCellFlagRendererCursor(u16r rect, u32 color) -{ - _drawCursor(rect, _r.cursorOptions.cursorColor, false); -} - -void AtlasEngine::_d2dCellFlagRendererSelected(u16r rect, u32 color) -{ - _d2dFillRectangle(rect, _r.selectionColor); -} - -void AtlasEngine::_d2dCellFlagRendererUnderline(u16r rect, u32 color) -{ - _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color); -} - -void AtlasEngine::_d2dCellFlagRendererUnderlineDotted(u16r rect, u32 color) -{ - if (!_r.dottedStrokeStyle) + else if (featureLevel < D3D_FEATURE_LEVEL_11_0) { - static constexpr D2D1_STROKE_STYLE_PROPERTIES props{ .dashStyle = D2D1_DASH_STYLE_CUSTOM }; - static constexpr FLOAT dashes[2]{ 1, 2 }; - THROW_IF_FAILED(_sr.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _r.dottedStrokeStyle.addressof())); + D3D11_FEATURE_DATA_D3D10_X_HARDWARE_OPTIONS options{}; + // I'm assuming if `CheckFeatureSupport` fails, it'll leave `options` untouched which will result in `d2dMode |= true`. 
+ std::ignore = device->CheckFeatureSupport(D3D11_FEATURE_D3D10_X_HARDWARE_OPTIONS, &options, sizeof(options)); + d2dMode |= !options.ComputeShaders_Plus_RawAndStructuredBuffers_Via_Shader_4_x; } - _d2dDrawLine(rect, _r.fontMetrics.underlinePos, _r.fontMetrics.underlineWidth, color, _r.dottedStrokeStyle.get()); -} - -void AtlasEngine::_d2dCellFlagRendererUnderlineDouble(u16r rect, u32 color) -{ - _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.x, _r.fontMetrics.thinLineWidth, color); - _d2dDrawLine(rect, _r.fontMetrics.doubleUnderlinePos.y, _r.fontMetrics.thinLineWidth, color); -} - -void AtlasEngine::_d2dCellFlagRendererStrikethrough(u16r rect, u32 color) -{ - _d2dDrawLine(rect, _r.fontMetrics.strikethroughPos, _r.fontMetrics.strikethroughWidth, color); + if (d2dMode) + { + _b = std::make_unique(std::move(device), std::move(deviceContext)); + } + else + { + _b = std::make_unique(std::move(device), std::move(deviceContext)); + } } diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp new file mode 100644 index 00000000000..5118ce7f454 --- /dev/null +++ b/src/renderer/atlas/Backend.cpp @@ -0,0 +1,339 @@ +#include "pch.h" +#include "Backend.h" + +TIL_FAST_MATH_BEGIN + +// Disable a bunch of warnings which get in the way of writing performant code. +#pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). 
+ +using namespace Microsoft::Console::Render::Atlas; + +wil::com_ptr SwapChainManager::GetBuffer() const +{ + wil::com_ptr buffer; + THROW_IF_FAILED(_swapChain->GetBuffer(0, __uuidof(ID3D11Texture2D), buffer.put_void())); + return buffer; +} + +void SwapChainManager::Present(const RenderingPayload& p) +{ + const til::rect fullRect{ 0, 0, p.s->cellCount.x, p.s->cellCount.y }; + + if (p.dirtyRect != fullRect) + { + auto dirtyRectInPx = p.dirtyRect; + dirtyRectInPx.left *= p.s->font->cellSize.x; + dirtyRectInPx.top *= p.s->font->cellSize.y; + dirtyRectInPx.right *= p.s->font->cellSize.x; + dirtyRectInPx.bottom *= p.s->font->cellSize.y; + + // This block will enlarge the dirtyRectInPx to handle glyphs that overlap their rows. + // TODO: This only works because we redraw the entire back buffer every frame. + const auto actualDirtyTop = gsl::at(p.rows, p.dirtyRect.top).top; + const auto actualDirtyBottom = gsl::at(p.rows, gsl::narrow_cast(p.dirtyRect.bottom) - 1).bottom; + // Since rows might be taller than their cells, they might have drawn outside of the viewport. + // This use of clamp() below avoids us from writing out of bounds coordinates into dirtyRectInPx. 
+ dirtyRectInPx.top = clamp(actualDirtyTop, 0, dirtyRectInPx.top); + dirtyRectInPx.bottom = clamp(actualDirtyBottom, dirtyRectInPx.bottom, static_cast(_targetSize.y)); + + // TODO + //if (p.dirtyRect.right == fullRect.right) + //{ + // dirtyRectInPx.right = _targetSize.x; + //} + + RECT scrollRect{}; + POINT scrollOffset{}; + DXGI_PRESENT_PARAMETERS params{ + .DirtyRectsCount = 1, + .pDirtyRects = dirtyRectInPx.as_win32_rect(), + }; + + if (p.scrollOffset) + { + const auto offsetInPx = p.scrollOffset * p.s->font->cellSize.y; + const auto width = p.s->cellCount.x * p.s->font->cellSize.x; + const auto height = p.s->cellCount.y * p.s->font->cellSize.y; + const auto top = std::max(0, offsetInPx); + const auto bottom = height + std::min(0, offsetInPx); + + scrollRect = { 0, top, width, bottom }; + scrollOffset = { 0, offsetInPx }; + + params.pScrollRect = &scrollRect; + params.pScrollOffset = &scrollOffset; + } + + if (const auto hr = _swapChain->Present1(1, 0, ¶ms); FAILED(hr)) + { + __debugbreak(); + THROW_HR(hr); + } + } + else + { + THROW_IF_FAILED(_swapChain->Present(1, 0)); + } + + _waitForPresentation = true; +} + +void SwapChainManager::WaitUntilCanRender() noexcept +{ + // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. + // Once we've waited on the event, waiting on it again will block until the timeout elapses. + // _waitForPresentation guards against this. 
+ if constexpr (!debugDisableFrameLatencyWaitableObject) + { + if (_waitForPresentation) + { + WaitForSingleObjectEx(_frameLatencyWaitableObject.get(), 100, true); + _waitForPresentation = false; + } + } +} + +void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* device) +{ + _swapChain.reset(); + _frameLatencyWaitableObject.reset(); + + DXGI_SWAP_CHAIN_DESC1 desc{}; + desc.Width = p.s->targetSize.x; + desc.Height = p.s->targetSize.y; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc.Count = 1; + desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window. + // 3 buffers seems to guarantee a stable framerate at display frequency at all times. + desc.BufferCount = 3; + desc.Scaling = DXGI_SCALING_NONE; + // DXGI_SWAP_EFFECT_FLIP_DISCARD is a mode that was created at a time were display drivers + // lacked support for Multiplane Overlays (MPO) and were copying buffers was expensive. + // This allowed DWM to quickly draw overlays (like gamebars) on top of rendered content. + // With faster GPU memory in general and with support for MPO in particular this isn't + // really an advantage anymore. Instead DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL allows for a + // more "intelligent" composition and display updates to occur like Panel Self Refresh + // (PSR) which requires dirty rectangles (Present1 API) to work correctly. + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + // If our background is opaque we can enable "independent" flips by setting DXGI_ALPHA_MODE_IGNORE. + // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. + desc.AlphaMode = p.s->target->enableTransparentBackground ? 
DXGI_ALPHA_MODE_PREMULTIPLIED : DXGI_ALPHA_MODE_IGNORE; + desc.Flags = flags; + + wil::com_ptr swapChain0; + + if (p.s->target->hwnd) + { + desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; + THROW_IF_FAILED(p.dxgiFactory->CreateSwapChainForHwnd(device, p.s->target->hwnd, &desc, nullptr, nullptr, swapChain0.addressof())); + } + else + { + const auto module = GetModuleHandleW(L"dcomp.dll"); + const auto DCompositionCreateSurfaceHandle = GetProcAddressByFunctionDeclaration(module, DCompositionCreateSurfaceHandle); + THROW_LAST_ERROR_IF(!DCompositionCreateSurfaceHandle); + + // As per: https://docs.microsoft.com/en-us/windows/win32/api/dcomp/nf-dcomp-dcompositioncreatesurfacehandle + static constexpr DWORD COMPOSITIONSURFACE_ALL_ACCESS = 0x0003L; + THROW_IF_FAILED(DCompositionCreateSurfaceHandle(COMPOSITIONSURFACE_ALL_ACCESS, nullptr, _swapChainHandle.addressof())); + THROW_IF_FAILED(p.dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(device, _swapChainHandle.get(), &desc, nullptr, swapChain0.addressof())); + } + + _swapChain = swapChain0.query(); + _frameLatencyWaitableObject.reset(_swapChain->GetFrameLatencyWaitableObject()); + _targetGeneration = p.s->target.generation(); + _targetSize = p.s->targetSize; + _waitForPresentation = true; + + WaitUntilCanRender(); + + if (p.swapChainChangedCallback) + { + try + { + p.swapChainChangedCallback(_swapChainHandle.get()); + } + CATCH_LOG() + } +} + +void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) const +{ + // XAML's SwapChainPanel combines the worst of both worlds and always applies a transform to + // the swap chain to make it match the display scale. This if condition undoes the damage. 
+ if (_fontGeneration != p.s->font.generation() && !p.s->target->hwnd) + { + const DXGI_MATRIX_3X2_F matrix{ + ._11 = p.d.font.dipPerPixel, + ._22 = p.d.font.dipPerPixel, + }; + THROW_IF_FAILED(_swapChain->SetMatrixTransform(&matrix)); + } +} + +// Returns the theoretical/design design size of the given `DWRITE_GLYPH_RUN`, relative the the given baseline origin. +f32r Microsoft::Console::Render::Atlas::GetGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) +{ + DWRITE_FONT_METRICS fontMetrics; + glyphRun.fontFace->GetMetrics(&fontMetrics); + + std::unique_ptr glyphRunMetricsHeap; + std::array glyphRunMetricsStack; + DWRITE_GLYPH_METRICS* glyphRunMetrics = glyphRunMetricsStack.data(); + + if (glyphRun.glyphCount > glyphRunMetricsStack.size()) + { + glyphRunMetricsHeap = std::make_unique_for_overwrite(glyphRun.glyphCount); + glyphRunMetrics = glyphRunMetricsHeap.get(); + } + + glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, glyphRunMetrics, false); + + f32 const fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; + f32r accumulatedBounds{ + FLT_MAX, + FLT_MAX, + FLT_MIN, + FLT_MIN, + }; + + for (uint32_t i = 0; i < glyphRun.glyphCount; ++i) + { + const auto& glyphMetrics = glyphRunMetrics[i]; + const auto glyphAdvance = glyphRun.glyphAdvances ? 
glyphRun.glyphAdvances[i] : glyphMetrics.advanceWidth * fontScale; + + const auto left = static_cast(glyphMetrics.leftSideBearing) * fontScale; + const auto top = static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale; + const auto right = static_cast(gsl::narrow_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale; + const auto bottom = static_cast(gsl::narrow_cast(glyphMetrics.advanceHeight) - glyphMetrics.bottomSideBearing - glyphMetrics.verticalOriginY) * fontScale; + + if (left < right && top < bottom) + { + auto glyphX = baselineX; + auto glyphY = baselineY; + if (glyphRun.glyphOffsets) + { + glyphX += glyphRun.glyphOffsets[i].advanceOffset; + glyphY -= glyphRun.glyphOffsets[i].ascenderOffset; + } + + accumulatedBounds.left = std::min(accumulatedBounds.left, left + glyphX); + accumulatedBounds.top = std::min(accumulatedBounds.top, top + glyphY); + accumulatedBounds.right = std::max(accumulatedBounds.right, right + glyphX); + accumulatedBounds.bottom = std::max(accumulatedBounds.bottom, bottom + glyphY); + } + + baselineX += glyphAdvance; + } + + return accumulatedBounds; +} + +// Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. +// `d2dRenderTarget4` and `dwriteFactory4` are optional and used to draw colored glyphs. +// Returns true if the `DWRITE_GLYPH_RUN` contained a color glyph. 
+bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +{ + static constexpr auto measuringMode = DWRITE_MEASURING_MODE_NATURAL; + static constexpr auto formats = + DWRITE_GLYPH_IMAGE_FORMATS_TRUETYPE | + DWRITE_GLYPH_IMAGE_FORMATS_CFF | + DWRITE_GLYPH_IMAGE_FORMATS_COLR | + DWRITE_GLYPH_IMAGE_FORMATS_SVG | + DWRITE_GLYPH_IMAGE_FORMATS_PNG | + DWRITE_GLYPH_IMAGE_FORMATS_JPEG | + DWRITE_GLYPH_IMAGE_FORMATS_TIFF | + DWRITE_GLYPH_IMAGE_FORMATS_PREMULTIPLIED_B8G8R8A8; + + wil::com_ptr enumerator; + + // If ID2D1DeviceContext4 isn't supported, we'll exit early below. + auto hr = DWRITE_E_NOCOLOR; + + if (d2dRenderTarget4) + { + D2D_MATRIX_3X2_F transform; + d2dRenderTarget4->GetTransform(&transform); + f32 dpiX, dpiY; + d2dRenderTarget4->GetDpi(&dpiX, &dpiY); + transform = transform * D2D1::Matrix3x2F::Scale(dpiX, dpiY); + + // Support for ID2D1DeviceContext4 implies support for IDWriteFactory4. + // ID2D1DeviceContext4 is required for drawing below. 
+ hr = dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, measuringMode, nullptr, 0, &enumerator); + } + + if (hr == DWRITE_E_NOCOLOR) + { + d2dRenderTarget->DrawGlyphRun(baselineOrigin, glyphRun, foregroundBrush, measuringMode); + return false; + } + + THROW_IF_FAILED(hr); + + const auto previousAntialiasingMode = d2dRenderTarget4->GetTextAntialiasMode(); + d2dRenderTarget4->SetTextAntialiasMode(D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE); + const auto cleanup = wil::scope_exit([&]() { + d2dRenderTarget4->SetTextAntialiasMode(previousAntialiasingMode); + }); + + wil::com_ptr solidBrush; + + for (;;) + { + BOOL hasRun; + THROW_IF_FAILED(enumerator->MoveNext(&hasRun)); + if (!hasRun) + { + break; + } + + const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun; + THROW_IF_FAILED(enumerator->GetCurrentRun(&colorGlyphRun)); + + ID2D1Brush* runBrush = nullptr; + if (colorGlyphRun->paletteIndex == /*DWRITE_NO_PALETTE_INDEX*/ 0xffff) + { + runBrush = foregroundBrush; + } + else + { + if (!solidBrush) + { + THROW_IF_FAILED(d2dRenderTarget4->CreateSolidColorBrush(colorGlyphRun->runColor, &solidBrush)); + } + else + { + solidBrush->SetColor(colorGlyphRun->runColor); + } + runBrush = solidBrush.get(); + } + + switch (colorGlyphRun->glyphImageFormat) + { + case DWRITE_GLYPH_IMAGE_FORMATS_NONE: + break; + case DWRITE_GLYPH_IMAGE_FORMATS_PNG: + case DWRITE_GLYPH_IMAGE_FORMATS_JPEG: + case DWRITE_GLYPH_IMAGE_FORMATS_TIFF: + case DWRITE_GLYPH_IMAGE_FORMATS_PREMULTIPLIED_B8G8R8A8: + d2dRenderTarget4->DrawColorBitmapGlyphRun(colorGlyphRun->glyphImageFormat, baselineOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->measuringMode, D2D1_COLOR_BITMAP_GLYPH_SNAP_OPTION_DEFAULT); + break; + case DWRITE_GLYPH_IMAGE_FORMATS_SVG: + d2dRenderTarget4->DrawSvgGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, runBrush, nullptr, 0, colorGlyphRun->measuringMode); + break; + default: + d2dRenderTarget4->DrawGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, 
colorGlyphRun->glyphRunDescription, runBrush, colorGlyphRun->measuringMode); + break; + } + } + + return true; +} + +TIL_FAST_MATH_END diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h new file mode 100644 index 00000000000..823c9a2b8e4 --- /dev/null +++ b/src/renderer/atlas/Backend.h @@ -0,0 +1,84 @@ +#pragma once + +#include "common.h" + +namespace Microsoft::Console::Render::Atlas +{ + inline constexpr bool debugContinuousRedraw = false; + inline constexpr bool debugDisableFrameLatencyWaitableObject = false; + inline constexpr bool debugDisablePartialInvalidation = false; + inline constexpr bool debugForceD2DMode = false; + + struct SwapChainManager + { + void UpdateSwapChainSettings(const RenderingPayload& p, IUnknown* device, auto&& prepareRecreate, auto&& prepareResize) + { + if (_targetGeneration != p.s->target.generation()) + { + if (_swapChain) + { + prepareRecreate(); + } + _createSwapChain(p, device); + } + else if (_targetSize != p.s->targetSize) + { + prepareResize(); + THROW_IF_FAILED(_swapChain->ResizeBuffers(0, _targetSize.x, _targetSize.y, DXGI_FORMAT_UNKNOWN, flags)); + _targetSize = p.s->targetSize; + } + + _updateMatrixTransform(p); + } + + wil::com_ptr GetBuffer() const; + void Present(const RenderingPayload& p); + void WaitUntilCanRender() noexcept; + + private: + void _createSwapChain(const RenderingPayload& p, IUnknown* device); + void _updateMatrixTransform(const RenderingPayload& p) const; + + static constexpr DXGI_SWAP_CHAIN_FLAG flags = debugDisableFrameLatencyWaitableObject ? 
DXGI_SWAP_CHAIN_FLAG{} : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + + wil::com_ptr _swapChain; + wil::unique_handle _swapChainHandle; + wil::unique_handle _frameLatencyWaitableObject; + til::generation_t _targetGeneration; + til::generation_t _fontGeneration; + u16x2 _targetSize; + bool _waitForPresentation = false; + }; + + template + constexpr T colorFromU32(u32 rgba) + { + const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; + const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; + const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; + const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; + return { r, g, b, a }; + } + + template + constexpr T colorFromU32Premultiply(u32 rgba) + { + const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; + const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; + const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; + const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; + return { r * a, g * a, b * a, a }; + } + + // MSVC STL (version 22000) implements std::clamp(T, T, T) in terms of the generic + // std::clamp(T, T, T, Predicate) with std::less{} as the argument, + // which introduces branching. While not perfect, this is still better than std::clamp. 
+ template + static constexpr T clamp(T val, T min, T max) + { + return std::max(min, std::min(max, val)); + } + + f32r GetGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); + bool DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); +} diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp new file mode 100644 index 00000000000..6e9588f38fb --- /dev/null +++ b/src/renderer/atlas/BackendD2D.cpp @@ -0,0 +1,381 @@ +#include "pch.h" +#include "BackendD2D.h" + +TIL_FAST_MATH_BEGIN + +// Disable a bunch of warnings which get in the way of writing performant code. +#pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). 
+ +using namespace Microsoft::Console::Render::Atlas; + +BackendD2D::BackendD2D(wil::com_ptr device, wil::com_ptr deviceContext) noexcept : + _device{ std::move(device) }, + _deviceContext{ std::move(deviceContext) } +{ +} + +void BackendD2D::Render(RenderingPayload& p) +{ + if (_generation != p.s.generation()) + { + _handleSettingsUpdate(p); + } + + _renderTarget->BeginDraw(); + _drawBackground(p); + _drawText(p); + _drawGridlines(p); + _drawCursor(p); + _drawSelection(p); + THROW_IF_FAILED(_renderTarget->EndDraw()); + + _swapChainManager.Present(p); +} + +bool BackendD2D::RequiresContinuousRedraw() noexcept +{ + return false; +} + +void BackendD2D::WaitUntilCanRender() noexcept +{ + _swapChainManager.WaitUntilCanRender(); +} + +void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) +{ + _swapChainManager.UpdateSwapChainSettings( + p, + _device.get(), + [this]() { + _renderTarget.reset(); + _renderTarget4.reset(); + _deviceContext->ClearState(); + _deviceContext->Flush(); + }, + [this]() { + _renderTarget.reset(); + _renderTarget4.reset(); + _deviceContext->ClearState(); + }); + + if (!_renderTarget) + { + { + const auto surface = _swapChainManager.GetBuffer().query(); + + const D2D1_RENDER_TARGET_PROPERTIES props{ + .type = D2D1_RENDER_TARGET_TYPE_DEFAULT, + .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, + .dpiX = static_cast(p.s->font->dpi), + .dpiY = static_cast(p.s->font->dpi), + }; + wil::com_ptr renderTarget; + THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); + _renderTarget = renderTarget.query(); + _renderTarget4 = renderTarget.try_query(); + _renderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); + } + { + static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; + THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); + _brushColor = 0xffffffff; + } + } + + if (!_dottedStrokeStyle) + { + static constexpr 
D2D1_STROKE_STYLE_PROPERTIES props{ .dashStyle = D2D1_DASH_STYLE_CUSTOM }; + static constexpr FLOAT dashes[2]{ 1, 2 }; + THROW_IF_FAILED(p.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _dottedStrokeStyle.addressof())); + } + + const auto fontChanged = _fontGeneration != p.s->font.generation(); + const auto cellCountChanged = _cellCount != p.s->cellCount; + + if (fontChanged) + { + const auto dpi = static_cast(p.s->font->dpi); + _renderTarget->SetDpi(dpi, dpi); + _renderTarget->SetTextAntialiasMode(static_cast(p.s->font->antialiasingMode)); + } + + if (fontChanged || cellCountChanged) + { + const D2D1_BITMAP_PROPERTIES props{ + .pixelFormat = { DXGI_FORMAT_R8G8B8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, + .dpiX = static_cast(p.s->font->dpi), + .dpiY = static_cast(p.s->font->dpi), + }; + const D2D1_SIZE_U size{ p.s->cellCount.x, p.s->cellCount.y }; + const D2D1_MATRIX_3X2_F transform{ + ._11 = static_cast(p.s->font->cellSize.x), + ._22 = static_cast(p.s->font->cellSize.y), + }; + THROW_IF_FAILED(_renderTarget->CreateBitmap(size, nullptr, 0, &props, _backgroundBitmap.put())); + THROW_IF_FAILED(_renderTarget->CreateBitmapBrush(_backgroundBitmap.get(), _backgroundBrush.put())); + _backgroundBrush->SetInterpolationMode(D2D1_BITMAP_INTERPOLATION_MODE_NEAREST_NEIGHBOR); + _backgroundBrush->SetExtendModeX(D2D1_EXTEND_MODE_MIRROR); + _backgroundBrush->SetExtendModeY(D2D1_EXTEND_MODE_MIRROR); + _backgroundBrush->SetTransform(&transform); + } + + _generation = p.s.generation(); + _fontGeneration = p.s->font.generation(); + _cellCount = p.s->cellCount; +} + +void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept +{ + // If the terminal was 120x30 cells and 1200x600 pixels large, this would draw the + // background by upscaling a 120x30 pixel bitmap to fill the entire render target. 
+ const D2D1_RECT_F rect{ 0, 0, p.s->targetSize.x * p.d.font.dipPerPixel, p.s->targetSize.y * p.d.font.dipPerPixel }; + _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_COPY); + _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), p.s->cellCount.x * 4); + _renderTarget->FillRectangle(&rect, _backgroundBrush.get()); + _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_SOURCE_OVER); +} + +void BackendD2D::_drawText(RenderingPayload& p) +{ + // It is possible to create a "_foregroundBrush" similar to how the `_backgroundBrush` is created and + // use that as the brush for text rendering below. That way we wouldn't have to search `row.colors` for color + // changes and could draw entire lines of text in a single call. Unfortunately Direct2D is not particularly + // smart if you do this and chooses to draw the given text into a way too small offscreen texture first and + // then blends it on the screen with the given bitmap brush. While this roughly doubles the performance + // when drawing lots of colors, the extra latency drops performance by >10x when drawing fewer colors. + // Since fewer colors are more common, I've chosen to go with regular solid-color brushes. 
+ u16 y = 0; + for (auto& row : p.rows) + { + f32 baselineX = 0.0f; + + for (const auto& m : row.mappings) + { + const auto colorsBegin = row.colors.begin(); + auto it = colorsBegin + m.glyphsFrom; + const auto end = colorsBegin + m.glyphsTo; + + do + { + const auto beg = it; + const auto off = it - colorsBegin; + const auto fg = *it; + + while (++it != end && *it == fg) + { + } + + const auto count = it - beg; + const auto brush = _brushWithColor(fg); + const auto baselineY = p.d.font.cellSizeDIP.y * y + p.s->font->baselineInDIP; + const DWRITE_GLYPH_RUN glyphRun{ + .fontFace = m.fontFace.get(), + .fontEmSize = m.fontEmSize, + .glyphCount = gsl::narrow_cast(count), + .glyphIndices = &row.glyphIndices[off], + .glyphAdvances = &row.glyphAdvances[off], + .glyphOffsets = &row.glyphOffsets[off], + }; + + DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); + + const auto blackBox = GetGlyphRunBlackBox(glyphRun, baselineX, baselineY); + // Add a 1px padding to avoid inaccuracies with the blackbox measurement. + // It's only an estimate based on the design size after all. + row.top = std::min(row.top, static_cast(lround(blackBox.top - 1.5f))); + row.bottom = std::max(row.bottom, static_cast(lround(blackBox.bottom + 1.5f))); + + for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) + { + baselineX += glyphRun.glyphAdvances[i]; + } + } while (it != end); + } + + y++; + } +} + +void BackendD2D::_drawGridlines(const RenderingPayload& p) +{ + u16 y = 0; + for (const auto& row : p.rows) + { + const auto top = p.d.font.cellSizeDIP.y * y; + const auto bottom = p.d.font.cellSizeDIP.y * (y + 1); + + for (const auto& r : row.gridLineRanges) + { + // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
+ assert(r.lines.any()); + + D2D1_RECT_F rect{ r.from * p.d.font.cellSizeDIP.x, top, r.to * p.d.font.cellSizeDIP.x, bottom }; + + if (r.lines.test(GridLines::Left)) + { + for (auto i = r.from; i < r.to; ++i) + { + rect.left = i * p.d.font.cellSizeDIP.x; + rect.right = rect.left + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + } + } + if (r.lines.test(GridLines::Top)) + { + rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::Right)) + { + for (auto i = r.to; i > r.from; --i) + { + rect.right = i * p.d.font.cellSizeDIP.x; + rect.left = rect.right - p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + } + } + if (r.lines.test(GridLines::Bottom)) + { + rect.top = rect.bottom - p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::Underline)) + { + rect.top += p.s->font->underlinePos * p.d.font.dipPerPixel; + rect.bottom = rect.top + p.s->font->underlineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::HyperlinkUnderline)) + { + const auto w = p.s->font->underlineWidth * p.d.font.dipPerPixel; + const auto centerY = rect.top + p.s->font->underlinePos * p.d.font.dipPerPixel + w * 0.5f; + const auto brush = _brushWithColor(r.color); + const D2D1_POINT_2F point0{ rect.left, centerY }; + const D2D1_POINT_2F point1{ rect.right, centerY }; + _renderTarget->DrawLine(point0, point1, brush, w, _dottedStrokeStyle.get()); + } + if (r.lines.test(GridLines::DoubleUnderline)) + { + rect.top = top + p.s->font->doubleUnderlinePos.x * p.d.font.dipPerPixel; + rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, r.color); + + rect.top = top + p.s->font->doubleUnderlinePos.y * p.d.font.dipPerPixel; + rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + 
_fillRectangle(rect, r.color);
+            }
+            if (r.lines.test(GridLines::Strikethrough))
+            {
+                rect.top = top + p.s->font->strikethroughPos * p.d.font.dipPerPixel;
+                rect.bottom = rect.top + p.s->font->strikethroughWidth * p.d.font.dipPerPixel;
+                _fillRectangle(rect, r.color);
+            }
+        }
+
+        y++;
+    }
+}
+
+// Draws the cursor in the shape selected by `p.s->cursor->cursorType`.
+// `p.cursorRect` is given in cells and is scaled by the cell size in DIPs.
+// Nothing is drawn when `p.cursorRect` evaluates to false.
+void BackendD2D::_drawCursor(const RenderingPayload& p)
+{
+    if (!p.cursorRect)
+    {
+        return;
+    }
+
+    D2D1_RECT_F rect{
+        p.d.font.cellSizeDIP.x * p.cursorRect.left,
+        p.d.font.cellSizeDIP.y * p.cursorRect.top,
+        p.d.font.cellSizeDIP.x * p.cursorRect.right,
+        p.d.font.cellSizeDIP.y * p.cursorRect.bottom,
+    };
+
+    switch (static_cast<CursorType>(p.s->cursor->cursorType))
+    {
+    case CursorType::Legacy:
+        // Only the bottom `heightPercentage` percent of the cell is filled.
+        rect.top = rect.bottom - (rect.bottom - rect.top) * static_cast<float>(p.s->cursor->heightPercentage) / 100.0f;
+        _fillRectangle(rect, p.s->cursor->cursorColor);
+        break;
+    case CursorType::VerticalBar:
+        rect.right = rect.left + p.s->font->thinLineWidth * p.d.font.dipPerPixel;
+        _fillRectangle(rect, p.s->cursor->cursorColor);
+        break;
+    case CursorType::Underscore:
+        rect.top += p.s->font->underlinePos * p.d.font.dipPerPixel;
+        rect.bottom = rect.top + p.s->font->underlineWidth * p.d.font.dipPerPixel;
+        _fillRectangle(rect, p.s->cursor->cursorColor);
+        break;
+    case CursorType::EmptyBox:
+    {
+        const auto brush = _brushWithColor(p.s->cursor->cursorColor);
+        const auto w = p.s->font->thinLineWidth * p.d.font.dipPerPixel;
+        const auto wh = w / 2.0f;
+        // DrawRectangle() centers the stroke on the rectangle's edges. Shrinking the
+        // rectangle by half the stroke width on every side keeps the outline inside
+        // the cursor cell, so right/bottom must move inwards (-=), not outwards.
+        rect.left += wh;
+        rect.top += wh;
+        rect.right -= wh;
+        rect.bottom -= wh;
+        _renderTarget->DrawRectangle(&rect, brush, w, nullptr);
+        break;
+    }
+    case CursorType::FullBox:
+        _fillRectangle(rect, p.s->cursor->cursorColor);
+        break;
+    case CursorType::DoubleUnderscore:
+    {
+        auto rect2 = rect;
+        rect2.top = rect.top + p.s->font->doubleUnderlinePos.x * p.d.font.dipPerPixel;
+        rect2.bottom = rect2.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel;
+        _fillRectangle(rect2, p.s->cursor->cursorColor);
+        rect.top = rect.top +
p.s->font->doubleUnderlinePos.y * p.d.font.dipPerPixel; + rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + _fillRectangle(rect, p.s->cursor->cursorColor); + break; + } + default: + break; + } +} + +void BackendD2D::_drawSelection(const RenderingPayload& p) +{ + u16 y = 0; + for (const auto& row : p.rows) + { + if (row.selectionTo > row.selectionFrom) + { + const D2D1_RECT_F rect{ + p.d.font.cellSizeDIP.x * row.selectionFrom, + p.d.font.cellSizeDIP.y * y, + p.d.font.cellSizeDIP.x * row.selectionTo, + p.d.font.cellSizeDIP.y * (y + 1), + }; + _fillRectangle(rect, p.s->misc->selectionColor); + } + + y++; + } +} + +ID2D1Brush* BackendD2D::_brushWithColor(u32 color) +{ + if (_brushColor != color) + { + const auto d2dColor = colorFromU32(color); + THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&d2dColor, nullptr, _brush.put())); + _brushColor = color; + } + return _brush.get(); +} + +void BackendD2D::_fillRectangle(const D2D1_RECT_F& rect, u32 color) +{ + const auto brush = _brushWithColor(color); + _renderTarget->FillRectangle(&rect, brush); +} + +TIL_FAST_MATH_END diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h new file mode 100644 index 00000000000..ed26d322e10 --- /dev/null +++ b/src/renderer/atlas/BackendD2D.h @@ -0,0 +1,43 @@ +#pragma once + +#include "Backend.h" + +namespace Microsoft::Console::Render::Atlas +{ + struct BackendD2D : IBackend + { + BackendD2D(wil::com_ptr device, wil::com_ptr deviceContext) noexcept; + + void Render(RenderingPayload& payload) override; + bool RequiresContinuousRedraw() noexcept override; + void WaitUntilCanRender() noexcept override; + + private: + __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); + void _drawBackground(const RenderingPayload& p) noexcept; + void _drawText(RenderingPayload& p); + void _drawGridlines(const RenderingPayload& p); + void _drawCursor(const RenderingPayload& p); + void _drawSelection(const RenderingPayload& p); + 
ID2D1Brush* _brushWithColor(u32 color); + void _fillRectangle(const D2D1_RECT_F& rect, u32 color); + + SwapChainManager _swapChainManager; + + wil::com_ptr _device; + wil::com_ptr _deviceContext; + + wil::com_ptr _renderTarget; + wil::com_ptr _renderTarget4; // Optional. Supported since Windows 10 14393. + wil::com_ptr _brush; + wil::com_ptr _dottedStrokeStyle; + wil::com_ptr _backgroundBitmap; + wil::com_ptr _backgroundBrush; + + u32 _brushColor = 0; + + til::generation_t _generation; + til::generation_t _fontGeneration; + u16x2 _cellCount; + }; +} diff --git a/src/renderer/atlas/BackendD3D11.cpp b/src/renderer/atlas/BackendD3D11.cpp new file mode 100644 index 00000000000..b06969dea5e --- /dev/null +++ b/src/renderer/atlas/BackendD3D11.cpp @@ -0,0 +1,1283 @@ +#include "pch.h" +#include "BackendD3D11.h" + +#include + +#include +#include +#include +#include + +#include "dwrite.h" + +TIL_FAST_MATH_BEGIN + +// Disable a bunch of warnings which get in the way of writing performant code. +#pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +#pragma warning(disable : 26482) // Only index into arrays using constant expressions (bounds.2). 
+
+using namespace Microsoft::Console::Render::Atlas;
+
+// Releases the font face reference held by every occupied slot.
+BackendD3D11::GlyphCacheMap::~GlyphCacheMap()
+{
+    Clear();
+}
+
+// Takes over the table of `other` and leaves `other` without any entries.
+BackendD3D11::GlyphCacheMap& BackendD3D11::GlyphCacheMap::operator=(GlyphCacheMap&& other) noexcept
+{
+    if (this != &other)
+    {
+        // Every occupied slot owns a reference on its font face (see _insert).
+        // Give them back before the table gets replaced, otherwise they leak.
+        Clear();
+
+        _map = std::exchange(other._map, {});
+        _mapMask = std::exchange(other._mapMask, 0);
+        _capacity = std::exchange(other._capacity, 0);
+        _size = std::exchange(other._size, 0);
+    }
+    return *this;
+}
+
+// Empties the map without shrinking it: all font face references are
+// released and every slot is marked as free (fontFace == nullptr).
+void BackendD3D11::GlyphCacheMap::Clear() noexcept
+{
+    if (_size)
+    {
+        for (auto& entry : _map)
+        {
+            if (entry.fontFace)
+            {
+                // I'm pretty sure Release() doesn't throw exceptions.
+#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Release()' which may throw exceptions (f.6).
+                entry.fontFace->Release();
+                entry.fontFace = nullptr;
+            }
+        }
+
+        // No slot is occupied anymore. Without this reset _insert() would keep counting
+        // the released entries and grow the table long before it is actually full.
+        _size = 0;
+    }
+}
+
+// Looks up the entry for the given font face and glyph index using linear probing.
+// If no such entry exists a new one is inserted. `inserted` tells the caller which
+// of the two happened.
+BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted)
+{
+    const auto hash = _hash(fontFace, glyphIndex);
+
+    for (auto i = hash;; ++i)
+    {
+        auto& entry = _map[i & _mapMask];
+        if (entry.fontFace == fontFace && entry.glyphIndex == glyphIndex)
+        {
+            inserted = false;
+            return entry;
+        }
+        if (!entry.fontFace)
+        {
+            inserted = true;
+            return _insert(fontFace, glyphIndex, hash);
+        }
+    }
+}
+
+size_t BackendD3D11::GlyphCacheMap::_hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept
+{
+    // MSVC 19.33 produces surprisingly good assembly for this without stack allocation.
+    const uintptr_t data[2]{ std::bit_cast<uintptr_t>(fontFace), glyphIndex };
+    return til::hash(&data[0], sizeof(data));
+}
+
+// Stores a new entry in the first free slot of the probe sequence that starts at `hash`
+// and takes a reference on `fontFace`. The table is grown first if it has reached its capacity.
+BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::_insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash)
+{
+    if (_size >= _capacity)
+    {
+        _bumpSize();
+    }
+
+    ++_size;
+
+    for (auto i = hash;; ++i)
+    {
+        auto& entry = _map[i & _mapMask];
+        if (!entry.fontFace)
+        {
+            entry.fontFace = fontFace;
+            entry.glyphIndex = glyphIndex;
+            entry.fontFace->AddRef();
+            return entry;
+        }
+    }
+}
+
+// Doubles the table and moves all occupied slots over into the new one.
+void BackendD3D11::GlyphCacheMap::_bumpSize()
+{
+    const auto newMapSize = _map.size() * 2;
+    const auto newMapMask = newMapSize - 1;
+    FAIL_FAST_IF(newMapSize >= INT32_MAX); // overflow/truncation protection
+
+    // Like _insert() this relies on a freshly constructed buffer consisting of free slots only.
+    decltype(_map) newMap(newMapSize);
+
+    for (const auto& entry : _map)
+    {
+        // Free slots carry no data. Copying them over could overwrite a live entry.
+        if (!entry.fontFace)
+        {
+            continue;
+        }
+
+        // Two entries may map to the same slot even in the larger table. Use the same linear
+        // probing as FindOrInsert()/_insert() so that no entry (and its reference) gets lost.
+        for (auto i = _hash(entry.fontFace, entry.glyphIndex);; ++i)
+        {
+            auto& slot = newMap[i & newMapMask];
+            if (!slot.fontFace)
+            {
+                slot = entry;
+                break;
+            }
+        }
+    }
+
+    _map = std::move(newMap);
+    _mapMask = newMapMask;
+    _capacity = newMapSize / 2;
+}
+
+BackendD3D11::BackendD3D11(wil::com_ptr device, wil::com_ptr deviceContext) :
+    _device{ std::move(device) },
+    _deviceContext{ std::move(deviceContext) }
+{
+    THROW_IF_FAILED(_device->CreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _vertexShader.addressof()));
+    THROW_IF_FAILED(_device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _pixelShader.addressof()));
+
+    {
+        static constexpr D3D11_BUFFER_DESC desc{
+            .ByteWidth = sizeof(VSConstBuffer),
+            .Usage = D3D11_USAGE_DEFAULT,
+            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+        };
+        THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _vsConstantBuffer.addressof()));
+    }
+
+    {
+        static constexpr D3D11_BUFFER_DESC desc{
+            .ByteWidth = sizeof(PSConstBuffer),
+            .Usage = D3D11_USAGE_DEFAULT,
+            .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+        };
+        THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _psConstantBuffer.addressof()));
+    }
+
+    {
+        // The final step of the ClearType blending algorithm is a lerp()
between the premultiplied alpha + // background color and straight alpha foreground color given the 3 RGB weights in alphaCorrected: + // lerp(background, foreground, weights) + // Which is equivalent to: + // background * (1 - weights) + foreground * weights + // + // This COULD be implemented using dual source color blending like so: + // .SrcBlend = D3D11_BLEND_SRC1_COLOR + // .DestBlend = D3D11_BLEND_INV_SRC1_COLOR + // .BlendOp = D3D11_BLEND_OP_ADD + // Because: + // background * (1 - weights) + foreground * weights + // ^ ^ ^ ^ ^ + // Dest INV_SRC1_COLOR | Src SRC1_COLOR + // OP_ADD + // + // BUT we need simultaneous support for regular "source over" alpha blending + // (SHADING_TYPE_PASSTHROUGH) like this: + // background * (1 - alpha) + foreground + // + // This is why we set: + // .SrcBlend = D3D11_BLEND_ONE + // + // --> We need to multiply the foreground with the weights ourselves. + static constexpr D3D11_BLEND_DESC desc{ + .RenderTarget = { { + .BlendEnable = TRUE, + .SrcBlend = D3D11_BLEND_ONE, + .DestBlend = D3D11_BLEND_INV_SRC1_COLOR, + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = D3D11_BLEND_ONE, + .DestBlendAlpha = D3D11_BLEND_INV_SRC1_ALPHA, + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + } }, + }; + THROW_IF_FAILED(_device->CreateBlendState(&desc, _blendState.addressof())); + } + + { + static constexpr D3D11_BLEND_DESC desc{ + .RenderTarget = { { + .BlendEnable = TRUE, + .SrcBlend = D3D11_BLEND_ONE, + .DestBlend = D3D11_BLEND_ONE, + .BlendOp = D3D11_BLEND_OP_SUBTRACT, + // In order for D3D to be okay with us using dual source blending in the shader, we need to use dual + // source blending in the blend state. Alternatively we could write an extra shader for these cursors. 
+ .SrcBlendAlpha = D3D11_BLEND_SRC1_ALPHA, + .DestBlendAlpha = D3D11_BLEND_ZERO, + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + } }, + }; + THROW_IF_FAILED(_device->CreateBlendState(&desc, _blendStateInvert.addressof())); + } + +#ifndef NDEBUG + _sourceDirectory = std::filesystem::path{ __FILE__ }.parent_path(); + _sourceCodeWatcher = wil::make_folder_change_reader_nothrow(_sourceDirectory.c_str(), false, wil::FolderChangeEvents::FileName | wil::FolderChangeEvents::LastWriteTime, [this](wil::FolderChangeEvent, PCWSTR path) { + if (til::ends_with(path, L".hlsl")) + { + auto expected = INT64_MAX; + const auto invalidationTime = std::chrono::steady_clock::now() + std::chrono::milliseconds(100); + _sourceCodeInvalidationTime.compare_exchange_strong(expected, invalidationTime.time_since_epoch().count(), std::memory_order_relaxed); + } + }); +#endif +} + +void BackendD3D11::Render(RenderingPayload& p) +{ + _debugUpdateShaders(); + + if (_generation != p.s.generation()) + { + _handleSettingsUpdate(p); + } + + // After a Present() the render target becomes unbound. 
+ _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); + + _drawBackground(p); + _drawCursorPart1(p); + _drawText(p); + _drawGridlines(p); + _drawCursorPart2(p); + _drawSelection(p); + _flushQuads(p); + + if (_customPixelShader) + { + _executeCustomShader(p); + } + + _swapChainManager.Present(p); +} + +bool BackendD3D11::RequiresContinuousRedraw() noexcept +{ + return _requiresContinuousRedraw; +} + +void BackendD3D11::WaitUntilCanRender() noexcept +{ + _swapChainManager.WaitUntilCanRender(); +} + +void BackendD3D11::_debugUpdateShaders() noexcept +try +{ +#ifndef NDEBUG + const auto invalidationTime = _sourceCodeInvalidationTime.load(std::memory_order_relaxed); + + if (invalidationTime == INT64_MAX || invalidationTime > std::chrono::steady_clock::now().time_since_epoch().count()) + { + return; + } + + _sourceCodeInvalidationTime.store(INT64_MAX, std::memory_order_relaxed); + + static const auto compile = [](const std::filesystem::path& path, const char* target) { + wil::com_ptr error; + wil::com_ptr blob; + const auto hr = D3DCompileFromFile( + /* pFileName */ path.c_str(), + /* pDefines */ nullptr, + /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, + /* pEntrypoint */ "main", + /* pTarget */ target, + /* Flags1 */ D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS, + /* Flags2 */ 0, + /* ppCode */ blob.addressof(), + /* ppErrorMsgs */ error.addressof()); + + if (error) + { + std::thread t{ [error = std::move(error)]() noexcept { + MessageBoxA(nullptr, static_cast(error->GetBufferPointer()), "Compilation error", MB_ICONERROR | MB_OK); + } }; + t.detach(); + } + + THROW_IF_FAILED(hr); + return blob; + }; + + struct FileVS + { + std::wstring_view filename; + wil::com_ptr BackendD3D11::*target; + }; + struct FilePS + { + std::wstring_view filename; + wil::com_ptr BackendD3D11::*target; + }; + + static constexpr std::array filesVS{ + FileVS{ 
L"shader_vs.hlsl", &BackendD3D11::_vertexShader }, + }; + static constexpr std::array filesPS{ + FilePS{ L"shader_ps.hlsl", &BackendD3D11::_pixelShader }, + }; + + std::array, filesVS.size()> compiledVS; + std::array, filesPS.size()> compiledPS; + + // Compile our files before moving them into `this` below to ensure we're + // always in a consistent state where all shaders are seemingly valid. + for (size_t i = 0; i < filesVS.size(); ++i) + { + const auto blob = compile(_sourceDirectory / filesVS[i].filename, "vs_4_0"); + THROW_IF_FAILED(_device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledVS[i].addressof())); + } + for (size_t i = 0; i < filesPS.size(); ++i) + { + const auto blob = compile(_sourceDirectory / filesPS[i].filename, "ps_4_0"); + THROW_IF_FAILED(_device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledPS[i].addressof())); + } + + for (size_t i = 0; i < filesVS.size(); ++i) + { + this->*filesVS[i].target = std::move(compiledVS[i]); + } + for (size_t i = 0; i < filesPS.size(); ++i) + { + this->*filesPS[i].target = std::move(compiledPS[i]); + } +#endif +} +CATCH_LOG() + +void BackendD3D11::_handleSettingsUpdate(const RenderingPayload& p) +{ + _swapChainManager.UpdateSwapChainSettings( + p, + _device.get(), + [this]() { + _renderTargetView.reset(); + _customRenderTargetView.reset(); + _deviceContext->ClearState(); + _deviceContext->Flush(); + }, + [this]() { + _renderTargetView.reset(); + _customRenderTargetView.reset(); + _deviceContext->ClearState(); + }); + + if (!_renderTargetView) + { + const auto buffer = _swapChainManager.GetBuffer(); + THROW_IF_FAILED(_device->CreateRenderTargetView(buffer.get(), nullptr, _renderTargetView.put())); + } + + const auto fontChanged = _fontGeneration != p.s->font.generation(); + const auto miscChanged = _miscGeneration != p.s->misc.generation(); + const auto cellCountChanged = _cellCount != p.s->cellCount; + + if (fontChanged) + { + 
DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); + _resetGlyphAtlasNeeded = true; + + if (_d2dRenderTarget) + { + _d2dRenderTargetUpdateFontSettings(*p.s->font); + } + } + + if (cellCountChanged) + { + _recreateBackgroundColorBitmap(p.s->cellCount); + } + + if (miscChanged) + { + _recreateCustomShader(p); + } + + if (_customPixelShader && !_customRenderTargetView) + { + _recreateCustomRenderTargetView(p.s->targetSize); + } + + _recreateConstBuffer(p); + _setupDeviceContextState(p); + + _generation = p.s.generation(); + _fontGeneration = p.s->font.generation(); + _miscGeneration = p.s->misc.generation(); + _targetSize = p.s->targetSize; + _cellCount = p.s->cellCount; +} + +void BackendD3D11::_recreateCustomShader(const RenderingPayload& p) +{ + _customRenderTargetView.reset(); + _customOffscreenTexture.reset(); + _customOffscreenTextureView.reset(); + _customVertexShader.reset(); + _customPixelShader.reset(); + _customShaderConstantBuffer.reset(); + _customShaderSamplerState.reset(); + _requiresContinuousRedraw = false; + + if (!p.s->misc->customPixelShaderPath.empty()) + { + const char* target = nullptr; + switch (_device->GetFeatureLevel()) + { + case D3D_FEATURE_LEVEL_10_0: + target = "ps_4_0"; + break; + case D3D_FEATURE_LEVEL_10_1: + target = "ps_4_1"; + break; + default: + target = "ps_5_0"; + break; + } + + static constexpr auto flags = + D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR +#ifdef NDEBUG + | D3DCOMPILE_OPTIMIZATION_LEVEL3; +#else + // Only enable strictness and warnings in DEBUG mode + // as these settings makes it very difficult to develop + // shaders as windows terminal is not telling the user + // what's wrong, windows terminal just fails. 
+ // Keep it in DEBUG mode to catch errors in shaders + // shipped with windows terminal + | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS | D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; +#endif + + wil::com_ptr error; + wil::com_ptr blob; + const auto hr = D3DCompileFromFile( + /* pFileName */ p.s->misc->customPixelShaderPath.c_str(), + /* pDefines */ nullptr, + /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, + /* pEntrypoint */ "main", + /* pTarget */ target, + /* Flags1 */ flags, + /* Flags2 */ 0, + /* ppCode */ blob.addressof(), + /* ppErrorMsgs */ error.addressof()); + + // Unless we can determine otherwise, assume this shader requires evaluation every frame + _requiresContinuousRedraw = true; + + if (SUCCEEDED(hr)) + { + THROW_IF_FAILED(_device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, _customPixelShader.put())); + + // Try to determine whether the shader uses the Time variable + wil::com_ptr reflector; + if (SUCCEEDED_LOG(D3DReflect(blob->GetBufferPointer(), blob->GetBufferSize(), IID_PPV_ARGS(reflector.put())))) + { + if (ID3D11ShaderReflectionConstantBuffer* constantBufferReflector = reflector->GetConstantBufferByIndex(0)) // shader buffer + { + if (ID3D11ShaderReflectionVariable* variableReflector = constantBufferReflector->GetVariableByIndex(0)) // time + { + D3D11_SHADER_VARIABLE_DESC variableDescriptor; + if (SUCCEEDED_LOG(variableReflector->GetDesc(&variableDescriptor))) + { + // only if time is used + _requiresContinuousRedraw = WI_IsFlagSet(variableDescriptor.uFlags, D3D_SVF_USED); + } + } + } + } + } + else + { + if (error) + { + LOG_HR_MSG(hr, "%*hs", error->GetBufferSize(), error->GetBufferPointer()); + } + else + { + LOG_HR(hr); + } + if (p.warningCallback) + { + p.warningCallback(D2DERR_SHADER_COMPILE_FAILED); + } + } + } + else if (p.s->misc->useRetroTerminalEffect) + { + THROW_IF_FAILED(_device->CreatePixelShader(&custom_shader_ps[0], sizeof(custom_shader_ps), nullptr, 
_customPixelShader.put())); + // We know the built-in retro shader doesn't require continuous redraw. + _requiresContinuousRedraw = false; + } + + if (_customPixelShader) + { + THROW_IF_FAILED(_device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _customVertexShader.put())); + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(CustomConstBuffer); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _customShaderConstantBuffer.put())); + } + + { + D3D11_SAMPLER_DESC desc{}; + desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; + desc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; + desc.MaxAnisotropy = 1; + desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; + desc.MaxLOD = D3D11_FLOAT32_MAX; + THROW_IF_FAILED(_device->CreateSamplerState(&desc, _customShaderSamplerState.put())); + } + + _customShaderStartTime = std::chrono::steady_clock::now(); + } +} + +void BackendD3D11::_recreateCustomRenderTargetView(u16x2 targetSize) +{ + // Avoid memory usage spikes by releasing memory first. + _customOffscreenTexture.reset(); + _customOffscreenTextureView.reset(); + + // This causes our regular rendered contents to end up in the offscreen texture. We'll then use the + // `_customRenderTargetView` to render into the swap chain using the custom (user provided) shader. 
+ _customRenderTargetView = std::move(_renderTargetView); + + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = targetSize.x; + desc.Height = targetSize.y; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc = { 1, 0 }; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _customOffscreenTexture.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_customOffscreenTexture.get(), nullptr, _customOffscreenTextureView.addressof())); + THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); +} + +void BackendD3D11::_recreateBackgroundColorBitmap(u16x2 cellCount) +{ + // Avoid memory usage spikes by releasing memory first. + _backgroundBitmap.reset(); + _backgroundBitmapView.reset(); + + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = cellCount.x; + desc.Height = cellCount.y; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + desc.SampleDesc = { 1, 0 }; + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); +} + +void BackendD3D11::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept +{ + _d2dRenderTarget->SetDpi(font.dpi, font.dpi); + _d2dRenderTarget->SetTextAntialiasMode(static_cast(font.antialiasingMode)); +} + +void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) +{ + { + VSConstBuffer data; + data.positionScale = { 2.0f / p.s->targetSize.x, -2.0f / p.s->targetSize.y }; + _deviceContext->UpdateSubresource(_vsConstantBuffer.get(), 0, nullptr, &data, 0, 0); + } + { + PSConstBuffer data; + data.backgroundColor = 
colorFromU32Premultiply(p.s->misc->backgroundColor); + data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; + data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; + DWrite_GetGammaRatios(_gamma, data.gammaRatios); + data.enhancedContrast = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? _cleartypeEnhancedContrast : _grayscaleEnhancedContrast; + data.dashedLineLength = p.s->font->underlineWidth * 3.0f; + _deviceContext->UpdateSubresource(_psConstantBuffer.get(), 0, nullptr, &data, 0, 0); + } +} + +void BackendD3D11::_setupDeviceContextState(const RenderingPayload& p) +{ + // IA: Input Assembler + _deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + _deviceContext->IASetIndexBuffer(_indexBuffer.get(), _indicesFormat, 0); + + // VS: Vertex Shader + _deviceContext->VSSetShader(_vertexShader.get(), nullptr, 0); + _deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof()); + _deviceContext->VSSetShaderResources(0, 1, _instanceBufferView.addressof()); + + // RS: Rasterizer Stage + D3D11_VIEWPORT viewport{}; + viewport.Width = static_cast(p.s->targetSize.x); + viewport.Height = static_cast(p.s->targetSize.y); + _deviceContext->RSSetViewports(1, &viewport); + + // PS: Pixel Shader + ID3D11ShaderResourceView* const resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; + _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); + _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); + _deviceContext->PSSetShaderResources(0, 2, &resources[0]); + + // OM: Output Merger + _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); + _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); +} + +void BackendD3D11::_d2dBeginDrawing() noexcept +{ + if (!_d2dBeganDrawing) + { + _d2dRenderTarget->BeginDraw(); + _d2dBeganDrawing = true; + } +} + +void BackendD3D11::_d2dEndDrawing() +{ 
+ if (_d2dBeganDrawing) + { + THROW_IF_FAILED(_d2dRenderTarget->EndDraw()); + _d2dBeganDrawing = false; + } +} + +void BackendD3D11::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) +{ + // This block of code calculates the size of a power-of-2 texture that has an area larger than the targetSize + // of the swap chain. In other words for a 985x1946 pixel swap chain (area = 1916810) it would result in a u/v + // of 2048x1024 (area = 2097152). This has 2 benefits: GPUs like power-of-2 textures and it ensures that we don't + // resize the texture every time you resize the window by a pixel. Instead it only grows/shrinks by a factor of 2. + auto area = static_cast(p.s->targetSize.x) * static_cast(p.s->targetSize.y); + // The index returned by _BitScanReverse is undefined when the input is 0. We can simultaneously + // guard against this and avoid unreasonably small textures, by clamping the min. texture size. + area = std::max(uint32_t{ 256 * 256 }, area); + unsigned long index; + _BitScanReverse(&index, area - 1); + const auto u = ::base::saturated_cast(1u << ((index + 2) / 2)); + const auto v = ::base::saturated_cast(1u << ((index + 1) / 2)); + + if (u != _rectPacker.width || v != _rectPacker.height) + { + _d2dRenderTarget.reset(); + _d2dRenderTarget4.reset(); + _glyphAtlas.reset(); + _glyphAtlasView.reset(); + + { + D3D11_TEXTURE2D_DESC desc{}; + desc.Width = u; + desc.Height = v; + desc.MipLevels = 1; + desc.ArraySize = 1; + desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + desc.SampleDesc = { 1, 0 }; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _glyphAtlas.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_glyphAtlas.get(), nullptr, _glyphAtlasView.addressof())); + } + + { + const auto surface = _glyphAtlas.query(); + + D2D1_RENDER_TARGET_PROPERTIES props{}; + props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; + props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, 
D2D1_ALPHA_MODE_PREMULTIPLIED };
+            wil::com_ptr renderTarget;
+            THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof()));
+            _d2dRenderTarget = renderTarget.query();
+            _d2dRenderTarget4 = renderTarget.try_query();
+
+            // We don't really use D2D for anything except DWrite, but it
+            // can't hurt to ensure that everything it does is pixel aligned.
+            _d2dRenderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED);
+            // Ensure that D2D uses the exact same gamma as our shader uses.
+            _d2dRenderTarget->SetTextRenderingParams(_textRenderingParams.get());
+
+            _d2dRenderTargetUpdateFontSettings(*p.s->font);
+        }
+
+        {
+            static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 };
+            THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put()));
+        }
+
+        ID3D11ShaderResourceView* const resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() };
+        _deviceContext->PSSetShaderResources(0, 2, &resources[0]);
+    }
+
+    _glyphCache.Clear();
+    _rectPackerData = Buffer{ u };
+    stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), gsl::narrow_cast(_rectPackerData.size()));
+
+    _d2dBeginDrawing();
+    _d2dRenderTarget->Clear();
+}
+
+BackendD3D11::QuadInstance& BackendD3D11::_getLastQuad() noexcept
+{
+    assert(_instancesSize != 0);
+    return _instances[_instancesSize - 1];
+}
+
+void BackendD3D11::_appendQuad(i32r position, u32 color, ShadingType shadingType)
+{
+    _appendQuad(position, {}, color, shadingType);
+}
+
+void BackendD3D11::_appendQuad(i32r position, i32r texcoord, u32 color, ShadingType shadingType)
+{
+    if (_instancesSize >= _instances.size())
+    {
+        _bumpInstancesSize();
+    }
+
+    _instances[_instancesSize++] = QuadInstance{ position, texcoord, color, static_cast(shadingType) };
+}
+
+// Grows the CPU-side quad list to twice its size (but at least 1024 quads)
+// while keeping the quads that have been queued since the last flush.
+void BackendD3D11::_bumpInstancesSize()
+{
+    decltype(_instances) grown{ std::max<size_t>(1024, _instances.size() << 1) };
+
+    // _appendQuad() calls this in the middle of a frame and _flushQuads() later uploads the first
+    // _instancesSize quads. Simply replacing the buffer would throw all of them away.
+    // QuadInstance is already copied with memcpy() when it's uploaded in _flushQuads().
+    if (_instancesSize)
+    {
+        memcpy(grown.data(), _instances.data(), _instancesSize * sizeof(QuadInstance));
+    }
+
+    _instances = std::move(grown);
+}
+
+void BackendD3D11::_flushQuads(const RenderingPayload& p)
+{
+    if (!_instancesSize)
+    {
+        return;
+    }
+
+
if (_instancesSize > _instanceBufferSize) + { + _recreateInstanceBuffers(p); + } + + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_instanceBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + memcpy(mapped.pData, _instances.data(), _instancesSize * sizeof(QuadInstance)); + _deviceContext->Unmap(_instanceBuffer.get(), 0); + } + + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + + if (_indicesFormat == DXGI_FORMAT_R16_UINT) + { + auto data = static_cast(mapped.pData); + const u16 vertices = gsl::narrow_cast(4 * _instancesSize); + for (u16 off = 0; off < vertices; off += 4) + { + *data++ = off + 0; + *data++ = off + 1; + *data++ = off + 2; + *data++ = off + 3; + *data++ = off + 2; + *data++ = off + 1; + } + } + else + { + assert(_indicesFormat == DXGI_FORMAT_R32_UINT); + auto data = static_cast(mapped.pData); + const u32 vertices = gsl::narrow_cast(4 * _instancesSize); + for (u32 off = 0; off < vertices; off += 4) + { + *data++ = off + 0; + *data++ = off + 1; + *data++ = off + 2; + *data++ = off + 3; + *data++ = off + 2; + *data++ = off + 1; + } + } + + _deviceContext->Unmap(_indexBuffer.get(), 0); + } + + // I found 4 approaches to drawing lots of quads quickly. + // They can often be found in discussions about "particle" or "point sprite" rendering in game development. + // * Compute Shader: My understanding is that at the time of writing games are moving over to bucketing + // particles into "tiles" on the screen and drawing them with a compute shader. While this improves + // performance, it doesn't mix well with our goal of allowing arbitrary overlaps between glyphs. + // Additionally none of the next 3 approaches use any significant amount of GPU time in the first place. + // * Geometry Shader: Geometry shaders can generate vertices on the fly, which would neatly replace + // our need for an index buffer. 
The reason this wasn't chosen is the same as for the next point. + // * DrawInstanced: On my own hardware (Nvidia RTX 4090) this seems to perform ~50% better than the final point, + // but with no significant difference in power draw. However the popular "Vertex Shader Tricks" talk from + // Bill Bilodeau at GDC 2014 suggests that this at least doesn't apply to 2014ish hardware, which supposedly + // performs poorly with very small, instanced meshes. Furthermore, public feedback suggests that we still + // have a lot of users with older hardware, so I've chosen the following approach, suggested in the talk. + // * DrawIndexed: This works about the same as DrawInstanced, but instead of using D3D11_INPUT_PER_INSTANCE_DATA, + // it uses a SRV (shader resource view) for instance data and maps each SV_VertexID to a SRV slot. + _deviceContext->DrawIndexed(gsl::narrow_cast(6 * _instancesSize), 0, 0); + + _instancesSize = 0; +} + +void BackendD3D11::_recreateInstanceBuffers(const RenderingPayload& p) +{ + static constexpr size_t R16max = 1 << 16; + // While the viewport size of the terminal is probably a good initial estimate for the amount of instances we'll see, + // I feel like we should ensure that the estimate doesn't exceed the limit for a DXGI_FORMAT_R16_UINT index buffer. + const auto estimatedInstances = std::min(R16max / 4, static_cast(p.s->cellCount.x) * p.s->cellCount.y); + const auto minSize = std::max(_instancesSize, estimatedInstances); + // std::bit_ceil will result in a nice exponential growth curve. I don't know exactly how structured buffers are treated + // by various drivers, but I'm assuming that they prefer buffer sizes that are close to power-of-2 sizes as well. + const auto newInstancesSize = std::bit_ceil(minSize * sizeof(QuadInstance)) / sizeof(QuadInstance); + const auto newIndicesSize = newInstancesSize * 6; + const auto vertices = newInstancesSize * 4; + const auto indicesFormat = vertices <= R16max ? 
DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; + const auto indexSize = vertices <= R16max ? sizeof(u16) : sizeof(u32); + + _indexBuffer.reset(); + _instanceBuffer.reset(); + _instanceBufferView.reset(); + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = gsl::narrow(newIndicesSize * indexSize); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _indexBuffer.addressof())); + } + + { + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = gsl::narrow(newInstancesSize * sizeof(QuadInstance)); + desc.Usage = D3D11_USAGE_DYNAMIC; + desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; + desc.StructureByteStride = sizeof(QuadInstance); + THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _instanceBuffer.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_instanceBuffer.get(), nullptr, _instanceBufferView.addressof())); + } + + _deviceContext->IASetIndexBuffer(_indexBuffer.get(), indicesFormat, 0); + _deviceContext->VSSetShaderResources(0, 1, _instanceBufferView.addressof()); + + _instanceBufferSize = newInstancesSize; + _indicesFormat = indicesFormat; +} + +void BackendD3D11::_drawBackground(const RenderingPayload& p) +{ + { + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto data = static_cast(mapped.pData); + for (size_t i = 0; i < p.s->cellCount.y; ++i) + { + memcpy(data, p.backgroundBitmap.data() + i * p.s->cellCount.x, p.s->cellCount.x * sizeof(u32)); + data += mapped.RowPitch; + } + _deviceContext->Unmap(_backgroundBitmap.get(), 0); + } + { + const i32r rect{ 0, 0, p.s->targetSize.x, p.s->targetSize.y }; + _appendQuad(rect, rect, 0, ShadingType::Background); + } +} + +void BackendD3D11::_drawText(RenderingPayload& p) +{ + if 
(_resetGlyphAtlasNeeded) + { + _resetGlyphAtlasAndBeginDraw(p); + _resetGlyphAtlasNeeded = false; + } + + u16 y = 0; + + for (auto& row : p.rows) + { + const auto baselineY = y * p.d.font.cellSizeDIP.y + p.s->font->baselineInDIP; + f32 cumulativeAdvance = 0; + + for (const auto& m : row.mappings) + { + for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) + { + bool inserted = false; + auto& entry = _glyphCache.FindOrInsert(m.fontFace.get(), row.glyphIndices[x], inserted); + if (inserted) + { + _d2dBeginDrawing(); + + if (!_drawGlyph(p, entry, m.fontEmSize)) + { + _d2dEndDrawing(); + _flushQuads(p); + _resetGlyphAtlasAndBeginDraw(p); + --x; + continue; // retry + } + } + + if (entry.shadingType) + { + const auto l = static_cast((cumulativeAdvance + row.glyphOffsets[x].advanceOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.x; + const auto t = static_cast((baselineY - row.glyphOffsets[x].ascenderOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.y; + const auto w = entry.texcoord.right - entry.texcoord.left; + const auto h = entry.texcoord.bottom - entry.texcoord.top; + const i32r rect{ l, t, l + w, t + h }; + row.top = std::min(row.top, rect.top); + row.bottom = std::max(row.bottom, rect.bottom); + _appendQuad(rect, entry.texcoord, row.colors[x], static_cast(entry.shadingType)); + } + + cumulativeAdvance += row.glyphAdvances[x]; + } + } + + ++y; + } + + _d2dEndDrawing(); +} + +bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) +{ + DWRITE_GLYPH_RUN glyphRun{}; + glyphRun.fontFace = entry.fontFace; + glyphRun.fontEmSize = fontEmSize; + glyphRun.glyphCount = 1; + glyphRun.glyphIndices = &entry.glyphIndex; + + auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); + if (box.left >= box.right || box.top >= box.bottom) + { + entry = {}; + return true; + } + + box.left *= p.d.font.pixelPerDIP; + box.top *= p.d.font.pixelPerDIP; + box.right *= p.d.font.pixelPerDIP; + box.bottom *= p.d.font.pixelPerDIP; + + // We'll add a 1px 
padding on all 4 sides, by adding +2px to the width and +1px to the baseline origin. + // We do this to avoid neighboring glyphs from overlapping, since the blackbox measurement is only an estimate. + + stbrp_rect rect{}; + rect.w = gsl::narrow_cast(box.right - box.left + 2.5f); + rect.h = gsl::narrow_cast(box.bottom - box.top + 2.5f); + if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) + { + return false; + } + + const D2D1_POINT_2F baseline{ + roundf(rect.x - box.left + 1.0f) * p.d.font.dipPerPixel, + roundf(rect.y - box.top + 1.0f) * p.d.font.dipPerPixel, + }; + const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); + + entry.shadingType = gsl::narrow_cast(colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? ShadingType::TextClearType : ShadingType::TextGrayscale)); + entry.offset.x = gsl::narrow_cast(lround(box.left)); + entry.offset.y = gsl::narrow_cast(lround(box.top)); + entry.texcoord.left = rect.x; + entry.texcoord.top = rect.y; + entry.texcoord.right = rect.x + rect.w; + entry.texcoord.bottom = rect.y + rect.h; + return true; +} + +void BackendD3D11::_drawGridlines(const RenderingPayload& p) +{ + u16 y = 0; + for (const auto& row : p.rows) + { + if (!row.gridLineRanges.empty()) + { + _drawGridlineRow(p, row, y); + } + y++; + } +} + +void BackendD3D11::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) +{ + const auto top = p.s->font->cellSize.y * y; + const auto bottom = top + p.s->font->cellSize.y; + + for (const auto& r : row.gridLineRanges) + { + // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
+ assert(r.lines.any()); + + // Every line type below starts from a fresh copy of `full`, the rect covering the whole cell range + // of this row. Sharing one mutable rect between the branches made them depend on each other: + // the Left loop narrowed the horizontal extent later used by Top/Bottom/underlines, Top moved the + // bottom edge that Bottom is derived from, and Bottom moved the top edge the underlines offset from. + const i32r full{ r.from * p.s->font->cellSize.x, top, r.to * p.s->font->cellSize.x, bottom }; + + if (r.lines.test(GridLines::Left)) + { + // One thin vertical line at the left edge of every cell in the range. + auto rect = full; + for (auto i = r.from; i < r.to; ++i) + { + rect.left = i * p.s->font->cellSize.x; + rect.right = rect.left + p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + } + if (r.lines.test(GridLines::Top)) + { + auto rect = full; + rect.bottom = rect.top + p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + if (r.lines.test(GridLines::Right)) + { + // One thin vertical line at the right edge of every cell in the range. + auto rect = full; + for (auto i = r.to; i > r.from; --i) + { + rect.right = i * p.s->font->cellSize.x; + rect.left = rect.right - p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + } + if (r.lines.test(GridLines::Bottom)) + { + auto rect = full; + rect.top = rect.bottom - p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + if (r.lines.test(GridLines::Underline)) + { + auto rect = full; + rect.top += p.s->font->underlinePos; + rect.bottom = rect.top + p.s->font->underlineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + if (r.lines.test(GridLines::HyperlinkUnderline)) + { + // Same geometry as a regular underline, but shaded as a dashed line. + auto rect = full; + rect.top += p.s->font->underlinePos; + rect.bottom = rect.top + p.s->font->underlineWidth; + _appendQuad(rect, r.color, ShadingType::DashedLine); + } + if (r.lines.test(GridLines::DoubleUnderline)) + { + // doubleUnderlinePos.x / .y hold the vertical offsets of the first / second line. + auto rect = full; + rect.top = top + p.s->font->doubleUnderlinePos.x; + rect.bottom = rect.top + p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + + rect.top = top + p.s->font->doubleUnderlinePos.y; + rect.bottom = rect.top + p.s->font->thinLineWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + if (r.lines.test(GridLines::Strikethrough)) + { + auto rect = full; + rect.top = top + p.s->font->strikethroughPos; + rect.bottom = rect.top + p.s->font->strikethroughWidth; + _appendQuad(rect, r.color, ShadingType::SolidFill); + } + } +} + +void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) +{ + _cursorRects.clear(); 
+ if (!p.cursorRect) + { + return; + } + + const auto color = p.s->cursor->cursorColor; + const auto offset = gsl::narrow_cast(p.cursorRect.top * p.s->cellCount.x); + + for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1) + { + const auto x0 = x1; + const auto bg = p.backgroundBitmap[offset + x1] | 0xff000000; + + for (; x1 < p.cursorRect.right && (p.backgroundBitmap[offset + x1] | 0xff000000) == bg; ++x1) + { + } + + auto& c0 = _cursorRects.emplace_back(CursorRect{ + i32r{ + p.s->font->cellSize.x * x0, + p.s->font->cellSize.y * p.cursorRect.top, + p.s->font->cellSize.x * x1, + p.s->font->cellSize.y * p.cursorRect.bottom, + }, + color == 0xffffffff ? bg ^ 0x3f3f3f : color, + }); + + switch (static_cast(p.s->cursor->cursorType)) + { + case CursorType::Legacy: + c0.rect.top = c0.rect.bottom - ((c0.rect.bottom - c0.rect.top) * p.s->cursor->heightPercentage + 50) / 100; + break; + case CursorType::VerticalBar: + c0.rect.right = c0.rect.left + p.s->font->thinLineWidth; + break; + case CursorType::Underscore: + c0.rect.top += p.s->font->underlinePos; + c0.rect.bottom = c0.rect.top + p.s->font->underlineWidth; + break; + case CursorType::EmptyBox: + { + auto& c1 = _cursorRects.emplace_back(c0); + if (x0 == p.cursorRect.left) + { + auto& c = _cursorRects.emplace_back(c0); + c.rect.top += p.s->font->thinLineWidth; + c.rect.bottom -= p.s->font->thinLineWidth; + c.rect.right = c.rect.left + p.s->font->thinLineWidth; + } + if (x1 == p.cursorRect.right) + { + auto& c = _cursorRects.emplace_back(c0); + c.rect.top += p.s->font->thinLineWidth; + c.rect.bottom -= p.s->font->thinLineWidth; + c.rect.left = c.rect.right - p.s->font->thinLineWidth; + } + c0.rect.bottom = c0.rect.top + p.s->font->thinLineWidth; + c1.rect.top = c1.rect.bottom - p.s->font->thinLineWidth; + break; + } + case CursorType::FullBox: + break; + case CursorType::DoubleUnderscore: + { + auto& c1 = _cursorRects.emplace_back(c0); + c0.rect.top += p.s->font->doubleUnderlinePos.x; + c0.rect.bottom = 
c0.rect.top + p.s->font->thinLineWidth; + c1.rect.top += p.s->font->doubleUnderlinePos.y; + c1.rect.bottom = c1.rect.top + p.s->font->thinLineWidth; + break; + } + default: + break; + } + } + + if (color == 0xffffffff) + { + for (auto& c : _cursorRects) + { + _appendQuad(c.rect, c.color, ShadingType::SolidFill); + c.color = 0xffffffff; + } + } +} + +void BackendD3D11::_drawCursorPart2(const RenderingPayload& p) +{ + if (!p.cursorRect) + { + return; + } + + const auto color = p.s->cursor->cursorColor; + + if (color == 0xffffffff) + { + _flushQuads(p); + _deviceContext->OMSetBlendState(_blendStateInvert.get(), nullptr, 0xffffffff); + } + + for (const auto& c : _cursorRects) + { + _appendQuad(c.rect, c.color, ShadingType::SolidFill); + } + + if (color == 0xffffffff) + { + _flushQuads(p); + _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); + } +} + +void BackendD3D11::_drawSelection(const RenderingPayload& p) +{ + u16 y = 0; + u16 lastFrom = 0; + u16 lastTo = 0; + + for (const auto& row : p.rows) + { + if (row.selectionTo > row.selectionFrom) + { + // If the current selection line matches the previous one, we can just extend the previous quad downwards. + // The way this is implemented isn't very smart, but we also don't have very many rows to iterate through. 
+ if (row.selectionFrom == lastFrom && row.selectionTo == lastTo) + { + _getLastQuad().position.bottom = p.s->font->cellSize.y * (y + 1); + } + else + { + const i32r rect{ + p.s->font->cellSize.x * row.selectionFrom, + p.s->font->cellSize.y * y, + p.s->font->cellSize.x * row.selectionTo, + p.s->font->cellSize.y * (y + 1), + }; + _appendQuad(rect, p.s->misc->selectionColor, ShadingType::SolidFill); + lastFrom = row.selectionFrom; + lastTo = row.selectionTo; + } + } + + y++; + } +} + +void BackendD3D11::_executeCustomShader(RenderingPayload& p) +{ + { + CustomConstBuffer data; + data.time = std::chrono::duration(std::chrono::steady_clock::now() - _customShaderStartTime).count(); + data.scale = p.d.font.pixelPerDIP; + data.resolution.x = static_cast(_cellCount.x * p.s->font->cellSize.x); + data.resolution.y = static_cast(_cellCount.y * p.s->font->cellSize.y); + data.background = colorFromU32(p.s->misc->backgroundColor); + + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + memcpy(mapped.pData, &data, sizeof(data)); + _deviceContext->Unmap(_customShaderConstantBuffer.get(), 0); + } + + { + // Before we do anything else we have to unbound _renderTargetView from being + // a render target, otherwise we can't use it as a shader resource below. 
+ _deviceContext->OMSetRenderTargets(1, _customRenderTargetView.addressof(), nullptr); + + // IA: Input Assembler + _deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); + _deviceContext->IASetIndexBuffer(_indexBuffer.get(), _indicesFormat, 0); + + // VS: Vertex Shader + _deviceContext->VSSetShader(_customVertexShader.get(), nullptr, 0); + _deviceContext->VSSetConstantBuffers(0, 0, nullptr); + _deviceContext->VSSetShaderResources(0, 0, nullptr); + + // RS: Rasterizer Stage + D3D11_VIEWPORT viewport{}; + viewport.Width = static_cast(p.s->targetSize.x); + viewport.Height = static_cast(p.s->targetSize.y); + _deviceContext->RSSetViewports(1, &viewport); + + // PS: Pixel Shader + _deviceContext->PSSetShader(_customPixelShader.get(), nullptr, 0); + _deviceContext->PSSetConstantBuffers(0, 1, _customShaderConstantBuffer.addressof()); + _deviceContext->PSSetShaderResources(0, 1, _customOffscreenTextureView.addressof()); + _deviceContext->PSSetSamplers(0, 1, _customShaderSamplerState.addressof()); + + // OM: Output Merger + _deviceContext->OMSetBlendState(nullptr, nullptr, 0xffffffff); + + _deviceContext->Draw(4, 0); + } + + _setupDeviceContextState(p); + + // With custom shaders, everything might be invalidated, so we have to + // indirectly disable Present1() and its dirty rects this way. 
+ p.dirtyRect = { 0, 0, p.s->cellCount.x, p.s->cellCount.y }; +} + +TIL_FAST_MATH_END diff --git a/src/renderer/atlas/BackendD3D11.h b/src/renderer/atlas/BackendD3D11.h new file mode 100644 index 00000000000..becf9562a19 --- /dev/null +++ b/src/renderer/atlas/BackendD3D11.h @@ -0,0 +1,214 @@ +#pragma once + +#include +#include + +#include "Backend.h" + +namespace Microsoft::Console::Render::Atlas +{ + struct BackendD3D11 : IBackend + { + BackendD3D11(wil::com_ptr device, wil::com_ptr deviceContext); + + void Render(RenderingPayload& payload) override; + bool RequiresContinuousRedraw() noexcept override; + void WaitUntilCanRender() noexcept override; + + private: + // NOTE: D3D constant buffers sizes must be a multiple of 16 bytes. + struct alignas(16) VSConstBuffer + { + // WARNING: Modify this carefully after understanding how HLSL struct packing works. The gist is: + // * Minimum alignment is 4 bytes + // * Members cannot straddle 16 byte boundaries + // This means a structure like {u32; u32; u32; u32x2} would require + // padding so that it is {u32; u32; u32; <4 byte padding>; u32x2}. + // * bool will probably not work the way you want it to, + // because HLSL uses 32-bit bools and C++ doesn't. + alignas(sizeof(f32x2)) f32x2 positionScale; +#pragma warning(suppress : 4324) // 'VSConstBuffer': structure was padded due to alignment specifier + }; + + // WARNING: Same rules as for VSConstBuffer above apply. + struct alignas(16) PSConstBuffer + { + alignas(sizeof(f32x4)) f32x4 backgroundColor; + alignas(sizeof(f32x2)) f32x2 cellCount; + alignas(sizeof(f32x2)) f32x2 cellSize; + alignas(sizeof(f32x4)) f32 gammaRatios[4]{}; + alignas(sizeof(f32)) f32 enhancedContrast = 0; + alignas(sizeof(f32)) f32 dashedLineLength = 0; +#pragma warning(suppress : 4324) // 'PSConstBuffer': structure was padded due to alignment specifier + }; + + // WARNING: Same rules as for VSConstBuffer above apply. 
+ struct alignas(16) CustomConstBuffer + { + alignas(sizeof(f32)) f32 time = 0; + alignas(sizeof(f32)) f32 scale = 0; + alignas(sizeof(f32x2)) f32x2 resolution; + alignas(sizeof(f32x4)) f32x4 background; +#pragma warning(suppress : 4324) // 'CustomConstBuffer': structure was padded due to alignment specifier + }; + + enum class ShadingType + { + Background = 0, + TextGrayscale, + TextClearType, + Passthrough, + DashedLine, + SolidFill, + }; + + struct alignas(16) QuadInstance + { + alignas(sizeof(i32r)) i32r position; + alignas(sizeof(i32r)) i32r texcoord; + alignas(sizeof(u32)) u32 color = 0; + alignas(sizeof(u32)) u32 shadingType = 0; + alignas(sizeof(u32x2)) u32x2 padding; + }; + static_assert(sizeof(QuadInstance) == 48); + + struct GlyphCacheEntry + { + // BODGY: The IDWriteFontFace results from us calling IDWriteFontFallback::MapCharacters + // which at the time of writing returns the same IDWriteFontFace as long as someone is + // holding a reference / the reference count doesn't drop to 0 (see ActiveFaceCache). 
+ IDWriteFontFace* fontFace = nullptr; + u16 glyphIndex = 0; + u16 shadingType = 0; + i32x2 offset; + i32r texcoord; + }; + static_assert(sizeof(GlyphCacheEntry) == 40); + + struct GlyphCacheMap + { + GlyphCacheMap() = default; + ~GlyphCacheMap(); + + GlyphCacheMap(const GlyphCacheMap&) = delete; + GlyphCacheMap(GlyphCacheMap&&) = delete; + + const GlyphCacheMap& operator=(const GlyphCacheMap&) = delete; + GlyphCacheMap& operator=(GlyphCacheMap&& other) noexcept; + + void Clear() noexcept; + GlyphCacheEntry& FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted); + + private: + static size_t _hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept; + GlyphCacheEntry& _insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash); + void _bumpSize(); + + static constexpr u32 initialSize = 256; + + Buffer _map{ initialSize }; + size_t _mapMask = initialSize - 1; + size_t _capacity = initialSize / 2; + size_t _size = 0; + }; + + void _debugUpdateShaders() noexcept; + __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); + void _recreateCustomShader(const RenderingPayload& p); + void _recreateCustomRenderTargetView(u16x2 targetSize); + void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept; + void _recreateBackgroundColorBitmap(u16x2 cellCount); + void _recreateConstBuffer(const RenderingPayload& p); + void _setupDeviceContextState(const RenderingPayload& p); + void _d2dBeginDrawing() noexcept; + void _d2dEndDrawing(); + void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); + QuadInstance& _getLastQuad() noexcept; + void _appendQuad(i32r position, u32 color, ShadingType shadingType); + void _appendQuad(i32r position, i32r texcoord, u32 color, ShadingType shadingType); + __declspec(noinline) void _bumpInstancesSize(); + void _flushQuads(const RenderingPayload& p); + __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); + void _drawBackground(const RenderingPayload& p); + void 
_drawText(RenderingPayload& p); + bool _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); + void _drawGridlines(const RenderingPayload& p); + void _drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y); + void _drawCursorPart1(const RenderingPayload& p); + void _drawCursorPart2(const RenderingPayload& p); + void _drawSelection(const RenderingPayload& p); + void _executeCustomShader(RenderingPayload& p); + + SwapChainManager _swapChainManager; + + wil::com_ptr _device; + wil::com_ptr _deviceContext; + wil::com_ptr _renderTargetView; + + wil::com_ptr _vertexShader; + wil::com_ptr _pixelShader; + wil::com_ptr _blendState; + wil::com_ptr _blendStateInvert; + wil::com_ptr _vsConstantBuffer; + wil::com_ptr _psConstantBuffer; + wil::com_ptr _indexBuffer; + wil::com_ptr _instanceBuffer; + wil::com_ptr _instanceBufferView; + size_t _instanceBufferSize = 0; + Buffer _instances; + size_t _instancesSize = 0; + DXGI_FORMAT _indicesFormat = DXGI_FORMAT_UNKNOWN; + + wil::com_ptr _customRenderTargetView; + wil::com_ptr _customOffscreenTexture; + wil::com_ptr _customOffscreenTextureView; + wil::com_ptr _customVertexShader; + wil::com_ptr _customPixelShader; + wil::com_ptr _customShaderConstantBuffer; + wil::com_ptr _customShaderSamplerState; + std::chrono::steady_clock::time_point _customShaderStartTime; + + wil::com_ptr _backgroundBitmap; + wil::com_ptr _backgroundBitmapView; + + wil::com_ptr _glyphAtlas; + wil::com_ptr _glyphAtlasView; + GlyphCacheMap _glyphCache; + Buffer _rectPackerData; + stbrp_context _rectPacker{}; + + wil::com_ptr _d2dRenderTarget; + wil::com_ptr _d2dRenderTarget4; // Optional. Supported since Windows 10 14393. 
+ wil::com_ptr _brush; + bool _d2dBeganDrawing = false; + bool _resetGlyphAtlasNeeded = false; + + float _gamma = 0; + float _cleartypeEnhancedContrast = 0; + float _grayscaleEnhancedContrast = 0; + wil::com_ptr _textRenderingParams; + + til::generation_t _generation; + til::generation_t _fontGeneration; + til::generation_t _miscGeneration; + u16x2 _targetSize; + u16x2 _cellCount; + + // An empty-box cursor spanning a wide glyph that has different + // background colors on each side results in 6 lines being drawn. + struct CursorRect + { + i32r rect; + u32 color = 0; + }; + til::small_vector _cursorRects; + + bool _requiresContinuousRedraw = false; + +#ifndef NDEBUG + std::filesystem::path _sourceDirectory; + wil::unique_folder_change_reader_nothrow _sourceCodeWatcher; + std::atomic _sourceCodeInvalidationTime{ INT64_MAX }; +#endif + }; +} diff --git a/src/renderer/atlas/DWriteTextAnalysis.cpp b/src/renderer/atlas/DWriteTextAnalysis.cpp index 8b4ba477349..d9456c4ddf9 100644 --- a/src/renderer/atlas/DWriteTextAnalysis.cpp +++ b/src/renderer/atlas/DWriteTextAnalysis.cpp @@ -4,7 +4,7 @@ #pragma warning(disable : 4100) // '...': unreferenced formal parameter #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). 
-using namespace Microsoft::Console::Render; +using namespace Microsoft::Console::Render::Atlas; TextAnalysisSource::TextAnalysisSource(const wchar_t* _text, const UINT32 _textLength) noexcept : _text{ _text }, diff --git a/src/renderer/atlas/DWriteTextAnalysis.h b/src/renderer/atlas/DWriteTextAnalysis.h index 15490ca868f..895d1e0dbea 100644 --- a/src/renderer/atlas/DWriteTextAnalysis.h +++ b/src/renderer/atlas/DWriteTextAnalysis.h @@ -3,7 +3,7 @@ #pragma once -namespace Microsoft::Console::Render +namespace Microsoft::Console::Render::Atlas { struct TextAnalysisSinkResult { diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index e9907d15490..d46028a2130 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -13,14 +13,22 @@ + + + Create + + + + + @@ -31,7 +39,7 @@ Pixel 4.0 true - custom_shader_ps + %(Filename) $(OutDir)$(ProjectName)\%(Filename).h true @@ -42,7 +50,7 @@ Vertex 4.0 true - custom_shader_vs + %(Filename) $(OutDir)$(ProjectName)\%(Filename).h true @@ -52,11 +60,14 @@ true + + true + Pixel 4.0 true - shader_ps + %(Filename) $(OutDir)$(ProjectName)\%(Filename).h true @@ -67,7 +78,7 @@ Vertex 4.0 true - shader_vs + %(Filename) $(OutDir)$(ProjectName)\%(Filename).h true @@ -80,7 +91,7 @@ pch.h - $(OutDir)$(ProjectName)\;%(AdditionalIncludeDirectories) + $(SolutionDir)\oss\stb;$(OutDir)$(ProjectName);%(AdditionalIncludeDirectories) - + \ No newline at end of file diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h new file mode 100644 index 00000000000..940ce2e394a --- /dev/null +++ b/src/renderer/atlas/common.h @@ -0,0 +1,437 @@ +#pragma once + +#include +#include + +#include + +#include "../../renderer/inc/IRenderEngine.hpp" + +namespace Microsoft::Console::Render::Atlas +{ +#define ATLAS_POD_OPS(type) \ + constexpr auto operator<=>(const type&) const noexcept = default; \ + \ + constexpr bool operator==(const type& rhs) const noexcept \ + { \ + if constexpr 
(std::has_unique_object_representations_v) \ + { \ + return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ + } \ + else \ + { \ + return std::is_eq(*this <=> rhs); \ + } \ + } \ + \ + constexpr bool operator!=(const type& rhs) const noexcept \ + { \ + return !(*this == rhs); \ + } + +#define ATLAS_FLAG_OPS(type, underlying) \ + friend constexpr type operator~(type v) noexcept \ + { \ + return static_cast(~static_cast(v)); \ + } \ + friend constexpr type operator|(type lhs, type rhs) noexcept \ + { \ + return static_cast(static_cast(lhs) | static_cast(rhs)); \ + } \ + friend constexpr type operator&(type lhs, type rhs) noexcept \ + { \ + return static_cast(static_cast(lhs) & static_cast(rhs)); \ + } \ + friend constexpr type operator^(type lhs, type rhs) noexcept \ + { \ + return static_cast(static_cast(lhs) ^ static_cast(rhs)); \ + } \ + friend constexpr void operator|=(type& lhs, type rhs) noexcept \ + { \ + lhs = lhs | rhs; \ + } \ + friend constexpr void operator&=(type& lhs, type rhs) noexcept \ + { \ + lhs = lhs & rhs; \ + } \ + friend constexpr void operator^=(type& lhs, type rhs) noexcept \ + { \ + lhs = lhs ^ rhs; \ + } + + template + struct vec2 + { + T x{}; + T y{}; + + ATLAS_POD_OPS(vec2) + }; + + template + struct vec4 + { + T x{}; + T y{}; + T z{}; + T w{}; + + ATLAS_POD_OPS(vec4) + }; + + template + struct rect + { + T left{}; + T top{}; + T right{}; + T bottom{}; + + ATLAS_POD_OPS(rect) + + constexpr bool non_empty() const noexcept + { + return (left < right) & (top < bottom); + } + }; + + using u8 = uint8_t; + + using u16 = uint16_t; + using u16x2 = vec2; + using u16x4 = vec4; + using u16r = rect; + + using i16 = int16_t; + using i16x2 = vec2; + using i16x4 = vec4; + using i16r = rect; + + using u32 = uint32_t; + using u32x2 = vec2; + using u32x4 = vec4; + using u32r = rect; + + using i32 = int32_t; + using i32x2 = vec2; + using i32x4 = vec4; + using i32r = rect; + + using f32 = float; + using f32x2 = vec2; + using f32x4 = vec4; + using f32r = 
rect; + + // I wrote `Buffer` instead of using `std::vector`, because I want to convey that these things + // explicitly _don't_ hold resizeable contents, but rather plain content of a fixed size. + // For instance I didn't want a resizeable vector with a `push_back` method for my fixed-size + // viewport arrays - that doesn't make sense after all. `Buffer` also doesn't initialize + // contents to zero, allowing rapid creation/destruction and you can easily specify a custom + // (over-)alignment which can improve rendering perf by up to ~20% over `std::vector`. + template + struct Buffer + { + constexpr Buffer() noexcept = default; + + explicit Buffer(size_t size) : + _data{ allocate(size) }, + _size{ size } + { + std::uninitialized_default_construct_n(_data, size); + } + + Buffer(const T* data, size_t size) : + _data{ allocate(size) }, + _size{ size } + { + // Changing the constructor arguments to accept std::span might + // be a good future extension, but not to improve security here. + // You can trivially construct std::span's from invalid ranges. + // Until then the raw-pointer style is more practical. +#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '3' that may be unsafe [...]. + std::uninitialized_copy_n(data, size, _data); + } + + ~Buffer() + { + destroy(); + } + + Buffer(Buffer&& other) noexcept : + _data{ std::exchange(other._data, nullptr) }, + _size{ std::exchange(other._size, 0) } + { + } + +#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). 
+ Buffer& operator=(Buffer&& other) noexcept + { + // Guard against self-move: destroy() would free the storage, and the two exchanges + // below would then hand the dangling pointer and old size straight back to this object. + if (this != &other) + { + destroy(); + _data = std::exchange(other._data, nullptr); + _size = std::exchange(other._size, 0); + } + return *this; + } + + explicit operator bool() const noexcept + { + return _data != nullptr; + } + + T& operator[](size_t index) noexcept + { + assert(index < _size); +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + return _data[index]; + } + + const T& operator[](size_t index) const noexcept + { + assert(index < _size); +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + return _data[index]; + } + + T* data() noexcept + { + return _data; + } + + const T* data() const noexcept + { + return _data; + } + + size_t size() const noexcept + { + return _size; + } + + T* begin() noexcept + { + return _data; + } + + const T* begin() const noexcept + { + return _data; + } + + T* end() noexcept + { +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + return _data + _size; + } + + const T* end() const noexcept + { +#pragma warning(suppress : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + return _data + _size; + } + + private: + // These two functions don't need to use scoped objects or standard allocators, + // since this class is in fact a scoped allocator object itself. +#pragma warning(push) +#pragma warning(disable : 26402) // Return a scoped object instead of a heap-allocated if it has a move constructor (r.3). +#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique instead (r.11). 
+ static T* allocate(size_t size) + { + if constexpr (Alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) + { + return static_cast(::operator new(size * sizeof(T))); + } + else + { + return static_cast(::operator new(size * sizeof(T), static_cast(Alignment))); + } + } + + static void deallocate(T* data) noexcept + { + if constexpr (Alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) + { + ::operator delete(data); + } + else + { + ::operator delete(data, static_cast(Alignment)); + } + } +#pragma warning(pop) + + void destroy() noexcept + { + std::destroy_n(_data, _size); + deallocate(_data); + } + + T* _data = nullptr; + size_t _size = 0; + }; + + struct TargetSettings + { + HWND hwnd = nullptr; + bool enableTransparentBackground = false; + bool useSoftwareRendering = false; + }; + + inline constexpr auto DefaultAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; + + struct FontSettings + { + wil::com_ptr fontCollection; + wil::com_ptr fontFamily; + std::wstring fontName; + std::vector fontFeatures; + std::vector fontAxisValues; + float baselineInDIP = 0.0f; + float fontSizeInDIP = 0.0f; + f32 advanceScale = 0; + u16x2 cellSize; + u16 fontWeight = 0; + u16 underlinePos = 0; + u16 underlineWidth = 0; + u16 strikethroughPos = 0; + u16 strikethroughWidth = 0; + u16x2 doubleUnderlinePos; + u16 thinLineWidth = 0; + u16 dpi = 96; + u8 antialiasingMode = DefaultAntialiasingMode; + }; + + struct CursorSettings + { + ATLAS_POD_OPS(CursorSettings) + + u32 cursorColor = 0xffffffff; + u16 cursorType = 0; + u8 heightPercentage = 20; + u8 _padding = 0; + }; + + struct MiscellaneousSettings + { + u32 backgroundColor = 0; + u32 selectionColor = 0x7fffffff; + std::wstring customPixelShaderPath; + bool useRetroTerminalEffect = false; + }; + + struct Settings + { + static auto invalidated() noexcept + { + return til::generational{ + til::generation_t{ 1 }, + til::generational{ til::generation_t{ 1 } }, + til::generational{ til::generation_t{ 1 } }, + til::generational{ til::generation_t{ 1 } 
}, + til::generational{ til::generation_t{ 1 } }, + }; + } + + til::generational target; + til::generational font; + til::generational cursor; + til::generational misc; + u16x2 targetSize; + u16x2 cellCount; + }; + + struct FontDependents + { + //wil::com_ptr textFormats[2][2]; + Buffer textFormatAxes[2][2]; + //wil::com_ptr typography; + f32 dipPerPixel = 1.0f; // caches USER_DEFAULT_SCREEN_DPI / dpi + f32 pixelPerDIP = 1.0f; // caches dpi / USER_DEFAULT_SCREEN_DPI + f32x2 cellSizeDIP; // caches cellSize in DIP + }; + + struct Dependents + { + FontDependents font; + }; + + struct FontMapping + { + wil::com_ptr fontFace; + f32 fontEmSize = 0; + u32 glyphsFrom = 0; + u32 glyphsTo = 0; + }; + + struct GridLineRange + { + GridLineSet lines; + u32 color = 0; + u16 from = 0; + u16 to = 0; + }; + + struct ShapedRow + { + void clear(u16 y, u16 cellHeight) noexcept + { + mappings.clear(); + glyphIndices.clear(); + glyphAdvances.clear(); + glyphOffsets.clear(); + colors.clear(); + gridLineRanges.clear(); + selectionFrom = 0; + selectionTo = 0; + top = y * cellHeight; + bottom = top + cellHeight; + } + + std::vector mappings; + std::vector glyphIndices; + std::vector glyphAdvances; // same size as glyphIndices + std::vector glyphOffsets; // same size as glyphIndices + std::vector colors; + std::vector gridLineRanges; + u16 selectionFrom = 0; + u16 selectionTo = 0; + til::CoordType top = 0; + til::CoordType bottom = 0; + }; + + struct RenderingPayload + { + // Parameters which are constant across backends. + wil::com_ptr d2dFactory; + wil::com_ptr dwriteFactory; + wil::com_ptr dwriteFactory4; // optional, might be nullptr + wil::com_ptr systemFontFallback; + wil::com_ptr systemFontFallback1; // optional, might be nullptr + wil::com_ptr textAnalyzer; + wil::com_ptr renderingParams; + std::function warningCallback; + std::function swapChainChangedCallback; + + // Parameters which are constant for the existence of the backend. 
+ wil::com_ptr dxgiFactory; + + // Parameters which change seldom. + til::generational s; + Dependents d; + + // Parameters which change every frame. + Buffer rows; + Buffer backgroundBitmap; + til::rect dirtyRect; + til::rect cursorRect; + til::CoordType scrollOffset = 0; + }; + + struct IBackend + { + virtual ~IBackend() = default; + virtual void Render(RenderingPayload& payload) = 0; + virtual bool RequiresContinuousRedraw() noexcept = 0; + virtual void WaitUntilCanRender() noexcept = 0; + }; + +} diff --git a/src/renderer/atlas/pch.h b/src/renderer/atlas/pch.h index f80306b1ea3..83b0a87bb7f 100644 --- a/src/renderer/atlas/pch.h +++ b/src/renderer/atlas/pch.h @@ -17,12 +17,12 @@ #include #include -#include -#include +#include +#include #include #include #include -#include +#include #include #include diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl new file mode 100644 index 00000000000..58ce862161a --- /dev/null +++ b/src/renderer/atlas/shader_common.hlsl @@ -0,0 +1,47 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +// clang-format off +#define SHADING_TYPE_TEXT_BACKGROUND 0 +#define SHADING_TYPE_TEXT_GRAYSCALE 1 +#define SHADING_TYPE_TEXT_CLEARTYPE 2 +#define SHADING_TYPE_PASSTHROUGH 3 +#define SHADING_TYPE_DASHED_LINE 4 +#define SHADING_TYPE_SOLID_FILL 5 +// clang-format on + +struct VSData +{ + int4 position : POSITION; + int4 texcoord : TEXCOORD; + uint color : COLOR; + uint shadingType : ShadingType; + // Structured Buffers are tightly packed. 
Nvidia recommends padding them to avoid crossing 128-bit + // cache lines: https://developer.nvidia.com/content/understanding-structured-buffer-performance + uint2 padding; +}; + +struct PSData +{ + nointerpolation uint shadingType : ShadingType; + nointerpolation float4 color : COLOR; + float4 position : SV_Position; + float2 texcoord : TEXCOORD; +}; + +float4 premultiplyColor(float4 color) +{ + color.rgb *= color.a; + return color; +} + +float4 alphaBlendPremultiplied(float4 bottom, float4 top) +{ + bottom *= 1 - top.a; + return bottom + top; +} + +float4 decodeRGBA(uint i) +{ + return (i >> uint4(0, 8, 16, 24) & 0xff) / 255.0f; +} diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 867bcccf2fd..06a7ff0c0bc 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -2,200 +2,96 @@ // Licensed under the MIT license. #include "dwrite.hlsl" - -#define INVALID_COLOR 0xffffffff - -// These flags are shared with AtlasEngine::CellFlags. -// -// clang-format off -#define CellFlags_None 0x00000000 -#define CellFlags_Inlined 0x00000001 - -#define CellFlags_ColoredGlyph 0x00000002 - -#define CellFlags_Cursor 0x00000008 -#define CellFlags_Selected 0x00000010 - -#define CellFlags_BorderLeft 0x00000020 -#define CellFlags_BorderTop 0x00000040 -#define CellFlags_BorderRight 0x00000080 -#define CellFlags_BorderBottom 0x00000100 -#define CellFlags_Underline 0x00000200 -#define CellFlags_UnderlineDotted 0x00000400 -#define CellFlags_UnderlineDouble 0x00000800 -#define CellFlags_Strikethrough 0x00001000 -// clang-format on - -// According to Nvidia's "Understanding Structured Buffer Performance" guide -// one should aim for structures with sizes divisible by 128 bits (16 bytes). -// This prevents elements from spanning cache lines. 
-struct Cell -{ - uint glyphPos; - uint flags; - uint2 color; // x: foreground, y: background -}; +#include "shader_common.hlsl" cbuffer ConstBuffer : register(b0) { - float4 viewport; + float4 backgroundColor; + float2 cellCount; + float2 cellSize; float4 gammaRatios; float enhancedContrast; - uint cellCountX; - uint2 cellSize; - uint underlinePos; - uint underlineWidth; - uint strikethroughPos; - uint strikethroughWidth; - uint2 doubleUnderlinePos; - uint thinLineWidth; - uint backgroundColor; - uint cursorColor; - uint selectionColor; - uint useClearType; -}; -StructuredBuffer cells : register(t0); -Texture2D glyphs : register(t1); - -float4 decodeRGBA(uint i) -{ - float4 c = (i >> uint4(0, 8, 16, 24) & 0xff) / 255.0f; - // Convert to premultiplied alpha for simpler alpha blending. - c.rgb *= c.a; - return c; + float dashedLineLength; } -uint2 decodeU16x2(uint i) -{ - return uint2(i & 0xffff, i >> 16); -} +SamplerState backgroundSampler : register(s0); +Texture2D background : register(t0); +Texture2D glyphAtlas : register(t1); -float4 alphaBlendPremultiplied(float4 bottom, float4 top) +struct Output { - bottom *= 1 - top.a; - return bottom + top; -} + float4 color; + float4 weights; +}; // clang-format off -float4 main(float4 pos: SV_Position): SV_Target +Output main(PSData data) : SV_Target // clang-format on { - // We need to fill the entire render target with pixels, but only our "viewport" - // has cells we want to draw. The rest gets treated with the background color. 
- [branch] if (any(pos.xy < viewport.xy || pos.xy >= viewport.zw)) - { - return decodeRGBA(backgroundColor); - } + float4 color; + float4 weights; - uint2 viewportPos = pos.xy - viewport.xy; - uint2 cellIndex = viewportPos / cellSize; - uint2 cellPos = viewportPos % cellSize; - Cell cell = cells[cellIndex.y * cellCountX + cellIndex.x]; - - // Layer 0: - // The cell's background color - float4 color = decodeRGBA(cell.color.y); - float4 fg = decodeRGBA(cell.color.x); - - // Layer 1 (optional): - // Colored cursors are drawn "in between" the background color and the text of a cell. - [branch] if (cell.flags & CellFlags_Cursor) + switch (data.shadingType) { - [branch] if (cursorColor != INVALID_COLOR) + case SHADING_TYPE_TEXT_BACKGROUND: + float2 pos = data.texcoord / cellSize; + color = background[pos]; + weights = float4(1, 1, 1, 1); + if (any(pos >= cellCount)) { - // The cursor texture is stored at the top-left-most glyph cell. - // Cursor pixels are either entirely transparent or opaque. - // --> We can just use .a as a mask to flip cursor pixels on or off. - color = alphaBlendPremultiplied(color, decodeRGBA(cursorColor) * glyphs[cellPos].a); - } - else if (glyphs[cellPos].a != 0) - { - // Make sure the cursor is always readable (see gh-3647) - // If we imagine the two colors to be in 0-255 instead of 0-1, - // this effectively XORs them with 63. This avoids a situation - // where a gray background color (0.5) gets inverted to the - // same gray making the cursor invisible. - float2x4 colors = { color, fg }; - float2x4 ip; // integral part - float2x4 frac = modf(colors * (255.0f / 64.0f), ip); - colors = (3.0f - ip + frac) * (64.0f / 255.0f); - color = float4(colors[0].rgb, 1); - fg = float4(colors[1].rgb, 1); + color = backgroundColor; } + break; + case SHADING_TYPE_TEXT_GRAYSCALE: + { + // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. 
+ const float4 foreground = premultiplyColor(data.color); + const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); + const float intensity = DWrite_CalcColorIntensity(data.color.rgb); + // These aren't. + const float4 glyph = glyphAtlas[data.texcoord]; + const float contrasted = DWrite_EnhanceContrast(glyph.a, blendEnhancedContrast); + const float alphaCorrected = DWrite_ApplyAlphaCorrection(contrasted, intensity, gammaRatios); + color = alphaCorrected * foreground; + weights = color.aaaa; + break; } - - // Layer 2: - // Step 1: The cell's glyph, potentially drawn in the foreground color + case SHADING_TYPE_TEXT_CLEARTYPE: { - float4 glyph = glyphs[decodeU16x2(cell.glyphPos) + cellPos]; - - [branch] if (cell.flags & CellFlags_ColoredGlyph) - { - color = alphaBlendPremultiplied(color, glyph); - } - else - { - float3 foregroundStraight = DWrite_UnpremultiplyColor(fg); - float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, foregroundStraight); - - [branch] if (useClearType) - { - // See DWrite_ClearTypeBlend - float3 contrasted = DWrite_EnhanceContrast3(glyph.rgb, blendEnhancedContrast); - float3 alphaCorrected = DWrite_ApplyAlphaCorrection3(contrasted, foregroundStraight, gammaRatios); - color = float4(lerp(color.rgb, foregroundStraight, alphaCorrected * fg.a), 1.0f); - } - else - { - // See DWrite_GrayscaleBlend - float intensity = DWrite_CalcColorIntensity(foregroundStraight); - float contrasted = DWrite_EnhanceContrast(glyph.a, blendEnhancedContrast); - float4 alphaCorrected = DWrite_ApplyAlphaCorrection(contrasted, intensity, gammaRatios); - color = alphaBlendPremultiplied(color, alphaCorrected * fg); - } - } + // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. + const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); + // These aren't. 
+ const float4 glyph = glyphAtlas[data.texcoord]; + const float3 contrasted = DWrite_EnhanceContrast3(glyph.rgb, blendEnhancedContrast); + const float3 alphaCorrected = DWrite_ApplyAlphaCorrection3(contrasted, data.color.rgb, gammaRatios); + weights = float4(alphaCorrected * data.color.a, 1); + color = weights * data.color; + break; } - // Step 2: Lines + case SHADING_TYPE_PASSTHROUGH: { - // What a nice coincidence that we have exactly 8 flags to handle right now! - // `mask` will mask away any positive results from checks we don't want. - // (I.e. even if we're in an underline, it doesn't matter if we don't want an underline.) - bool2x4 mask = { - cell.flags & CellFlags_BorderLeft, - cell.flags & CellFlags_BorderTop, - cell.flags & CellFlags_BorderRight, - cell.flags & CellFlags_BorderBottom, - cell.flags & CellFlags_Underline, - cell.flags & CellFlags_UnderlineDotted, - cell.flags & CellFlags_UnderlineDouble, - cell.flags & CellFlags_Strikethrough, - }; - // The following = lo && y < hi`. - bool2x4 checks = { - // These 2 expand to 4 bools, because cellPos is a - // uint2 vector which results in a bool2 result each. - cellPos < thinLineWidth, - (cellSize - cellPos) <= thinLineWidth, - // These 4 are 4 regular bools. - (cellPos.y - underlinePos) < underlineWidth, - (cellPos.y - underlinePos) < underlineWidth && (viewportPos.x / underlineWidth & 3) == 0, - any((cellPos.y - doubleUnderlinePos) < thinLineWidth), - (cellPos.y - strikethroughPos) < strikethroughWidth, - }; - [flatten] if (any(mask && checks)) - { - color = alphaBlendPremultiplied(color, fg); - } + color = glyphAtlas[data.texcoord]; + weights = color.aaaa; + break; } - - // Layer 4: - // The current selection is drawn semi-transparent on top. 
- [branch] if (cell.flags & CellFlags_Selected) + case SHADING_TYPE_DASHED_LINE: + { + const bool on = frac(data.position.x / dashedLineLength) < 0.333333333f; + color = on * premultiplyColor(data.color); + weights = color.aaaa; + break; + } + case SHADING_TYPE_SOLID_FILL: + default: { - color = alphaBlendPremultiplied(color, decodeRGBA(selectionColor)); + color = premultiplyColor(data.color); + weights = color.aaaa; + break; + } } - return color; + Output output; + output.color = color; + output.weights = weights; + return output; } diff --git a/src/renderer/atlas/shader_vs.hlsl b/src/renderer/atlas/shader_vs.hlsl index e4c75b3e984..fea289a0978 100644 --- a/src/renderer/atlas/shader_vs.hlsl +++ b/src/renderer/atlas/shader_vs.hlsl @@ -1,17 +1,31 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. +#include "shader_common.hlsl" + +cbuffer ConstBuffer : register(b0) +{ + float2 positionScale; +} + +StructuredBuffer instances : register(t0); + // clang-format off -float4 main(uint id: SV_VERTEXID): SV_POSITION +PSData main(uint id: SV_VertexID) // clang-format on { - // The algorithm below is a fast way to generate a full screen triangle, - // published by Bill Bilodeau "Vertex Shader Tricks" at GDC14. - // It covers the entire viewport and is faster for the GPU than a quad/rectangle. - return float4( - float(id / 2) * 4.0 - 1.0, - float(id % 2) * 4.0 - 1.0, - 0.0, - 1.0 - ); + VSData data = instances[id / 4]; + + PSData output; + output.shadingType = data.shadingType; + output.color = decodeRGBA(data.color); + output.position.x = (id & 1) ? data.position.z : data.position.x; + output.position.y = (id & 2) ? data.position.w : data.position.y; + // positionScale is expected to be float2(2.0f / sizeInPixel.x, -2.0f / sizeInPixel.y). Together with the + // addition below this will transform our "position" from pixel into normalized device coordinate (NDC) space. 
+ output.position.xy = output.position.xy * positionScale + float2(-1.0f, 1.0f); + output.position.zw = float2(0, 1); + output.texcoord.x = (id & 1) ? data.texcoord.z : data.texcoord.x; + output.texcoord.y = (id & 2) ? data.texcoord.w : data.texcoord.y; + return output; } diff --git a/src/renderer/atlas/stb_rect_pack.cpp b/src/renderer/atlas/stb_rect_pack.cpp new file mode 100644 index 00000000000..7cba8a5330b --- /dev/null +++ b/src/renderer/atlas/stb_rect_pack.cpp @@ -0,0 +1,4 @@ +#include "pch.h" + +#define STB_RECT_PACK_IMPLEMENTATION +#include "stb_rect_pack.h" diff --git a/src/renderer/base/renderer.cpp b/src/renderer/base/renderer.cpp index 0739a9281bb..ad8a3b3581e 100644 --- a/src/renderer/base/renderer.cpp +++ b/src/renderer/base/renderer.cpp @@ -913,55 +913,55 @@ void Renderer::_PaintBufferOutputHelper(_In_ IRenderEngine* const pEngine, } // Method Description: -// - Generates a IRenderEngine::GridLines structure from the values in the +// - Generates a GridLines structure from the values in the // provided textAttribute // Arguments: // - textAttribute: the TextAttribute to generate GridLines from. // Return Value: // - a GridLineSet containing all the gridline info from the TextAttribute -IRenderEngine::GridLineSet Renderer::s_GetGridlines(const TextAttribute& textAttribute) noexcept +GridLineSet Renderer::s_GetGridlines(const TextAttribute& textAttribute) noexcept { // Convert console grid line representations into rendering engine enum representations. 
- IRenderEngine::GridLineSet lines; + GridLineSet lines; if (textAttribute.IsTopHorizontalDisplayed()) { - lines.set(IRenderEngine::GridLines::Top); + lines.set(GridLines::Top); } if (textAttribute.IsBottomHorizontalDisplayed()) { - lines.set(IRenderEngine::GridLines::Bottom); + lines.set(GridLines::Bottom); } if (textAttribute.IsLeftVerticalDisplayed()) { - lines.set(IRenderEngine::GridLines::Left); + lines.set(GridLines::Left); } if (textAttribute.IsRightVerticalDisplayed()) { - lines.set(IRenderEngine::GridLines::Right); + lines.set(GridLines::Right); } if (textAttribute.IsCrossedOut()) { - lines.set(IRenderEngine::GridLines::Strikethrough); + lines.set(GridLines::Strikethrough); } if (textAttribute.IsUnderlined()) { - lines.set(IRenderEngine::GridLines::Underline); + lines.set(GridLines::Underline); } if (textAttribute.IsDoublyUnderlined()) { - lines.set(IRenderEngine::GridLines::DoubleUnderline); + lines.set(GridLines::DoubleUnderline); } if (textAttribute.IsHyperlink()) { - lines.set(IRenderEngine::GridLines::HyperlinkUnderline); + lines.set(GridLines::HyperlinkUnderline); } return lines; } @@ -995,7 +995,7 @@ void Renderer::_PaintBufferOutputGridLineHelper(_In_ IRenderEngine* const pEngin { if (_pData->GetPatternId(coordTarget).size() > 0) { - lines.set(IRenderEngine::GridLines::Underline); + lines.set(GridLines::Underline); } } } diff --git a/src/renderer/base/renderer.hpp b/src/renderer/base/renderer.hpp index f85b279abeb..e555bfa0e8b 100644 --- a/src/renderer/base/renderer.hpp +++ b/src/renderer/base/renderer.hpp @@ -91,7 +91,7 @@ namespace Microsoft::Console::Render void UpdateLastHoveredInterval(const std::optional::interval>& newInterval); private: - static IRenderEngine::GridLineSet s_GetGridlines(const TextAttribute& textAttribute) noexcept; + static GridLineSet s_GetGridlines(const TextAttribute& textAttribute) noexcept; static bool s_IsSoftFontChar(const std::wstring_view& v, const size_t firstSoftFontChar, const size_t lastSoftFontChar); 
[[nodiscard]] HRESULT _PaintFrameForEngine(_In_ IRenderEngine* const pEngine) noexcept; diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp index 70e6e01806e..d7774fe9a6f 100644 --- a/src/renderer/dx/DxRenderer.cpp +++ b/src/renderer/dx/DxRenderer.cpp @@ -69,15 +69,13 @@ using namespace Microsoft::Console::Types; // TODO GH 2683: The default constructor should not throw. DxEngine::DxEngine() : RenderEngineBase(), - _pool{ til::pmr::get_default_resource() }, - _invalidMap{ &_pool }, _invalidScroll{}, _allInvalid{ false }, _firstFrame{ true }, _presentParams{ 0 }, _presentReady{ false }, _presentScroll{ 0 }, - _presentDirty{ 0 }, + _presentDirty{ { 0, 0, 120, 30 } }, _presentOffset{ 0 }, _isEnabled{ false }, _isPainting{ false }, @@ -97,8 +95,8 @@ DxEngine::DxEngine() : _pixelShaderPath{}, _forceFullRepaintRendering{ false }, _softwareRendering{ false }, - _antialiasingMode{ D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE }, - _defaultBackgroundIsTransparent{ true }, + _antialiasingMode{ D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE }, + _defaultBackgroundIsTransparent{ false }, _hwndTarget{ static_cast(INVALID_HANDLE_VALUE) }, _sizeTarget{}, _dpi{ USER_DEFAULT_SCREEN_DPI }, @@ -1054,17 +1052,13 @@ try } CATCH_LOG() -void DxEngine::_InvalidateRectangle(const til::rect& rc) +void DxEngine::_InvalidateRectangle(const til::rect&) { - const auto size = _invalidMap.size(); - const auto topLeft = til::point{ 0, std::clamp(rc.top, 0, size.height) }; - const auto bottomRight = til::point{ size.width, std::clamp(rc.bottom, 0, size.height) }; - _invalidMap.set({ topLeft, bottomRight }); } bool DxEngine::_IsAllInvalid() const noexcept { - return std::abs(_invalidScroll.y) >= _invalidMap.size().height; + return true; } // Routine Description: @@ -1158,7 +1152,6 @@ try if (deltaCells != til::point{}) { // Shift the contents of the map and fill in revealed area. 
- _invalidMap.translate(deltaCells, true); _invalidScroll += deltaCells; _allInvalid = _IsAllInvalid(); } @@ -1177,7 +1170,6 @@ CATCH_RETURN(); [[nodiscard]] HRESULT DxEngine::InvalidateAll() noexcept try { - _invalidMap.set_all(); _allInvalid = true; // Since everything is invalidated here, mark this as a "first frame", so @@ -1267,19 +1259,6 @@ try RETURN_IF_FAILED(InvalidateAll()); } - if (TraceLoggingProviderEnabled(g_hDxRenderProvider, WINEVENT_LEVEL_VERBOSE, TIL_KEYWORD_TRACE)) - { - const auto invalidatedStr = _invalidMap.to_string(); - const auto invalidated = invalidatedStr.c_str(); - -#pragma warning(suppress : 26477 26485 26494 26482 26446 26447) // We don't control TraceLoggingWrite - TraceLoggingWrite(g_hDxRenderProvider, - "Invalid", - TraceLoggingWideString(invalidated), - TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), - TraceLoggingKeyword(TIL_KEYWORD_TRACE)); - } - if (_isEnabled) { const auto clientSize = _GetClientSize(); @@ -1315,12 +1294,6 @@ try _displaySizePixels = clientSize; } - if (const auto size = clientSize / glyphCellSize; size != _invalidMap.size()) - { - _invalidMap.resize(size); - RETURN_IF_FAILED(InvalidateAll()); - } - _d2dDeviceContext->BeginDraw(); _isPainting = true; @@ -1373,9 +1346,6 @@ try { if (_invalidScroll != til::point{ 0, 0 }) { - // Copy `til::rects` into RECT map. - _presentDirty.assign(_invalidMap.begin(), _invalidMap.end()); - // Scale all dirty rectangles into pixels std::transform(_presentDirty.begin(), _presentDirty.end(), _presentDirty.begin(), [&](const til::rect& rc) { return rc.scale_up(_fontRenderData->GlyphCell()); @@ -1385,7 +1355,7 @@ try const auto scrollPixels = (_invalidScroll * _fontRenderData->GlyphCell()); // The scroll rect is the entire field of cells, but in pixels. - til::rect scrollArea{ _invalidMap.size() * _fontRenderData->GlyphCell() }; + til::rect scrollArea{ til::size{ 120, 30 } * _fontRenderData->GlyphCell() }; // Reduce the size of the rectangle by the scroll. 
scrollArea.left = std::clamp(scrollArea.left + scrollPixels.x, scrollArea.left, scrollArea.right); @@ -1429,7 +1399,6 @@ try } } - _invalidMap.reset_all(); _allInvalid = false; _invalidScroll = {}; @@ -1482,7 +1451,7 @@ CATCH_RETURN() // Finally... if we're not using effects at all... let the render thread // go to sleep. It deserves it. That thread works hard. Also it sleeping // saves battery power and all sorts of related perf things. - return _terminalEffectsEnabled && !_pixelShaderPath.empty(); + return true; } // Method Description: @@ -1490,10 +1459,6 @@ CATCH_RETURN() // - See https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains. void DxEngine::WaitUntilCanRender() noexcept { - // Throttle the DxEngine a bit down to ~60 FPS. - // This improves throughput for rendering complex or colored text. - Sleep(8); - if (_swapChainFrameLatencyWaitableObject) { WaitForSingleObjectEx(_swapChainFrameLatencyWaitableObject.get(), 100, true); @@ -1589,6 +1554,8 @@ void DxEngine::WaitUntilCanRender() noexcept _presentReady = false; _presentDirty.clear(); + _presentDirty.emplace_back(0, 0, 120, 30); + _presentOffset = { 0 }; _presentScroll = { 0 }; _presentParams = { 0 }; @@ -1627,29 +1594,7 @@ try } // If the entire thing is invalid, just use one big clear operation. - if (_invalidMap.all()) - { - _d2dDeviceContext->Clear(nothing); - } - else - { - // Runs are counts of cells. - // Use a transform by the size of one cell to convert cells-to-pixels - // as we clear. - _d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Scale(_fontRenderData->GlyphCell().to_d2d_size())); - for (const auto& rect : _invalidMap.runs()) - { - // Use aliased. - // For graphics reasons, it'll look better because it will ensure that - // the edges are cut nice and sharp (not blended by anti-aliasing). - // For performance reasons, it takes a lot less work to not - // do anti-alias blending. 
- _d2dDeviceContext->PushAxisAlignedClip(rect.to_d2d_rect(), D2D1_ANTIALIAS_MODE_ALIASED); - _d2dDeviceContext->Clear(nothing); - _d2dDeviceContext->PopAxisAlignedClip(); - } - _d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Identity()); - } + _d2dDeviceContext->Clear(nothing); return S_OK; } @@ -2115,7 +2060,7 @@ CATCH_RETURN(); [[nodiscard]] HRESULT DxEngine::GetDirtyArea(std::span& area) noexcept try { - area = _invalidMap.runs(); + area = _presentDirty; return S_OK; } CATCH_RETURN(); diff --git a/src/renderer/dx/DxRenderer.hpp b/src/renderer/dx/DxRenderer.hpp index 9dc9fdff741..fa83b04fd0e 100644 --- a/src/renderer/dx/DxRenderer.hpp +++ b/src/renderer/dx/DxRenderer.hpp @@ -180,8 +180,6 @@ namespace Microsoft::Console::Render uint16_t _hyperlinkHoveredId; bool _firstFrame; - std::pmr::unsynchronized_pool_resource _pool; - til::pmr::bitmap _invalidMap; til::point _invalidScroll; bool _allInvalid; diff --git a/src/renderer/inc/IRenderEngine.hpp b/src/renderer/inc/IRenderEngine.hpp index 05d6b33eed1..46221ae911d 100644 --- a/src/renderer/inc/IRenderEngine.hpp +++ b/src/renderer/inc/IRenderEngine.hpp @@ -32,23 +32,23 @@ namespace Microsoft::Console::Render std::optional cursorInfo; }; + enum class GridLines + { + None, + Top, + Bottom, + Left, + Right, + Underline, + DoubleUnderline, + Strikethrough, + HyperlinkUnderline + }; + using GridLineSet = til::enumset; + class __declspec(novtable) IRenderEngine { public: - enum class GridLines - { - None, - Top, - Bottom, - Left, - Right, - Underline, - DoubleUnderline, - Strikethrough, - HyperlinkUnderline - }; - using GridLineSet = til::enumset; - #pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). 
virtual ~IRenderEngine() = default; diff --git a/src/tools/RenderingTests/RenderingTests.vcxproj b/src/tools/RenderingTests/RenderingTests.vcxproj new file mode 100644 index 00000000000..0824d3dd669 --- /dev/null +++ b/src/tools/RenderingTests/RenderingTests.vcxproj @@ -0,0 +1,26 @@ + + + + 16.0 + Win32Proj + {37c995e0-2349-4154-8e77-4a52c0c7f46d} + RenderingTests + 10.0 + + + + + + NotUsing + _CONSOLE;%(PreprocessorDefinitions) + + + Console + + + + + + + + diff --git a/src/tools/RenderingTests/RenderingTests.vcxproj.filters b/src/tools/RenderingTests/RenderingTests.vcxproj.filters new file mode 100644 index 00000000000..0f14913f3c7 --- /dev/null +++ b/src/tools/RenderingTests/RenderingTests.vcxproj.filters @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp new file mode 100644 index 00000000000..e64d4c131c0 --- /dev/null +++ b/src/tools/RenderingTests/main.cpp @@ -0,0 +1,175 @@ +#define NOMINMAX +#define WIN32_LEAN_AND_MEAN +#include +#include + +#include + +#include + +// Another variant of "defer" for C++. +namespace +{ + namespace detail + { + + template + class scope_guard + { + public: + scope_guard(F f) noexcept : + func(std::move(f)) + { + } + + ~scope_guard() + { + func(); + } + + scope_guard(const scope_guard&) = delete; + scope_guard(scope_guard&& rhs) = delete; + scope_guard& operator=(const scope_guard&) = delete; + scope_guard& operator=(scope_guard&&) = delete; + + private: + F func; + }; + + enum class scope_guard_helper + { + }; + + template + scope_guard operator+(scope_guard_helper /*unused*/, F&& fn) + { + return scope_guard(std::forward(fn)); + } + + } // namespace detail + +// The extra indirection is necessary to prevent __LINE__ to be treated literally. 
+#define _DEFER_CONCAT_IMPL(a, b) a##b +#define _DEFER_CONCAT(a, b) _DEFER_CONCAT_IMPL(a, b) +#define defer const auto _DEFER_CONCAT(_defer_, __LINE__) = ::detail::scope_guard_helper() + [&]() +} + +// wprintf() in the uCRT prints every single wchar_t individually and thus breaks surrogate +// pairs apart which Windows Terminal treats as invalid input and replaces it with U+FFFD. +static void printfUTF16(_In_z_ _Printf_format_string_ wchar_t const* const format, ...) +{ + wchar_t buffer[128]; + + va_list args; + va_start(args, format); + const auto length = _vsnwprintf_s(buffer, _countof(buffer), _TRUNCATE, format, args); + va_end(args); + + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), buffer, length, nullptr, nullptr); +} + +static void writeAsciiWithAttribute(WORD attribute, const wchar_t* text) +{ + const auto outputHandle = GetStdHandle(STD_OUTPUT_HANDLE); + const auto length = static_cast(wcslen(text)); + + CONSOLE_SCREEN_BUFFER_INFO info{}; + GetConsoleScreenBufferInfo(outputHandle, &info); + + WORD attributes[128]; + std::fill_n(&attributes[0], length, static_cast(attribute | FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED)); + + DWORD numberOfAttrsWritten; + WriteConsoleW(outputHandle, text, length, nullptr, nullptr); + WriteConsoleOutputAttribute(outputHandle, attributes, length, info.dwCursorPosition, &numberOfAttrsWritten); +} + +static void wait() +{ + printfUTF16(L"\r\nPress any key to continue..."); + _getch(); +} + +static void clear() +{ + printfUTF16( + L"\x1B[H" // move cursor to 0,0 + L"\x1B[2J" // clear screen + ); +} + +int main() +{ + const auto outputHandle = GetStdHandle(STD_OUTPUT_HANDLE); + DWORD consoleMode = ENABLE_PROCESSED_OUTPUT | ENABLE_WRAP_AT_EOL_OUTPUT; + GetConsoleMode(outputHandle, &consoleMode); + SetConsoleMode(outputHandle, consoleMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING | DISABLE_NEWLINE_AUTO_RETURN); + defer + { + SetConsoleMode(outputHandle, consoleMode); + }; + + printfUTF16( + L"\x1b[?1049h" // enable alternative 
screen buffer + L"\x1B[H" // move cursor to 0,0 + ); + defer + { + printfUTF16( + L"\x1b[?1049l" // disable alternative screen buffer + ); + }; + + { + struct Test + { + WORD attribute = 0; + const wchar_t* text = nullptr; + }; + static constexpr Test tests[]{ +#define MAKE_TEST_FOR_ATTRIBUTE(attr) Test{ attr, L## #attr } + MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_HORIZONTAL), + MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_LVERTICAL), + MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_RVERTICAL), + MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_REVERSE_VIDEO), + MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_UNDERSCORE), +#undef MAKE_TEST_FOR_ATTRIBUTE + }; + + for (const auto& t : tests) + { + printfUTF16(L"\r\n "); + writeAsciiWithAttribute(t.attribute, t.text); + printfUTF16(L"\r\n "); + } + } + + wait(); + clear(); + + { + struct Test + { + WORD sgr = 0; + const wchar_t* name = nullptr; + }; + static constexpr Test tests[]{ + { 3, L"italic" }, + { 4, L"underline" }, + { 7, L"reverse" }, + { 9, L"strikethrough" }, + { 21, L"double underline" }, + { 53, L"overlined" }, + }; + + for (const auto& t : tests) + { + printfUTF16(L"\r\n \x1b[%dm%s \\x1b[%dm\x1b[m\r\n ", t.sgr, t.name, t.sgr); + } + + printfUTF16(L"\r\n \x1b]8;;https://example.com\x1b\\hyperlink \\x1b]8;;https://example.com\\x1b\\\\hyperlink\\x1b]8;;\\x1b\\\\\x1b]8;;\x1b\\\r\n "); + } + + wait(); + return 0; +} From 6ba233a27fed720b6e3aae51de1391969dc72b74 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 7 Mar 2023 19:04:58 +0100 Subject: [PATCH 02/37] Fix transparency, scrolling, dirty rects --- src/renderer/atlas/AtlasEngine.cpp | 51 ++++++++++--------- src/renderer/atlas/Backend.cpp | 34 +++++++------ src/renderer/atlas/Backend.h | 6 +-- src/renderer/atlas/BackendD3D11.cpp | 2 +- src/renderer/atlas/common.h | 22 +++++++-- src/renderer/atlas/shader_ps.hlsl | 7 +-- src/renderer/dx/DxRenderer.cpp | 77 ++++++++++++++++++++++++----- src/renderer/dx/DxRenderer.hpp | 2 + 8 files changed, 138 insertions(+), 63 deletions(-) diff --git 
a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 33b4878c5f0..52e223b97a5 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -100,8 +100,8 @@ try if (offset < 0) { // Scroll up (for instance when new text is being written at the end of the buffer). - const u16 endRow = _p.s->cellCount.y + offset; - _api.invalidatedRows.x = nothingInvalid ? endRow : std::min(_api.invalidatedRows.x, endRow); + const u16 begRow = _p.s->cellCount.y + offset; + _api.invalidatedRows.x = nothingInvalid ? begRow : std::min(_api.invalidatedRows.x, begRow); _api.invalidatedRows.y = _p.s->cellCount.y; // scrollOffset/offset = -1 @@ -110,15 +110,20 @@ try // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset // |xxxxxxx | | | | v // +----------+ +----------+ v < end - const auto dest = _p.rows.begin(); - const auto last = _p.rows.end(); - const auto first = dest - offset; - const auto end = std::move(first, last, dest); - - for (auto it = dest; it != end; ++it) + const auto beg = _p.rows.begin(); + const auto end = _p.rows.end(); + auto first = beg - offset; + auto dest = beg; + + // Same as std::move, but with std::swap to preserve std::vector allocations. + // Also, it allows to include the top/bottom adjustment. 
+ for (; first != end; ++dest, (void)++first) { - it->top += deltaPx; - it->bottom += deltaPx; + using std::swap; + auto& d = *dest; + swap(*first, d); + d.top += deltaPx; + d.bottom += deltaPx; } } else @@ -133,15 +138,20 @@ try // |xxxxxxx | -> | xxxxxxxxx| | ^ // | | |xxxxxxx | v | < end - offset // +----------+ +----------+ + dst < end - const auto first = _p.rows.begin(); - const auto dest = _p.rows.end(); - const auto last = dest - offset; - const auto beg = std::move_backward(first, last, dest); - - for (auto it = beg; it != dest; ++it) + const auto beg = _p.rows.begin(); + const auto end = _p.rows.end(); + auto last = end - offset; + auto dest = end; + + // Same as std::move_backwards, but with std::swap to preserve std::vector allocations. + // Also, it allows to include the top/bottom adjustment. + while (last != beg) { - it->top += deltaPx; - it->bottom += deltaPx; + using std::swap; + auto& d = *--dest; + swap(*--last, d); + d.top += deltaPx; + d.bottom += deltaPx; } } @@ -151,8 +161,8 @@ try const auto width = _p.s->cellCount.x; const auto beg = _p.backgroundBitmap.begin(); const auto end = _p.backgroundBitmap.end(); - const auto dst = beg + std::max(0, offset) * width; const auto src = beg - std::min(0, offset) * width; + const auto dst = beg + std::max(0, offset) * width; const auto count = end - std::max(src, dst); assert(dst >= beg && dst + count <= end); assert(src >= beg && src + count <= end); @@ -165,9 +175,6 @@ try _p.rows[y].clear(y, _p.s->font->cellSize.y); } - assert(_p.s->cellCount.x * _p.s->font->cellSize.x <= _p.s->targetSize.x); - assert(_p.s->cellCount.y * _p.s->font->cellSize.y <= _p.s->targetSize.y); - _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _p.s->cellCount.x, _api.invalidatedRows.y }; _p.dirtyRect = _api.dirtyRect; _p.cursorRect = {}; diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 5118ce7f454..393ae22e10f 100644 --- a/src/renderer/atlas/Backend.cpp +++ 
b/src/renderer/atlas/Backend.cpp @@ -1,6 +1,8 @@ #include "pch.h" #include "Backend.h" +#include + TIL_FAST_MATH_BEGIN // Disable a bunch of warnings which get in the way of writing performant code. @@ -32,19 +34,23 @@ void SwapChainManager::Present(const RenderingPayload& p) dirtyRectInPx.bottom *= p.s->font->cellSize.y; // This block will enlarge the dirtyRectInPx to handle glyphs that overlap their rows. - // TODO: This only works because we redraw the entire back buffer every frame. const auto actualDirtyTop = gsl::at(p.rows, p.dirtyRect.top).top; const auto actualDirtyBottom = gsl::at(p.rows, gsl::narrow_cast(p.dirtyRect.bottom) - 1).bottom; // Since rows might be taller than their cells, they might have drawn outside of the viewport. - // This use of clamp() below avoids us from writing out of bounds coordinates into dirtyRectInPx. - dirtyRectInPx.top = clamp(actualDirtyTop, 0, dirtyRectInPx.top); - dirtyRectInPx.bottom = clamp(actualDirtyBottom, dirtyRectInPx.bottom, static_cast(_targetSize.y)); - - // TODO - //if (p.dirtyRect.right == fullRect.right) - //{ - // dirtyRectInPx.right = _targetSize.x; - //} + // FYI using std::clamp() here would be dangerous. If std::clamp() is given a "min" that is greater + // than "max" it'll return min, but our calculation of .bottom wants to do the exact opposite. + dirtyRectInPx.top = std::max(std::min(dirtyRectInPx.top, actualDirtyTop), 0); + dirtyRectInPx.bottom = std::min(std::max(dirtyRectInPx.bottom, actualDirtyBottom), static_cast(_targetSize.y)); + // The swap chain might have a different size than the TextBuffer (due to the renderer running asynchronously) and so + // we have to ensure to clamp the bottom/right coordinates into _targetSize. The above already did so for bottom. + dirtyRectInPx.right = std::min(dirtyRectInPx.right, static_cast(_targetSize.x)); + + // If a row of text has been changed, it's width will equal the full rect. 
In that case we should + // also redraw the margin on the right, as overlapping glyphs might have previously drawn into it. + if (p.dirtyRect.left == fullRect.left && p.dirtyRect.right == fullRect.right) + { + dirtyRectInPx.right = _targetSize.x; + } RECT scrollRect{}; POINT scrollOffset{}; @@ -56,7 +62,7 @@ void SwapChainManager::Present(const RenderingPayload& p) if (p.scrollOffset) { const auto offsetInPx = p.scrollOffset * p.s->font->cellSize.y; - const auto width = p.s->cellCount.x * p.s->font->cellSize.x; + const auto width = p.s->targetSize.x; const auto height = p.s->cellCount.y * p.s->font->cellSize.y; const auto top = std::max(0, offsetInPx); const auto bottom = height + std::min(0, offsetInPx); @@ -68,11 +74,7 @@ void SwapChainManager::Present(const RenderingPayload& p) params.pScrollOffset = &scrollOffset; } - if (const auto hr = _swapChain->Present1(1, 0, ¶ms); FAILED(hr)) - { - __debugbreak(); - THROW_HR(hr); - } + THROW_IF_FAILED(_swapChain->Present1(1, 0, ¶ms)); } else { diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 823c9a2b8e4..a4da5a982d7 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -24,7 +24,7 @@ namespace Microsoft::Console::Render::Atlas else if (_targetSize != p.s->targetSize) { prepareResize(); - THROW_IF_FAILED(_swapChain->ResizeBuffers(0, _targetSize.x, _targetSize.y, DXGI_FORMAT_UNKNOWN, flags)); + THROW_IF_FAILED(_swapChain->ResizeBuffers(0, p.s->targetSize.x, p.s->targetSize.y, DXGI_FORMAT_UNKNOWN, flags)); _targetSize = p.s->targetSize; } @@ -74,9 +74,9 @@ namespace Microsoft::Console::Render::Atlas // std::clamp(T, T, T, Predicate) with std::less{} as the argument, // which introduces branching. While not perfect, this is still better than std::clamp. template - static constexpr T clamp(T val, T min, T max) + constexpr T clamp(T val, T min, T max) { - return std::max(min, std::min(max, val)); + return val < min ? min : (max < val ? 
max : val); } f32r GetGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); diff --git a/src/renderer/atlas/BackendD3D11.cpp b/src/renderer/atlas/BackendD3D11.cpp index b06969dea5e..fed7151ecba 100644 --- a/src/renderer/atlas/BackendD3D11.cpp +++ b/src/renderer/atlas/BackendD3D11.cpp @@ -591,7 +591,7 @@ void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) } { PSConstBuffer data; - data.backgroundColor = colorFromU32Premultiply(p.s->misc->backgroundColor); + data.backgroundColor = colorFromU32(p.s->misc->backgroundColor); data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; DWrite_GetGammaRatios(_gamma, data.gammaRatios); diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 940ce2e394a..41170b412e3 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -285,8 +285,8 @@ namespace Microsoft::Console::Render::Atlas std::wstring fontName; std::vector fontFeatures; std::vector fontAxisValues; - float baselineInDIP = 0.0f; - float fontSizeInDIP = 0.0f; + f32 baselineInDIP = 0.0f; + f32 fontSizeInDIP = 0.0f; f32 advanceScale = 0; u16x2 cellSize; u16 fontWeight = 0; @@ -341,9 +341,7 @@ namespace Microsoft::Console::Render::Atlas struct FontDependents { - //wil::com_ptr textFormats[2][2]; Buffer textFormatAxes[2][2]; - //wil::com_ptr typography; f32 dipPerPixel = 1.0f; // caches USER_DEFAULT_SCREEN_DPI / dpi f32 pixelPerDIP = 1.0f; // caches dpi / USER_DEFAULT_SCREEN_DPI f32x2 cellSizeDIP; // caches cellSize in DIP @@ -386,11 +384,25 @@ namespace Microsoft::Console::Render::Atlas bottom = top + cellHeight; } + friend void swap(ShapedRow& lhs, ShapedRow& rhs) noexcept + { + std::swap(lhs.mappings, rhs.mappings); + std::swap(lhs.glyphIndices, rhs.glyphIndices); + std::swap(lhs.glyphAdvances, rhs.glyphAdvances); + std::swap(lhs.glyphOffsets, rhs.glyphOffsets); + 
std::swap(lhs.colors, rhs.colors); + std::swap(lhs.gridLineRanges, rhs.gridLineRanges); + std::swap(lhs.selectionFrom, rhs.selectionFrom); + std::swap(lhs.selectionTo, rhs.selectionTo); + std::swap(lhs.top, rhs.top); + std::swap(lhs.bottom, rhs.bottom); + } + std::vector mappings; std::vector glyphIndices; std::vector glyphAdvances; // same size as glyphIndices std::vector glyphOffsets; // same size as glyphIndices - std::vector colors; + std::vector colors; // same size as glyphIndices std::vector gridLineRanges; u16 selectionFrom = 0; u16 selectionTo = 0; diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 06a7ff0c0bc..26f16d834d6 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -35,12 +35,9 @@ Output main(PSData data) : SV_Target { case SHADING_TYPE_TEXT_BACKGROUND: float2 pos = data.texcoord / cellSize; - color = background[pos]; + color = all(pos < cellCount) ? background[pos] : backgroundColor; + color.rgb *= color.a; weights = float4(1, 1, 1, 1); - if (any(pos >= cellCount)) - { - color = backgroundColor; - } break; case SHADING_TYPE_TEXT_GRAYSCALE: { diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp index d7774fe9a6f..70e6e01806e 100644 --- a/src/renderer/dx/DxRenderer.cpp +++ b/src/renderer/dx/DxRenderer.cpp @@ -69,13 +69,15 @@ using namespace Microsoft::Console::Types; // TODO GH 2683: The default constructor should not throw. 
DxEngine::DxEngine() : RenderEngineBase(), + _pool{ til::pmr::get_default_resource() }, + _invalidMap{ &_pool }, _invalidScroll{}, _allInvalid{ false }, _firstFrame{ true }, _presentParams{ 0 }, _presentReady{ false }, _presentScroll{ 0 }, - _presentDirty{ { 0, 0, 120, 30 } }, + _presentDirty{ 0 }, _presentOffset{ 0 }, _isEnabled{ false }, _isPainting{ false }, @@ -95,8 +97,8 @@ DxEngine::DxEngine() : _pixelShaderPath{}, _forceFullRepaintRendering{ false }, _softwareRendering{ false }, - _antialiasingMode{ D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE }, - _defaultBackgroundIsTransparent{ false }, + _antialiasingMode{ D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE }, + _defaultBackgroundIsTransparent{ true }, _hwndTarget{ static_cast(INVALID_HANDLE_VALUE) }, _sizeTarget{}, _dpi{ USER_DEFAULT_SCREEN_DPI }, @@ -1052,13 +1054,17 @@ try } CATCH_LOG() -void DxEngine::_InvalidateRectangle(const til::rect&) +void DxEngine::_InvalidateRectangle(const til::rect& rc) { + const auto size = _invalidMap.size(); + const auto topLeft = til::point{ 0, std::clamp(rc.top, 0, size.height) }; + const auto bottomRight = til::point{ size.width, std::clamp(rc.bottom, 0, size.height) }; + _invalidMap.set({ topLeft, bottomRight }); } bool DxEngine::_IsAllInvalid() const noexcept { - return true; + return std::abs(_invalidScroll.y) >= _invalidMap.size().height; } // Routine Description: @@ -1152,6 +1158,7 @@ try if (deltaCells != til::point{}) { // Shift the contents of the map and fill in revealed area. 
+ _invalidMap.translate(deltaCells, true); _invalidScroll += deltaCells; _allInvalid = _IsAllInvalid(); } @@ -1170,6 +1177,7 @@ CATCH_RETURN(); [[nodiscard]] HRESULT DxEngine::InvalidateAll() noexcept try { + _invalidMap.set_all(); _allInvalid = true; // Since everything is invalidated here, mark this as a "first frame", so @@ -1259,6 +1267,19 @@ try RETURN_IF_FAILED(InvalidateAll()); } + if (TraceLoggingProviderEnabled(g_hDxRenderProvider, WINEVENT_LEVEL_VERBOSE, TIL_KEYWORD_TRACE)) + { + const auto invalidatedStr = _invalidMap.to_string(); + const auto invalidated = invalidatedStr.c_str(); + +#pragma warning(suppress : 26477 26485 26494 26482 26446 26447) // We don't control TraceLoggingWrite + TraceLoggingWrite(g_hDxRenderProvider, + "Invalid", + TraceLoggingWideString(invalidated), + TraceLoggingLevel(WINEVENT_LEVEL_VERBOSE), + TraceLoggingKeyword(TIL_KEYWORD_TRACE)); + } + if (_isEnabled) { const auto clientSize = _GetClientSize(); @@ -1294,6 +1315,12 @@ try _displaySizePixels = clientSize; } + if (const auto size = clientSize / glyphCellSize; size != _invalidMap.size()) + { + _invalidMap.resize(size); + RETURN_IF_FAILED(InvalidateAll()); + } + _d2dDeviceContext->BeginDraw(); _isPainting = true; @@ -1346,6 +1373,9 @@ try { if (_invalidScroll != til::point{ 0, 0 }) { + // Copy `til::rects` into RECT map. + _presentDirty.assign(_invalidMap.begin(), _invalidMap.end()); + // Scale all dirty rectangles into pixels std::transform(_presentDirty.begin(), _presentDirty.end(), _presentDirty.begin(), [&](const til::rect& rc) { return rc.scale_up(_fontRenderData->GlyphCell()); @@ -1355,7 +1385,7 @@ try const auto scrollPixels = (_invalidScroll * _fontRenderData->GlyphCell()); // The scroll rect is the entire field of cells, but in pixels. - til::rect scrollArea{ til::size{ 120, 30 } * _fontRenderData->GlyphCell() }; + til::rect scrollArea{ _invalidMap.size() * _fontRenderData->GlyphCell() }; // Reduce the size of the rectangle by the scroll. 
scrollArea.left = std::clamp(scrollArea.left + scrollPixels.x, scrollArea.left, scrollArea.right); @@ -1399,6 +1429,7 @@ try } } + _invalidMap.reset_all(); _allInvalid = false; _invalidScroll = {}; @@ -1451,7 +1482,7 @@ CATCH_RETURN() // Finally... if we're not using effects at all... let the render thread // go to sleep. It deserves it. That thread works hard. Also it sleeping // saves battery power and all sorts of related perf things. - return true; + return _terminalEffectsEnabled && !_pixelShaderPath.empty(); } // Method Description: @@ -1459,6 +1490,10 @@ CATCH_RETURN() // - See https://docs.microsoft.com/en-us/windows/uwp/gaming/reduce-latency-with-dxgi-1-3-swap-chains. void DxEngine::WaitUntilCanRender() noexcept { + // Throttle the DxEngine a bit down to ~60 FPS. + // This improves throughput for rendering complex or colored text. + Sleep(8); + if (_swapChainFrameLatencyWaitableObject) { WaitForSingleObjectEx(_swapChainFrameLatencyWaitableObject.get(), 100, true); @@ -1554,8 +1589,6 @@ void DxEngine::WaitUntilCanRender() noexcept _presentReady = false; _presentDirty.clear(); - _presentDirty.emplace_back(0, 0, 120, 30); - _presentOffset = { 0 }; _presentScroll = { 0 }; _presentParams = { 0 }; @@ -1594,7 +1627,29 @@ try } // If the entire thing is invalid, just use one big clear operation. - _d2dDeviceContext->Clear(nothing); + if (_invalidMap.all()) + { + _d2dDeviceContext->Clear(nothing); + } + else + { + // Runs are counts of cells. + // Use a transform by the size of one cell to convert cells-to-pixels + // as we clear. + _d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Scale(_fontRenderData->GlyphCell().to_d2d_size())); + for (const auto& rect : _invalidMap.runs()) + { + // Use aliased. + // For graphics reasons, it'll look better because it will ensure that + // the edges are cut nice and sharp (not blended by anti-aliasing). + // For performance reasons, it takes a lot less work to not + // do anti-alias blending. 
+ _d2dDeviceContext->PushAxisAlignedClip(rect.to_d2d_rect(), D2D1_ANTIALIAS_MODE_ALIASED); + _d2dDeviceContext->Clear(nothing); + _d2dDeviceContext->PopAxisAlignedClip(); + } + _d2dDeviceContext->SetTransform(D2D1::Matrix3x2F::Identity()); + } return S_OK; } @@ -2060,7 +2115,7 @@ CATCH_RETURN(); [[nodiscard]] HRESULT DxEngine::GetDirtyArea(std::span& area) noexcept try { - area = _presentDirty; + area = _invalidMap.runs(); return S_OK; } CATCH_RETURN(); diff --git a/src/renderer/dx/DxRenderer.hpp b/src/renderer/dx/DxRenderer.hpp index fa83b04fd0e..9dc9fdff741 100644 --- a/src/renderer/dx/DxRenderer.hpp +++ b/src/renderer/dx/DxRenderer.hpp @@ -180,6 +180,8 @@ namespace Microsoft::Console::Render uint16_t _hyperlinkHoveredId; bool _firstFrame; + std::pmr::unsynchronized_pool_resource _pool; + til::pmr::bitmap _invalidMap; til::point _invalidScroll; bool _allInvalid; From 7ddddfe1611707518d960942325b28a8109ae693 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 8 Mar 2023 20:25:36 +0100 Subject: [PATCH 03/37] Improve performance, Fix OOM when drawing whitespace --- src/renderer/atlas/AtlasEngine.cpp | 4 +- src/renderer/atlas/AtlasEngine.r.cpp | 11 +- src/renderer/atlas/BackendD3D11.cpp | 208 +++++++++++++++----------- src/renderer/atlas/BackendD3D11.h | 17 +-- src/renderer/atlas/shader_common.hlsl | 21 +-- src/renderer/atlas/shader_ps.hlsl | 3 +- src/renderer/atlas/shader_vs.hlsl | 15 +- 7 files changed, 154 insertions(+), 125 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 52e223b97a5..692bb49cf29 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -27,10 +27,10 @@ using namespace Microsoft::Console::Render::Atlas; AtlasEngine::AtlasEngine() { #ifdef NDEBUG - THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(_p.d2dFactory), nullptr, _p.d2dFactory.put_void())); + THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, 
__uuidof(_p.d2dFactory), nullptr, reinterpret_cast(_p.d2dFactory.addressof()))); #else static constexpr D2D1_FACTORY_OPTIONS options{ .debugLevel = D2D1_DEBUG_LEVEL_INFORMATION }; - THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(_p.d2dFactory), &options, _p.d2dFactory.put_void())); + THROW_IF_FAILED(D2D1CreateFactory(D2D1_FACTORY_TYPE_SINGLE_THREADED, __uuidof(_p.d2dFactory), &options, reinterpret_cast(_p.d2dFactory.addressof()))); #endif THROW_IF_FAILED(DWriteCreateFactory(DWRITE_FACTORY_TYPE_SHARED, __uuidof(_p.dwriteFactory), reinterpret_cast<::IUnknown**>(_p.dwriteFactory.addressof()))); diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 78137ae0b58..65da7c37e94 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -39,6 +39,7 @@ try if (_p.dxgiFactory && !_p.dxgiFactory->IsCurrent()) { + _p.dxgiFactory.reset(); _b.reset(); } @@ -52,18 +53,21 @@ try } catch (const wil::ResultException& exception) { - if (_p.warningCallback) + const auto hr = exception.GetErrorCode(); + const auto isExpected = hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET || hr == D2DERR_RECREATE_TARGET; + + if (!isExpected && _p.warningCallback) { try { - _p.warningCallback(exception.GetErrorCode()); + _p.warningCallback(hr); } CATCH_LOG() } _p.dxgiFactory.reset(); _b.reset(); - return E_PENDING; // Indicate a retry to the renderer + return isExpected ? E_PENDING : hr; } CATCH_RETURN() @@ -116,6 +120,7 @@ void AtlasEngine::_recreateBackend() static constexpr UINT flags = 0; #endif + // IID_PPV_ARGS doesn't work here for some reason. 
THROW_IF_FAILED(CreateDXGIFactory2(flags, __uuidof(_p.dxgiFactory), _p.dxgiFactory.put_void())); auto d2dMode = debugForceD2DMode; diff --git a/src/renderer/atlas/BackendD3D11.cpp b/src/renderer/atlas/BackendD3D11.cpp index fed7151ecba..63fd8a60f91 100644 --- a/src/renderer/atlas/BackendD3D11.cpp +++ b/src/renderer/atlas/BackendD3D11.cpp @@ -127,6 +127,52 @@ BackendD3D11::BackendD3D11(wil::com_ptr device, wil::com_ptrCreateVertexShader(&shader_vs[0], sizeof(shader_vs), nullptr, _vertexShader.addressof())); THROW_IF_FAILED(_device->CreatePixelShader(&shader_ps[0], sizeof(shader_ps), nullptr, _pixelShader.addressof())); + { + static constexpr D3D11_INPUT_ELEMENT_DESC layout[]{ + { "SV_Position", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0 }, + { "position", 0, DXGI_FORMAT_R16G16_SINT, 1, offsetof(QuadInstance, position), D3D11_INPUT_PER_INSTANCE_DATA, 1 }, + { "size", 0, DXGI_FORMAT_R16G16_SINT, 1, offsetof(QuadInstance, size), D3D11_INPUT_PER_INSTANCE_DATA, 1 }, + { "texcoord", 0, DXGI_FORMAT_R16G16_SINT, 1, offsetof(QuadInstance, texcoord), D3D11_INPUT_PER_INSTANCE_DATA, 1 }, + { "shadingType", 0, DXGI_FORMAT_R32_UINT, 1, offsetof(QuadInstance, shadingType), D3D11_INPUT_PER_INSTANCE_DATA, 1 }, + { "color", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 1, offsetof(QuadInstance, color), D3D11_INPUT_PER_INSTANCE_DATA, 1 }, + }; + THROW_IF_FAILED(_device->CreateInputLayout(&layout[0], gsl::narrow_cast(std::size(layout)), &shader_vs[0], sizeof(shader_vs), _inputLayout.addressof())); + } + + { + static constexpr f32x2 vertices[]{ + { 0, 0 }, + { 1, 0 }, + { 1, 1 }, + { 0, 1 }, + }; + static constexpr D3D11_SUBRESOURCE_DATA initialData{ &vertices[0] }; + + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(vertices); + desc.Usage = D3D11_USAGE_IMMUTABLE; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; + THROW_IF_FAILED(_device->CreateBuffer(&desc, &initialData, _vertexBuffer.addressof())); + } + + { + static constexpr u16 indices[]{ + 0, // { 0, 0 } + 1, // { 1, 0 } 
+ 2, // { 1, 1 } + 2, // { 1, 1 } + 3, // { 0, 1 } + 0, // { 0, 0 } + }; + static constexpr D3D11_SUBRESOURCE_DATA initialData{ &indices[0] }; + + D3D11_BUFFER_DESC desc{}; + desc.ByteWidth = sizeof(indices); + desc.Usage = D3D11_USAGE_IMMUTABLE; + desc.BindFlags = D3D11_BIND_INDEX_BUFFER; + THROW_IF_FAILED(_device->CreateBuffer(&desc, &initialData, _indexBuffer.addressof())); + } + { static constexpr D3D11_BUFFER_DESC desc{ .ByteWidth = sizeof(VSConstBuffer), @@ -592,8 +638,8 @@ void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) { PSConstBuffer data; data.backgroundColor = colorFromU32(p.s->misc->backgroundColor); - data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; + data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; DWrite_GetGammaRatios(_gamma, data.gammaRatios); data.enhancedContrast = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
_cleartypeEnhancedContrast : _grayscaleEnhancedContrast; data.dashedLineLength = p.s->font->underlineWidth * 3.0f; @@ -604,13 +650,17 @@ void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) void BackendD3D11::_setupDeviceContextState(const RenderingPayload& p) { // IA: Input Assembler + ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() }; + static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) }; + static constexpr UINT offsets[]{ 0, 0 }; + _deviceContext->IASetIndexBuffer(_indexBuffer.get(), DXGI_FORMAT_R16_UINT, 0); + _deviceContext->IASetInputLayout(_inputLayout.get()); _deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); - _deviceContext->IASetIndexBuffer(_indexBuffer.get(), _indicesFormat, 0); + _deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]); // VS: Vertex Shader _deviceContext->VSSetShader(_vertexShader.get(), nullptr, 0); _deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof()); - _deviceContext->VSSetShaderResources(0, 1, _instanceBufferView.addressof()); // RS: Rasterizer Stage D3D11_VIEWPORT viewport{}; @@ -619,7 +669,7 @@ void BackendD3D11::_setupDeviceContextState(const RenderingPayload& p) _deviceContext->RSSetViewports(1, &viewport); // PS: Pixel Shader - ID3D11ShaderResourceView* const resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); _deviceContext->PSSetShaderResources(0, 2, &resources[0]); @@ -707,7 +757,7 @@ void BackendD3D11::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); } - ID3D11ShaderResourceView* const resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; 
+ ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShaderResources(0, 2, &resources[0]); } @@ -737,7 +787,13 @@ void BackendD3D11::_appendQuad(i32r position, i32r texcoord, u32 color, ShadingT _bumpInstancesSize(); } - _instances[_instancesSize++] = QuadInstance{ position, texcoord, color, static_cast(shadingType) }; + static constexpr auto pack = [](i32 x, i32 y) { + return i16x2{ gsl::narrow_cast(x), gsl::narrow_cast(y) }; + }; + const i16x2 position2 = pack(position.left, position.top); + const i16x2 size2 = pack(position.right - position.left, position.bottom - position.top); + const i16x2 texcoord2 = pack(texcoord.left, texcoord.top); + _instances[_instancesSize++] = QuadInstance{ position2, size2, texcoord2, static_cast(shadingType), color }; } void BackendD3D11::_bumpInstancesSize() @@ -764,59 +820,25 @@ void BackendD3D11::_flushQuads(const RenderingPayload& p) _deviceContext->Unmap(_instanceBuffer.get(), 0); } - { - D3D11_MAPPED_SUBRESOURCE mapped{}; - THROW_IF_FAILED(_deviceContext->Map(_indexBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - - if (_indicesFormat == DXGI_FORMAT_R16_UINT) - { - auto data = static_cast(mapped.pData); - const u16 vertices = gsl::narrow_cast(4 * _instancesSize); - for (u16 off = 0; off < vertices; off += 4) - { - *data++ = off + 0; - *data++ = off + 1; - *data++ = off + 2; - *data++ = off + 3; - *data++ = off + 2; - *data++ = off + 1; - } - } - else - { - assert(_indicesFormat == DXGI_FORMAT_R32_UINT); - auto data = static_cast(mapped.pData); - const u32 vertices = gsl::narrow_cast(4 * _instancesSize); - for (u32 off = 0; off < vertices; off += 4) - { - *data++ = off + 0; - *data++ = off + 1; - *data++ = off + 2; - *data++ = off + 3; - *data++ = off + 2; - *data++ = off + 1; - } - } - - _deviceContext->Unmap(_indexBuffer.get(), 0); - } - - // I found 4 approaches to drawing lots of quads quickly. + // I found 4 approaches to drawing lots of quads quickly. 
There are probably even more. // They can often be found in discussions about "particle" or "point sprite" rendering in game development. // * Compute Shader: My understanding is that at the time of writing games are moving over to bucketing // particles into "tiles" on the screen and drawing them with a compute shader. While this improves // performance, it doesn't mix well with our goal of allowing arbitrary overlaps between glyphs. // Additionally none of the next 3 approaches use any significant amount of GPU time in the first place. - // * Geometry Shader: Geometry shaders can generate vertices on the fly, which would neatly replace - // our need for an index buffer. The reason this wasn't chosen is the same as for the next point. - // * DrawInstanced: On my own hardware (Nvidia RTX 4090) this seems to perform ~50% better than the final point, - // but with no significant difference in power draw. However the popular "Vertex Shader Tricks" talk from - // Bill Bilodeau at GDC 2014 suggests that this at least doesn't apply to 2014ish hardware, which supposedly - // performs poorly with very small, instanced meshes. Furthermore, public feedback suggests that we still - // have a lot of users with older hardware, so I've chosen the following approach, suggested in the talk. - // * DrawIndexed: This works about the same as DrawInstanced, but instead of using D3D11_INPUT_PER_INSTANCE_DATA, - // it uses a SRV (shader resource view) for instance data and maps each SV_VertexID to a SRV slot. - _deviceContext->DrawIndexed(gsl::narrow_cast(6 * _instancesSize), 0, 0); + // * Geometry Shader: Geometry shaders can generate vertices on the fly, which would neatly replace our need + // for an index buffer. However, many sources claim they're significantly slower than the following approaches. 
+ // * DrawIndexed & DrawInstanced: Again, many sources claim that GPU instancing (Draw(Indexed)Instanced) performs + // poorly for small meshes, and instead indexed vertices with a SRV (shader resource view) should be used. + // The popular "Vertex Shader Tricks" talk from Bill Bilodeau at GDC 2014 suggests this approach, explains + // how it works (you divide the `SV_VertexID` by 4 and index into the SRV that contains the per-instance data; + // it's basically manual instancing inside the vertex shader) and shows how it outperforms regular instancing. + // However on my own limited test hardware (built around ~2020), I found that for at least our use case, + // GPU instancing matches the performance of using a custom buffer. In fact on my Nvidia GPU in particular, + // instancing with ~10k instances appears to be about 50% faster and so DrawInstanced was chosen. + // Instead I found that packing instance data as tightly as possible made the biggest performance difference, + // and packing 16 bit integers with ID3D11InputLayout is quite a bit more convenient too. + _deviceContext->DrawIndexedInstanced(6, gsl::narrow_cast(_instancesSize), 0, 0, 0); _instancesSize = 0; } @@ -831,41 +853,26 @@ void BackendD3D11::_recreateInstanceBuffers(const RenderingPayload& p) // std::bit_ceil will result in a nice exponential growth curve. I don't know exactly how structured buffers are treated // by various drivers, but I'm assuming that they prefer buffer sizes that are close to power-of-2 sizes as well. const auto newInstancesSize = std::bit_ceil(minSize * sizeof(QuadInstance)) / sizeof(QuadInstance); - const auto newIndicesSize = newInstancesSize * 6; - const auto vertices = newInstancesSize * 4; - const auto indicesFormat = vertices <= R16max ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; - const auto indexSize = vertices <= R16max ? 
sizeof(u16) : sizeof(u32); - _indexBuffer.reset(); _instanceBuffer.reset(); - _instanceBufferView.reset(); - - { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = gsl::narrow(newIndicesSize * indexSize); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_INDEX_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _indexBuffer.addressof())); - } { D3D11_BUFFER_DESC desc{}; desc.ByteWidth = gsl::narrow(newInstancesSize * sizeof(QuadInstance)); desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; + desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED; desc.StructureByteStride = sizeof(QuadInstance); THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _instanceBuffer.addressof())); - THROW_IF_FAILED(_device->CreateShaderResourceView(_instanceBuffer.get(), nullptr, _instanceBufferView.addressof())); } - _deviceContext->IASetIndexBuffer(_indexBuffer.get(), indicesFormat, 0); - _deviceContext->VSSetShaderResources(0, 1, _instanceBufferView.addressof()); + // IA: Input Assembler + ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() }; + static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) }; + static constexpr UINT offsets[]{ 0, 0 }; + _deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]); _instanceBufferSize = newInstancesSize; - _indicesFormat = indicesFormat; } void BackendD3D11::_drawBackground(const RenderingPayload& p) @@ -881,6 +888,11 @@ void BackendD3D11::_drawBackground(const RenderingPayload& p) } _deviceContext->Unmap(_backgroundBitmap.get(), 0); } + // In testing I found that on my AMD GPU separating the background pass out from the rest + // improves performance by ~20%, if a fullscreen triangle is used for it. 
However, I felt + // like the added code didn't justify the improvement (6.4% -> 5.2% GPU load at 60 FPS), + // given that AGS_PRIMITIVE_TOPOLOGY_SCREENRECTLIST and AGS_PRIMITIVE_TOPOLOGY_QUADLIST exist + // and would serve us much better. Finally, Chromium is still ~2.5x faster than us. { const i32r rect{ 0, 0, p.s->targetSize.x, p.s->targetSize.y }; _appendQuad(rect, rect, 0, ShadingType::Background); @@ -955,7 +967,7 @@ bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); if (box.left >= box.right || box.top >= box.bottom) { - entry = {}; + entry.shadingType = 0; return true; } @@ -1205,7 +1217,7 @@ void BackendD3D11::_drawSelection(const RenderingPayload& p) // The way this is implemented isn't very smart, but we also don't have very many rows to iterate through. if (row.selectionFrom == lastFrom && row.selectionTo == lastTo) { - _getLastQuad().position.bottom = p.s->font->cellSize.y * (y + 1); + _getLastQuad().size.y += p.s->font->cellSize.y; } else { @@ -1247,19 +1259,14 @@ void BackendD3D11::_executeCustomShader(RenderingPayload& p) _deviceContext->OMSetRenderTargets(1, _customRenderTargetView.addressof(), nullptr); // IA: Input Assembler + _deviceContext->IASetIndexBuffer(nullptr, DXGI_FORMAT_UNKNOWN, 0); + _deviceContext->IASetInputLayout(nullptr); _deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP); - _deviceContext->IASetIndexBuffer(_indexBuffer.get(), _indicesFormat, 0); + _deviceContext->IASetVertexBuffers(0, 0, nullptr, nullptr, nullptr); // VS: Vertex Shader _deviceContext->VSSetShader(_customVertexShader.get(), nullptr, 0); _deviceContext->VSSetConstantBuffers(0, 0, nullptr); - _deviceContext->VSSetShaderResources(0, 0, nullptr); - - // RS: Rasterizer Stage - D3D11_VIEWPORT viewport{}; - viewport.Width = static_cast(p.s->targetSize.x); - viewport.Height = static_cast(p.s->targetSize.y); - _deviceContext->RSSetViewports(1, &viewport); // PS: Pixel 
Shader _deviceContext->PSSetShader(_customPixelShader.get(), nullptr, 0); @@ -1269,11 +1276,36 @@ void BackendD3D11::_executeCustomShader(RenderingPayload& p) // OM: Output Merger _deviceContext->OMSetBlendState(nullptr, nullptr, 0xffffffff); - - _deviceContext->Draw(4, 0); } - _setupDeviceContextState(p); + _deviceContext->Draw(4, 0); + + { + _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); + + // IA: Input Assembler + ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() }; + static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) }; + static constexpr UINT offsets[]{ 0, 0 }; + _deviceContext->IASetIndexBuffer(_indexBuffer.get(), DXGI_FORMAT_R16_UINT, 0); + _deviceContext->IASetInputLayout(_inputLayout.get()); + _deviceContext->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + _deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]); + + // VS: Vertex Shader + _deviceContext->VSSetShader(_vertexShader.get(), nullptr, 0); + _deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof()); + + // PS: Pixel Shader + ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; + _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); + _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); + _deviceContext->PSSetShaderResources(0, 2, &resources[0]); + _deviceContext->PSSetSamplers(0, 0, nullptr); + + // OM: Output Merger + _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); + } // With custom shaders, everything might be invalidated, so we have to // indirectly disable Present1() and its dirty rects this way. 
diff --git a/src/renderer/atlas/BackendD3D11.h b/src/renderer/atlas/BackendD3D11.h index becf9562a19..94cca1b941b 100644 --- a/src/renderer/atlas/BackendD3D11.h +++ b/src/renderer/atlas/BackendD3D11.h @@ -34,8 +34,8 @@ namespace Microsoft::Console::Render::Atlas struct alignas(16) PSConstBuffer { alignas(sizeof(f32x4)) f32x4 backgroundColor; - alignas(sizeof(f32x2)) f32x2 cellCount; alignas(sizeof(f32x2)) f32x2 cellSize; + alignas(sizeof(f32x2)) f32x2 cellCount; alignas(sizeof(f32x4)) f32 gammaRatios[4]{}; alignas(sizeof(f32)) f32 enhancedContrast = 0; alignas(sizeof(f32)) f32 dashedLineLength = 0; @@ -62,15 +62,14 @@ namespace Microsoft::Console::Render::Atlas SolidFill, }; - struct alignas(16) QuadInstance + struct QuadInstance { - alignas(sizeof(i32r)) i32r position; - alignas(sizeof(i32r)) i32r texcoord; - alignas(sizeof(u32)) u32 color = 0; + alignas(sizeof(i16x2)) i16x2 position; + alignas(sizeof(i16x2)) i16x2 size; + alignas(sizeof(i16x2)) i16x2 texcoord; alignas(sizeof(u32)) u32 shadingType = 0; - alignas(sizeof(u32x2)) u32x2 padding; + alignas(sizeof(u32)) u32 color = 0; }; - static_assert(sizeof(QuadInstance) == 48); struct GlyphCacheEntry { @@ -145,19 +144,19 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _deviceContext; wil::com_ptr _renderTargetView; + wil::com_ptr _inputLayout; wil::com_ptr _vertexShader; wil::com_ptr _pixelShader; wil::com_ptr _blendState; wil::com_ptr _blendStateInvert; wil::com_ptr _vsConstantBuffer; wil::com_ptr _psConstantBuffer; + wil::com_ptr _vertexBuffer; wil::com_ptr _indexBuffer; wil::com_ptr _instanceBuffer; - wil::com_ptr _instanceBufferView; size_t _instanceBufferSize = 0; Buffer _instances; size_t _instancesSize = 0; - DXGI_FORMAT _indicesFormat = DXGI_FORMAT_UNKNOWN; wil::com_ptr _customRenderTargetView; wil::com_ptr _customOffscreenTexture; diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 58ce862161a..82049a0c3d0 100644 --- 
a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -10,23 +10,24 @@ #define SHADING_TYPE_SOLID_FILL 5 // clang-format on +// Structured Buffers are tightly packed. Nvidia recommends padding them to avoid crossing 128-bit +// cache lines: https://developer.nvidia.com/content/understanding-structured-buffer-performance struct VSData { - int4 position : POSITION; - int4 texcoord : TEXCOORD; - uint color : COLOR; - uint shadingType : ShadingType; - // Structured Buffers are tightly packed. Nvidia recommends padding them to avoid crossing 128-bit - // cache lines: https://developer.nvidia.com/content/understanding-structured-buffer-performance - uint2 padding; + float2 vertex : SV_Position; + int2 position : position; + int2 size : size; + int2 texcoord : texcoord; + uint shadingType : shadingType; + float4 color : color; }; struct PSData { - nointerpolation uint shadingType : ShadingType; - nointerpolation float4 color : COLOR; float4 position : SV_Position; - float2 texcoord : TEXCOORD; + float2 texcoord : texcoord; + nointerpolation uint shadingType : shadingType; + nointerpolation float4 color : color; }; float4 premultiplyColor(float4 color) diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 26f16d834d6..2b86534ded4 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -7,14 +7,13 @@ cbuffer ConstBuffer : register(b0) { float4 backgroundColor; - float2 cellCount; float2 cellSize; + float2 cellCount; float4 gammaRatios; float enhancedContrast; float dashedLineLength; } -SamplerState backgroundSampler : register(s0); Texture2D background : register(t0); Texture2D glyphAtlas : register(t1); diff --git a/src/renderer/atlas/shader_vs.hlsl b/src/renderer/atlas/shader_vs.hlsl index fea289a0978..49b9030b156 100644 --- a/src/renderer/atlas/shader_vs.hlsl +++ b/src/renderer/atlas/shader_vs.hlsl @@ -8,24 +8,17 @@ cbuffer ConstBuffer : register(b0) float2 
positionScale; } -StructuredBuffer instances : register(t0); - // clang-format off -PSData main(uint id: SV_VertexID) +PSData main(VSData data) // clang-format on { - VSData data = instances[id / 4]; - PSData output; + output.color = data.color; output.shadingType = data.shadingType; - output.color = decodeRGBA(data.color); - output.position.x = (id & 1) ? data.position.z : data.position.x; - output.position.y = (id & 2) ? data.position.w : data.position.y; // positionScale is expected to be float2(2.0f / sizeInPixel.x, -2.0f / sizeInPixel.y). Together with the // addition below this will transform our "position" from pixel into normalized device coordinate (NDC) space. - output.position.xy = output.position.xy * positionScale + float2(-1.0f, 1.0f); + output.position.xy = (data.position + data.vertex.xy * data.size) * positionScale + float2(-1.0f, 1.0f); output.position.zw = float2(0, 1); - output.texcoord.x = (id & 1) ? data.texcoord.z : data.texcoord.x; - output.texcoord.y = (id & 2) ? data.texcoord.w : data.texcoord.y; + output.texcoord = data.texcoord + data.vertex.xy * data.size; return output; } From 1eafcd4dd1356df347ccd0b9244601bb61376ca1 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 9 Mar 2023 00:35:29 +0100 Subject: [PATCH 04/37] Fix glyph rounding error, Fix custom shaders --- src/renderer/atlas/BackendD3D11.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/renderer/atlas/BackendD3D11.cpp b/src/renderer/atlas/BackendD3D11.cpp index 63fd8a60f91..b78e9dfdcc7 100644 --- a/src/renderer/atlas/BackendD3D11.cpp +++ b/src/renderer/atlas/BackendD3D11.cpp @@ -967,35 +967,34 @@ bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); if (box.left >= box.right || box.top >= box.bottom) { + // This will indicate to BackendD3D11::_drawText that this glyph is whitespace. 
entry.shadingType = 0; return true; } - box.left *= p.d.font.pixelPerDIP; - box.top *= p.d.font.pixelPerDIP; - box.right *= p.d.font.pixelPerDIP; - box.bottom *= p.d.font.pixelPerDIP; + const auto l = lround(box.left * p.d.font.pixelPerDIP) - 1; + const auto t = lround(box.top * p.d.font.pixelPerDIP) - 1; + const auto r = lround(box.right * p.d.font.pixelPerDIP) + 1; + const auto b = lround(box.bottom * p.d.font.pixelPerDIP) + 1; // We'll add a 1px padding on all 4 sides, by adding +2px to the width and +1px to the baseline origin. // We do this to avoid neighboring glyphs from overlapping, since the blackbox measurement is only an estimate. stbrp_rect rect{}; - rect.w = gsl::narrow_cast(box.right - box.left + 2.5f); - rect.h = gsl::narrow_cast(box.bottom - box.top + 2.5f); + rect.w = gsl::narrow_cast(r - l); + rect.h = gsl::narrow_cast(b - t); if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) { return false; } - const D2D1_POINT_2F baseline{ - roundf(rect.x - box.left + 1.0f) * p.d.font.dipPerPixel, - roundf(rect.y - box.top + 1.0f) * p.d.font.dipPerPixel, - }; + const D2D1_POINT_2F baseline{ (rect.x - l) * p.d.font.dipPerPixel, (rect.y - t) * p.d.font.dipPerPixel }; const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); + const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? ShadingType::TextClearType : ShadingType::TextGrayscale); - entry.shadingType = gsl::narrow_cast(colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
ShadingType::TextClearType : ShadingType::TextGrayscale)); - entry.offset.x = gsl::narrow_cast(lround(box.left)); - entry.offset.y = gsl::narrow_cast(lround(box.top)); + entry.shadingType = gsl::narrow_cast(shadingType); + entry.offset.x = gsl::narrow_cast(l); + entry.offset.y = gsl::narrow_cast(t); entry.texcoord.left = rect.x; entry.texcoord.top = rect.y; entry.texcoord.right = rect.x + rect.w; @@ -1281,8 +1280,6 @@ void BackendD3D11::_executeCustomShader(RenderingPayload& p) _deviceContext->Draw(4, 0); { - _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); - // IA: Input Assembler ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() }; static constexpr UINT strides[]{ sizeof(f32x2), sizeof(QuadInstance) }; @@ -1305,6 +1302,7 @@ void BackendD3D11::_executeCustomShader(RenderingPayload& p) // OM: Output Merger _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); + _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); } // With custom shaders, everything might be invalidated, so we have to From 01e596ca7ce85b1add48b7b8abba0f8b0fd1e287 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 15 Mar 2023 18:06:59 +0100 Subject: [PATCH 05/37] Adapter selection, Overlapping gridlines, QuadInstance simplification --- src/renderer/atlas/AtlasEngine.cpp | 2 +- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/AtlasEngine.r.cpp | 50 +- src/renderer/atlas/Backend.cpp | 40 +- src/renderer/atlas/BackendD2D.cpp | 166 +++--- src/renderer/atlas/BackendD2D.h | 1 + .../{BackendD3D11.cpp => BackendD3D.cpp} | 484 ++++++++++-------- .../atlas/{BackendD3D11.h => BackendD3D.h} | 30 +- src/renderer/atlas/atlas.vcxproj | 6 +- src/renderer/atlas/common.h | 51 +- src/renderer/atlas/shader_common.hlsl | 6 +- 11 files changed, 449 insertions(+), 389 deletions(-) rename src/renderer/atlas/{BackendD3D11.cpp => BackendD3D.cpp} (78%) rename src/renderer/atlas/{BackendD3D11.h => BackendD3D.h} 
(85%) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 692bb49cf29..b078bd7d44e 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -117,7 +117,7 @@ try // Same as std::move, but with std::swap to preserve std::vector allocations. // Also, it allows to include the top/bottom adjustment. - for (; first != end; ++dest, (void)++first) + for (; first != end; ++dest, ++first) { using std::swap; auto& d = *dest; diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 64b125c0b77..83a2c2b74f2 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -115,7 +115,7 @@ namespace Microsoft::Console::Render::Atlas struct ApiState { - til::generational s = Settings::invalidated(); + til::generational s; // This structure is loosely sorted in chunks from "very often accessed together" // to seldom accessed and/or usually not together. diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 65da7c37e94..e0d8c0e1fec 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -5,7 +5,7 @@ #include "AtlasEngine.h" #include "BackendD2D.h" -#include "BackendD3D11.h" +#include "BackendD3D.h" // #### NOTE #### // If you see any code in here that contains "_api." you might be seeing a race condition. 
@@ -98,7 +98,7 @@ void AtlasEngine::_recreateBackend() static constexpr GUID dxgiDebugAll{ 0xe48ae283, 0xda80, 0x490b, { 0x87, 0xe6, 0x43, 0xe9, 0xa9, 0xcf, 0xda, 0x8 } }; for (const auto severity : { DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING, DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO }) { - infoQueue->SetBreakOnSeverity(dxgiDebugAll, severity, true); + LOG_IF_FAILED(infoQueue->SetBreakOnSeverity(dxgiDebugAll, severity, true)); } } } @@ -110,7 +110,7 @@ void AtlasEngine::_recreateBackend() { if (const auto func = GetProcAddressByFunctionDeclaration(module, DXGIDeclareAdapterRemovalSupport)) { - func(); + LOG_IF_FAILED(func()); } } @@ -137,34 +137,30 @@ void AtlasEngine::_recreateBackend() | D3D11_CREATE_DEVICE_BGRA_SUPPORT; wil::com_ptr dxgiAdapter; - THROW_IF_FAILED(_p.dxgiFactory->EnumAdapters1(0, dxgiAdapter.addressof())); - { - auto findSoftwareAdapter = _p.s->target->useSoftwareRendering; - auto adapter = dxgiAdapter; - UINT i = 0; + const auto useSoftwareRendering = _p.s->target->useSoftwareRendering; + DXGI_ADAPTER_DESC1 desc{}; + UINT index = 0; - for (;;) + do { - DXGI_ADAPTER_DESC1 desc; - THROW_IF_FAILED(adapter->GetDesc1(&desc)); + THROW_IF_FAILED(_p.dxgiFactory->EnumAdapters1(index++, dxgiAdapter.put())); + THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); - // Switch to D2D mode if any adapter is a remote adapter (RDP). - d2dMode |= WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_REMOTE); + - // If useSoftwareRendering is true we search for the first WARP adapter. - if (findSoftwareAdapter && WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)) - { - WI_ClearFlag(deviceFlags, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS); - dxgiAdapter = std::move(adapter); - findSoftwareAdapter = false; - } + // If useSoftwareRendering is false we exit during the first iteration. 
Using the default adapter (index 0) + // is the right thing to do under most circumstances, unless you _really_ want to get your hands dirty. + // The alternative is to track the window rectangle in respect to all IDXGIOutputs and select the right + // IDXGIAdapter, while also considering the "graphics preference" override in the windows settings app, etc. + // + // If useSoftwareRendering is true we search until we find the first WARP adapter (usually the last adapter). + } while (useSoftwareRendering && WI_IsFlagClear(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)); - ++i; - if (_p.dxgiFactory->EnumAdapters1(i, adapter.put()) == DXGI_ERROR_NOT_FOUND) - { - break; - } + if (WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)) + { + WI_ClearFlag(deviceFlags, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS); + d2dMode = true; } } @@ -204,7 +200,7 @@ void AtlasEngine::_recreateBackend() { for (const auto severity : { D3D11_MESSAGE_SEVERITY_CORRUPTION, D3D11_MESSAGE_SEVERITY_ERROR, D3D11_MESSAGE_SEVERITY_WARNING, D3D11_MESSAGE_SEVERITY_INFO }) { - d3dInfoQueue->SetBreakOnSeverity(severity, true); + LOG_IF_FAILED(d3dInfoQueue->SetBreakOnSeverity(severity, true)); } } } @@ -228,6 +224,6 @@ void AtlasEngine::_recreateBackend() } else { - _b = std::make_unique(std::move(device), std::move(deviceContext)); + _b = std::make_unique(std::move(device), std::move(deviceContext)); } } diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 393ae22e10f..46cc143a72c 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -25,39 +25,40 @@ void SwapChainManager::Present(const RenderingPayload& p) { const til::rect fullRect{ 0, 0, p.s->cellCount.x, p.s->cellCount.y }; + DXGI_PRESENT_PARAMETERS params{}; + RECT dirtyRect{}; + RECT scrollRect{}; + POINT scrollOffset{}; + if (p.dirtyRect != fullRect) { - auto dirtyRectInPx = p.dirtyRect; - dirtyRectInPx.left *= p.s->font->cellSize.x; - dirtyRectInPx.top *= 
p.s->font->cellSize.y; - dirtyRectInPx.right *= p.s->font->cellSize.x; - dirtyRectInPx.bottom *= p.s->font->cellSize.y; + dirtyRect = p.dirtyRect.to_win32_rect(); + dirtyRect.left *= p.s->font->cellSize.x; + dirtyRect.top *= p.s->font->cellSize.y; + dirtyRect.right *= p.s->font->cellSize.x; + dirtyRect.bottom *= p.s->font->cellSize.y; - // This block will enlarge the dirtyRectInPx to handle glyphs that overlap their rows. + // This block will enlarge the dirtyRect to handle glyphs that overlap their rows vertically. const auto actualDirtyTop = gsl::at(p.rows, p.dirtyRect.top).top; const auto actualDirtyBottom = gsl::at(p.rows, gsl::narrow_cast(p.dirtyRect.bottom) - 1).bottom; // Since rows might be taller than their cells, they might have drawn outside of the viewport. // FYI using std::clamp() here would be dangerous. If std::clamp() is given a "min" that is greater // than "max" it'll return min, but our calculation of .bottom wants to do the exact opposite. - dirtyRectInPx.top = std::max(std::min(dirtyRectInPx.top, actualDirtyTop), 0); - dirtyRectInPx.bottom = std::min(std::max(dirtyRectInPx.bottom, actualDirtyBottom), static_cast(_targetSize.y)); + dirtyRect.top = std::max(std::min(dirtyRect.top, LONG{ actualDirtyTop }), 0l); + dirtyRect.bottom = std::min(std::max(dirtyRect.bottom, LONG{ actualDirtyBottom }), LONG{ _targetSize.y }); // The swap chain might have a different size than the TextBuffer (due to the renderer running asynchronously) and so // we have to ensure to clamp the bottom/right coordinates into _targetSize. The above already did so for bottom. - dirtyRectInPx.right = std::min(dirtyRectInPx.right, static_cast(_targetSize.x)); + dirtyRect.right = std::min(dirtyRect.right, LONG{ _targetSize.x }); // If a row of text has been changed, it's width will equal the full rect. In that case we should // also redraw the margin on the right, as overlapping glyphs might have previously drawn into it. 
if (p.dirtyRect.left == fullRect.left && p.dirtyRect.right == fullRect.right) { - dirtyRectInPx.right = _targetSize.x; + dirtyRect.right = _targetSize.x; } - RECT scrollRect{}; - POINT scrollOffset{}; - DXGI_PRESENT_PARAMETERS params{ - .DirtyRectsCount = 1, - .pDirtyRects = dirtyRectInPx.as_win32_rect(), - }; + params.DirtyRectsCount = 1; + params.pDirtyRects = &dirtyRect; if (p.scrollOffset) { @@ -73,14 +74,9 @@ void SwapChainManager::Present(const RenderingPayload& p) params.pScrollRect = &scrollRect; params.pScrollOffset = &scrollOffset; } - - THROW_IF_FAILED(_swapChain->Present1(1, 0, ¶ms)); - } - else - { - THROW_IF_FAILED(_swapChain->Present(1, 0)); } + THROW_IF_FAILED(_swapChain->Present1(1, 0, ¶ms)); _waitForPresentation = true; } diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 6e9588f38fb..1e2812dc7f4 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -113,8 +113,8 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) }; const D2D1_SIZE_U size{ p.s->cellCount.x, p.s->cellCount.y }; const D2D1_MATRIX_3X2_F transform{ - ._11 = static_cast(p.s->font->cellSize.x), - ._22 = static_cast(p.s->font->cellSize.y), + .m11 = static_cast(p.s->font->cellSize.x), + .m22 = static_cast(p.s->font->cellSize.y), }; THROW_IF_FAILED(_renderTarget->CreateBitmap(size, nullptr, 0, &props, _backgroundBitmap.put())); THROW_IF_FAILED(_renderTarget->CreateBitmapBrush(_backgroundBitmap.get(), _backgroundBrush.put())); @@ -206,78 +206,116 @@ void BackendD2D::_drawGridlines(const RenderingPayload& p) u16 y = 0; for (const auto& row : p.rows) { - const auto top = p.d.font.cellSizeDIP.y * y; - const auto bottom = p.d.font.cellSizeDIP.y * (y + 1); - - for (const auto& r : row.gridLineRanges) + if (!row.gridLineRanges.empty()) { - // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
- assert(r.lines.any()); + _drawGridlineRow(p, row, y); + } + y++; + } +} - D2D1_RECT_F rect{ r.from * p.d.font.cellSizeDIP.x, top, r.to * p.d.font.cellSizeDIP.x, bottom }; +void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) +{ + const auto columnToDIP = [&](til::CoordType i) { + return i * p.d.font.cellSizeDIP.x; + }; + const auto rowToDIP = [&](til::CoordType i) { + return i * p.d.font.cellSizeDIP.y; + }; + const auto pxToDIP = [&](til::CoordType i) { + return i * p.d.font.dipPerPixel; + }; + + const auto top = rowToDIP(y); + const auto bottom = top + p.d.font.cellSizeDIP.y; + const auto thinLineWidth = pxToDIP(p.s->font->thinLineWidth); - if (r.lines.test(GridLines::Left)) - { - for (auto i = r.from; i < r.to; ++i) - { - rect.left = i * p.d.font.cellSizeDIP.x; - rect.right = rect.left + p.s->font->thinLineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); - } - } - if (r.lines.test(GridLines::Top)) - { - rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); - } - if (r.lines.test(GridLines::Right)) - { - for (auto i = r.to; i > r.from; --i) - { - rect.right = i * p.d.font.cellSizeDIP.x; - rect.left = rect.right - p.s->font->thinLineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); - } - } - if (r.lines.test(GridLines::Bottom)) - { - rect.top = rect.bottom - p.s->font->thinLineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); - } - if (r.lines.test(GridLines::Underline)) - { - rect.top += p.s->font->underlinePos * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->underlineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); - } - if (r.lines.test(GridLines::HyperlinkUnderline)) - { - const auto w = p.s->font->underlineWidth * p.d.font.dipPerPixel; - const auto centerY = rect.top + p.s->font->underlinePos * p.d.font.dipPerPixel + w * 0.5f; - const auto brush = _brushWithColor(r.color); - const D2D1_POINT_2F point0{ 
rect.left, centerY }; - const D2D1_POINT_2F point1{ rect.right, centerY }; - _renderTarget->DrawLine(point0, point1, brush, w, _dottedStrokeStyle.get()); - } - if (r.lines.test(GridLines::DoubleUnderline)) - { - rect.top = top + p.s->font->doubleUnderlinePos.x * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; - _fillRectangle(rect, r.color); + for (const auto& r : row.gridLineRanges) + { + // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. + assert(r.lines.any()); + + const auto left = columnToDIP(r.from); + const auto right = columnToDIP(r.to); + D2D1_RECT_F rect{}; - rect.top = top + p.s->font->doubleUnderlinePos.y * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + if (r.lines.test(GridLines::Left)) + { + rect.top = top; + rect.bottom = bottom; + for (auto i = r.from; i < r.to; ++i) + { + rect.left = columnToDIP(i); + rect.right = rect.left + thinLineWidth; _fillRectangle(rect, r.color); } - if (r.lines.test(GridLines::Strikethrough)) + } + if (r.lines.test(GridLines::Top)) + { + rect.left = left; + rect.top = top; + rect.right = right; + rect.bottom = rect.top + thinLineWidth; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::Right)) + { + rect.top = top; + rect.bottom = bottom; + for (auto i = r.to; i > r.from; --i) { - rect.top = top + p.s->font->strikethroughPos * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->strikethroughWidth * p.d.font.dipPerPixel; + rect.right = columnToDIP(i); + rect.left = rect.right - thinLineWidth; _fillRectangle(rect, r.color); } } - - y++; + if (r.lines.test(GridLines::Bottom)) + { + rect.left = left; + rect.top = bottom - thinLineWidth; + rect.right = right; + rect.bottom = bottom; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::Underline)) + { + rect.left = left; + rect.top = top + pxToDIP(p.s->font->underlinePos); + rect.right = right; + rect.bottom = 
rect.top + pxToDIP(p.s->font->underlineWidth); + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::HyperlinkUnderline)) + { + const auto w = pxToDIP(p.s->font->underlineWidth); + const auto centerY = top + pxToDIP(p.s->font->underlinePos) + w * 0.5f; + const auto brush = _brushWithColor(r.color); + const D2D1_POINT_2F point0{ left, centerY }; + const D2D1_POINT_2F point1{ right, centerY }; + _renderTarget->DrawLine(point0, point1, brush, w, _dottedStrokeStyle.get()); + } + if (r.lines.test(GridLines::DoubleUnderline)) + { + rect.left = left; + rect.top = top + pxToDIP(p.s->font->doubleUnderlinePos.x); + rect.right = right; + rect.bottom = rect.top + thinLineWidth; + _fillRectangle(rect, r.color); + + rect.left = left; + rect.top = top + pxToDIP(p.s->font->doubleUnderlinePos.y); + rect.right = right; + rect.bottom = rect.top + thinLineWidth; + _fillRectangle(rect, r.color); + } + if (r.lines.test(GridLines::Strikethrough)) + { + rect.left = left; + rect.top = top + pxToDIP(p.s->font->strikethroughPos); + rect.right = right; + rect.bottom = rect.top + pxToDIP(p.s->font->strikethroughWidth); + _fillRectangle(rect, r.color); + } } } diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index ed26d322e10..58aca1f6804 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -17,6 +17,7 @@ namespace Microsoft::Console::Render::Atlas void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); void _drawGridlines(const RenderingPayload& p); + void _drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y); void _drawCursor(const RenderingPayload& p); void _drawSelection(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); diff --git a/src/renderer/atlas/BackendD3D11.cpp b/src/renderer/atlas/BackendD3D.cpp similarity index 78% rename from src/renderer/atlas/BackendD3D11.cpp rename to src/renderer/atlas/BackendD3D.cpp index 
b78e9dfdcc7..ba20c1419cd 100644 --- a/src/renderer/atlas/BackendD3D11.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1,5 +1,5 @@ #include "pch.h" -#include "BackendD3D11.h" +#include "BackendD3D.h" #include @@ -12,6 +12,13 @@ TIL_FAST_MATH_BEGIN +// This code packs various data into smaller-than-int types to save both CPU and GPU memory. This warning would force +// us to add dozens upon dozens of gsl::narrow_cast<>s throughout the file which is more annoying than helpful. +#pragma warning(disable : 4242) // '=': conversion from '...' to '...', possible loss of data +#pragma warning(disable : 4244) // 'initializing': conversion from '...' to '...', possible loss of data +#pragma warning(disable : 4267) // 'argument': conversion from '...' to '...', possible loss of data +#pragma warning(disable : 4838) // conversion from '...' to '...' requires a narrowing conversion +#pragma warning(disable : 26472) // Don't use a static_cast for arithmetic conversions. Use brace initialization, gsl::narrow_cast or gsl::narrow (type.1). // Disable a bunch of warnings which get in the way of writing performant code. #pragma warning(disable : 26429) // Symbol 'data' is never tested for nullness, it can be marked as not_null (f.23). #pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). 
@@ -21,12 +28,12 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; -BackendD3D11::GlyphCacheMap::~GlyphCacheMap() +BackendD3D::GlyphCacheMap::~GlyphCacheMap() { Clear(); } -BackendD3D11::GlyphCacheMap& BackendD3D11::GlyphCacheMap::operator=(GlyphCacheMap&& other) noexcept +BackendD3D::GlyphCacheMap& BackendD3D::GlyphCacheMap::operator=(GlyphCacheMap&& other) noexcept { _map = std::exchange(other._map, {}); _mapMask = std::exchange(other._mapMask, 0); @@ -35,7 +42,7 @@ BackendD3D11::GlyphCacheMap& BackendD3D11::GlyphCacheMap::operator=(GlyphCacheMa return *this; } -void BackendD3D11::GlyphCacheMap::Clear() noexcept +void BackendD3D::GlyphCacheMap::Clear() noexcept { if (_size) { @@ -52,7 +59,7 @@ void BackendD3D11::GlyphCacheMap::Clear() noexcept } } -BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted) +BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted) { const auto hash = _hash(fontFace, glyphIndex); @@ -72,14 +79,14 @@ BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::FindOrInsert(IDWrite } } -size_t BackendD3D11::GlyphCacheMap::_hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept +size_t BackendD3D::GlyphCacheMap::_hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept { // MSVC 19.33 produces surprisingly good assembly for this without stack allocation. 
const uintptr_t data[2]{ std::bit_cast(fontFace), glyphIndex }; return til::hash(&data[0], sizeof(data)); } -BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::_insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash) +BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::_insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash) { if (_size >= _capacity) { @@ -101,7 +108,7 @@ BackendD3D11::GlyphCacheEntry& BackendD3D11::GlyphCacheMap::_insert(IDWriteFontF } } -void BackendD3D11::GlyphCacheMap::_bumpSize() +void BackendD3D::GlyphCacheMap::_bumpSize() { const auto newMapSize = _map.size() * 2; const auto newMapMask = newMapSize - 1; @@ -120,7 +127,7 @@ void BackendD3D11::GlyphCacheMap::_bumpSize() _capacity = newMapSize / 2; } -BackendD3D11::BackendD3D11(wil::com_ptr device, wil::com_ptr deviceContext) : +BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptr deviceContext) : _device{ std::move(device) }, _deviceContext{ std::move(deviceContext) } { @@ -131,12 +138,12 @@ BackendD3D11::BackendD3D11(wil::com_ptr device, wil::com_ptrCreateInputLayout(&layout[0], gsl::narrow_cast(std::size(layout)), &shader_vs[0], sizeof(shader_vs), _inputLayout.addressof())); + THROW_IF_FAILED(_device->CreateInputLayout(&layout[0], std::size(layout), &shader_vs[0], sizeof(shader_vs), _inputLayout.addressof())); } { @@ -262,14 +269,16 @@ BackendD3D11::BackendD3D11(wil::com_ptr device, wil::com_ptrOMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); @@ -290,102 +299,17 @@ void BackendD3D11::Render(RenderingPayload& p) _swapChainManager.Present(p); } -bool BackendD3D11::RequiresContinuousRedraw() noexcept +bool BackendD3D::RequiresContinuousRedraw() noexcept { return _requiresContinuousRedraw; } -void BackendD3D11::WaitUntilCanRender() noexcept +void BackendD3D::WaitUntilCanRender() noexcept { _swapChainManager.WaitUntilCanRender(); } -void BackendD3D11::_debugUpdateShaders() noexcept -try -{ -#ifndef NDEBUG - const auto invalidationTime = 
_sourceCodeInvalidationTime.load(std::memory_order_relaxed); - - if (invalidationTime == INT64_MAX || invalidationTime > std::chrono::steady_clock::now().time_since_epoch().count()) - { - return; - } - - _sourceCodeInvalidationTime.store(INT64_MAX, std::memory_order_relaxed); - - static const auto compile = [](const std::filesystem::path& path, const char* target) { - wil::com_ptr error; - wil::com_ptr blob; - const auto hr = D3DCompileFromFile( - /* pFileName */ path.c_str(), - /* pDefines */ nullptr, - /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, - /* pEntrypoint */ "main", - /* pTarget */ target, - /* Flags1 */ D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS, - /* Flags2 */ 0, - /* ppCode */ blob.addressof(), - /* ppErrorMsgs */ error.addressof()); - - if (error) - { - std::thread t{ [error = std::move(error)]() noexcept { - MessageBoxA(nullptr, static_cast(error->GetBufferPointer()), "Compilation error", MB_ICONERROR | MB_OK); - } }; - t.detach(); - } - - THROW_IF_FAILED(hr); - return blob; - }; - - struct FileVS - { - std::wstring_view filename; - wil::com_ptr BackendD3D11::*target; - }; - struct FilePS - { - std::wstring_view filename; - wil::com_ptr BackendD3D11::*target; - }; - - static constexpr std::array filesVS{ - FileVS{ L"shader_vs.hlsl", &BackendD3D11::_vertexShader }, - }; - static constexpr std::array filesPS{ - FilePS{ L"shader_ps.hlsl", &BackendD3D11::_pixelShader }, - }; - - std::array, filesVS.size()> compiledVS; - std::array, filesPS.size()> compiledPS; - - // Compile our files before moving them into `this` below to ensure we're - // always in a consistent state where all shaders are seemingly valid. 
- for (size_t i = 0; i < filesVS.size(); ++i) - { - const auto blob = compile(_sourceDirectory / filesVS[i].filename, "vs_4_0"); - THROW_IF_FAILED(_device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledVS[i].addressof())); - } - for (size_t i = 0; i < filesPS.size(); ++i) - { - const auto blob = compile(_sourceDirectory / filesPS[i].filename, "ps_4_0"); - THROW_IF_FAILED(_device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledPS[i].addressof())); - } - - for (size_t i = 0; i < filesVS.size(); ++i) - { - this->*filesVS[i].target = std::move(compiledVS[i]); - } - for (size_t i = 0; i < filesPS.size(); ++i) - { - this->*filesPS[i].target = std::move(compiledPS[i]); - } -#endif -} -CATCH_LOG() - -void BackendD3D11::_handleSettingsUpdate(const RenderingPayload& p) +void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) { _swapChainManager.UpdateSwapChainSettings( p, @@ -448,7 +372,7 @@ void BackendD3D11::_handleSettingsUpdate(const RenderingPayload& p) _cellCount = p.s->cellCount; } -void BackendD3D11::_recreateCustomShader(const RenderingPayload& p) +void BackendD3D::_recreateCustomShader(const RenderingPayload& p) { _customRenderTargetView.reset(); _customOffscreenTexture.reset(); @@ -579,7 +503,7 @@ void BackendD3D11::_recreateCustomShader(const RenderingPayload& p) } } -void BackendD3D11::_recreateCustomRenderTargetView(u16x2 targetSize) +void BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) { // Avoid memory usage spikes by releasing memory first. 
_customOffscreenTexture.reset(); @@ -602,7 +526,7 @@ void BackendD3D11::_recreateCustomRenderTargetView(u16x2 targetSize) THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); } -void BackendD3D11::_recreateBackgroundColorBitmap(u16x2 cellCount) +void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) { // Avoid memory usage spikes by releasing memory first. _backgroundBitmap.reset(); @@ -622,13 +546,13 @@ void BackendD3D11::_recreateBackgroundColorBitmap(u16x2 cellCount) THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); } -void BackendD3D11::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept +void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept { _d2dRenderTarget->SetDpi(font.dpi, font.dpi); _d2dRenderTarget->SetTextAntialiasMode(static_cast(font.antialiasingMode)); } -void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) +void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) { { VSConstBuffer data; @@ -647,7 +571,7 @@ void BackendD3D11::_recreateConstBuffer(const RenderingPayload& p) } } -void BackendD3D11::_setupDeviceContextState(const RenderingPayload& p) +void BackendD3D::_setupDeviceContextState(const RenderingPayload& p) { // IA: Input Assembler ID3D11Buffer* vertexBuffers[]{ _vertexBuffer.get(), _instanceBuffer.get() }; @@ -679,7 +603,94 @@ void BackendD3D11::_setupDeviceContextState(const RenderingPayload& p) _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); } -void BackendD3D11::_d2dBeginDrawing() noexcept +#ifndef NDEBUG +void BackendD3D::_debugUpdateShaders(const RenderingPayload& p) noexcept +try +{ + const auto invalidationTime = _sourceCodeInvalidationTime.load(std::memory_order_relaxed); + + if (invalidationTime == INT64_MAX || invalidationTime > 
std::chrono::steady_clock::now().time_since_epoch().count()) + { + return; + } + + _sourceCodeInvalidationTime.store(INT64_MAX, std::memory_order_relaxed); + + static const auto compile = [](const std::filesystem::path& path, const char* target) { + wil::com_ptr error; + wil::com_ptr blob; + const auto hr = D3DCompileFromFile( + /* pFileName */ path.c_str(), + /* pDefines */ nullptr, + /* pInclude */ D3D_COMPILE_STANDARD_FILE_INCLUDE, + /* pEntrypoint */ "main", + /* pTarget */ target, + /* Flags1 */ D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_PACK_MATRIX_COLUMN_MAJOR | D3DCOMPILE_ENABLE_STRICTNESS | D3DCOMPILE_WARNINGS_ARE_ERRORS, + /* Flags2 */ 0, + /* ppCode */ blob.addressof(), + /* ppErrorMsgs */ error.addressof()); + + if (error) + { + std::thread t{ [error = std::move(error)]() noexcept { + MessageBoxA(nullptr, static_cast(error->GetBufferPointer()), "Compilation error", MB_ICONERROR | MB_OK); + } }; + t.detach(); + } + + THROW_IF_FAILED(hr); + return blob; + }; + + struct FileVS + { + std::wstring_view filename; + wil::com_ptr BackendD3D::*target; + }; + struct FilePS + { + std::wstring_view filename; + wil::com_ptr BackendD3D::*target; + }; + + static constexpr std::array filesVS{ + FileVS{ L"shader_vs.hlsl", &BackendD3D::_vertexShader }, + }; + static constexpr std::array filesPS{ + FilePS{ L"shader_ps.hlsl", &BackendD3D::_pixelShader }, + }; + + std::array, filesVS.size()> compiledVS; + std::array, filesPS.size()> compiledPS; + + // Compile our files before moving them into `this` below to ensure we're + // always in a consistent state where all shaders are seemingly valid. 
+ for (size_t i = 0; i < filesVS.size(); ++i) + { + const auto blob = compile(_sourceDirectory / filesVS[i].filename, "vs_4_0"); + THROW_IF_FAILED(_device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledVS[i].addressof())); + } + for (size_t i = 0; i < filesPS.size(); ++i) + { + const auto blob = compile(_sourceDirectory / filesPS[i].filename, "ps_4_0"); + THROW_IF_FAILED(_device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compiledPS[i].addressof())); + } + + for (size_t i = 0; i < filesVS.size(); ++i) + { + this->*filesVS[i].target = std::move(compiledVS[i]); + } + for (size_t i = 0; i < filesPS.size(); ++i) + { + this->*filesPS[i].target = std::move(compiledPS[i]); + } + + _setupDeviceContextState(p); +} +CATCH_LOG() +#endif + +void BackendD3D::_d2dBeginDrawing() noexcept { if (!_d2dBeganDrawing) { @@ -688,7 +699,7 @@ void BackendD3D11::_d2dBeginDrawing() noexcept } } -void BackendD3D11::_d2dEndDrawing() +void BackendD3D::_d2dEndDrawing() { if (_d2dBeganDrawing) { @@ -697,7 +708,7 @@ void BackendD3D11::_d2dEndDrawing() } } -void BackendD3D11::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) +void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) { // This block of code calculates the size of a power-of-2 texture that has an area larger than the targetSize // of the swap chain. 
In other words for a 985x1946 pixel swap chain (area = 1916810) it would result in a u/v @@ -763,45 +774,39 @@ void BackendD3D11::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) _glyphCache.Clear(); _rectPackerData = Buffer{ u }; - stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), gsl::narrow_cast(_rectPackerData.size())); + stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), _rectPackerData.size()); _d2dBeginDrawing(); _d2dRenderTarget->Clear(); } -BackendD3D11::QuadInstance& BackendD3D11::_getLastQuad() noexcept +BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept { assert(_instancesSize != 0); return _instances[_instancesSize - 1]; } -void BackendD3D11::_appendQuad(i32r position, u32 color, ShadingType shadingType) +void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType) { - _appendQuad(position, {}, color, shadingType); + _appendQuad(position, size, {}, color, shadingType); } -void BackendD3D11::_appendQuad(i32r position, i32r texcoord, u32 color, ShadingType shadingType) +void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType) { if (_instancesSize >= _instances.size()) { _bumpInstancesSize(); } - static constexpr auto pack = [](i32 x, i32 y) { - return i16x2{ gsl::narrow_cast(x), gsl::narrow_cast(y) }; - }; - const i16x2 position2 = pack(position.left, position.top); - const i16x2 size2 = pack(position.right - position.left, position.bottom - position.top); - const i16x2 texcoord2 = pack(texcoord.left, texcoord.top); - _instances[_instancesSize++] = QuadInstance{ position2, size2, texcoord2, static_cast(shadingType), color }; + _instances[_instancesSize++] = QuadInstance{ position, size, texcoord, static_cast(shadingType), color }; } -void BackendD3D11::_bumpInstancesSize() +void BackendD3D::_bumpInstancesSize() { _instances = Buffer{ std::max(1024, _instances.size() << 1) }; } -void BackendD3D11::_flushQuads(const RenderingPayload& 
p) +void BackendD3D::_flushQuads(const RenderingPayload& p) { if (!_instancesSize) { @@ -838,12 +843,12 @@ void BackendD3D11::_flushQuads(const RenderingPayload& p) // instancing with ~10k instances appears to be about 50% faster and so DrawInstanced was chosen. // Instead I found that packing instance data as tightly as possible made the biggest performance difference, // and packing 16 bit integers with ID3D11InputLayout is quite a bit more convenient too. - _deviceContext->DrawIndexedInstanced(6, gsl::narrow_cast(_instancesSize), 0, 0, 0); + _deviceContext->DrawIndexedInstanced(6, _instancesSize, 0, 0, 0); _instancesSize = 0; } -void BackendD3D11::_recreateInstanceBuffers(const RenderingPayload& p) +void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) { static constexpr size_t R16max = 1 << 16; // While the viewport size of the terminal is probably a good initial estimate for the amount of instances we'll see, @@ -875,7 +880,7 @@ void BackendD3D11::_recreateInstanceBuffers(const RenderingPayload& p) _instanceBufferSize = newInstancesSize; } -void BackendD3D11::_drawBackground(const RenderingPayload& p) +void BackendD3D::_drawBackground(const RenderingPayload& p) { { D3D11_MAPPED_SUBRESOURCE mapped{}; @@ -894,12 +899,11 @@ void BackendD3D11::_drawBackground(const RenderingPayload& p) // given that AGS_PRIMITIVE_TOPOLOGY_SCREENRECTLIST and AGS_PRIMITIVE_TOPOLOGY_QUADLIST exist // and would serve us much better. Finally, Chromium is still ~2.5x faster than us. 
{ - const i32r rect{ 0, 0, p.s->targetSize.x, p.s->targetSize.y }; - _appendQuad(rect, rect, 0, ShadingType::Background); + _appendQuad({}, p.s->targetSize, 0, ShadingType::Background); } } -void BackendD3D11::_drawText(RenderingPayload& p) +void BackendD3D::_drawText(RenderingPayload& p) { if (_resetGlyphAtlasNeeded) { @@ -938,12 +942,9 @@ void BackendD3D11::_drawText(RenderingPayload& p) { const auto l = static_cast((cumulativeAdvance + row.glyphOffsets[x].advanceOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.x; const auto t = static_cast((baselineY - row.glyphOffsets[x].ascenderOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.y; - const auto w = entry.texcoord.right - entry.texcoord.left; - const auto h = entry.texcoord.bottom - entry.texcoord.top; - const i32r rect{ l, t, l + w, t + h }; - row.top = std::min(row.top, rect.top); - row.bottom = std::max(row.bottom, rect.bottom); - _appendQuad(rect, entry.texcoord, row.colors[x], static_cast(entry.shadingType)); + row.top = std::min(row.top, t); + row.bottom = std::max(row.bottom, t + entry.size.y); + _appendQuad({ static_cast(l), static_cast(t) }, entry.size, entry.texcoord, row.colors[x], static_cast(entry.shadingType)); } cumulativeAdvance += row.glyphAdvances[x]; @@ -956,7 +957,7 @@ void BackendD3D11::_drawText(RenderingPayload& p) _d2dEndDrawing(); } -bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) +bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) { DWRITE_GLYPH_RUN glyphRun{}; glyphRun.fontFace = entry.fontFace; @@ -964,25 +965,27 @@ bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, glyphRun.glyphCount = 1; glyphRun.glyphIndices = &entry.glyphIndex; - auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); - if (box.left >= box.right || box.top >= box.bottom) + const auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); + if (box.empty()) { - // This will indicate to 
BackendD3D11::_drawText that this glyph is whitespace. + // This will indicate to BackendD3D::_drawText that this glyph is whitespace. entry.shadingType = 0; return true; } + // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs + // from overlapping, since the blackbox measurement is only an estimate. + // We need to use round (and not ceil/floor) to ensure we pixel-snap individual + // glyphs correctly and form a consistent baseline across an entire run of glyphs. + const auto l = lround(box.left * p.d.font.pixelPerDIP) - 1; const auto t = lround(box.top * p.d.font.pixelPerDIP) - 1; const auto r = lround(box.right * p.d.font.pixelPerDIP) + 1; const auto b = lround(box.bottom * p.d.font.pixelPerDIP) + 1; - // We'll add a 1px padding on all 4 sides, by adding +2px to the width and +1px to the baseline origin. - // We do this to avoid neighboring glyphs from overlapping, since the blackbox measurement is only an estimate. - stbrp_rect rect{}; - rect.w = gsl::narrow_cast(r - l); - rect.h = gsl::narrow_cast(b - t); + rect.w = r - l; + rect.h = b - t; if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) { return false; @@ -992,17 +995,17 @@ bool BackendD3D11::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
ShadingType::TextClearType : ShadingType::TextGrayscale); - entry.shadingType = gsl::narrow_cast(shadingType); - entry.offset.x = gsl::narrow_cast(l); - entry.offset.y = gsl::narrow_cast(t); - entry.texcoord.left = rect.x; - entry.texcoord.top = rect.y; - entry.texcoord.right = rect.x + rect.w; - entry.texcoord.bottom = rect.y + rect.h; + entry.shadingType = static_cast(shadingType); + entry.offset.x = l; + entry.offset.y = t; + entry.size.x = rect.w; + entry.size.y = rect.h; + entry.texcoord.x = rect.x; + entry.texcoord.y = rect.y; return true; } -void BackendD3D11::_drawGridlines(const RenderingPayload& p) +void BackendD3D::_drawGridlines(const RenderingPayload& p) { u16 y = 0; for (const auto& row : p.rows) @@ -1015,78 +1018,100 @@ void BackendD3D11::_drawGridlines(const RenderingPayload& p) } } -void BackendD3D11::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) +void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) { const auto top = p.s->font->cellSize.y * y; - const auto bottom = top + p.s->font->cellSize.y; for (const auto& r : row.gridLineRanges) { // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
assert(r.lines.any()); - - i32r rect{ r.from * p.s->font->cellSize.x, top, r.to * p.s->font->cellSize.x, bottom }; + + const auto left = r.from * p.s->font->cellSize.x; + const auto width = (r.to - r.from) * p.s->font->cellSize.x; + i16x2 position; + u16x2 size; if (r.lines.test(GridLines::Left)) { for (auto i = r.from; i < r.to; ++i) { - rect.left = i * p.s->font->cellSize.x; - rect.right = rect.left + p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = i * p.s->font->cellSize.x; + position.y = top; + size.x = p.s->font->thinLineWidth; + size.y = p.s->font->cellSize.y; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } } if (r.lines.test(GridLines::Top)) { - rect.bottom = rect.top + p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = left; + position.y = top; + size.x = width; + size.y = p.s->font->thinLineWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } if (r.lines.test(GridLines::Right)) { for (auto i = r.to; i > r.from; --i) { - rect.right = i * p.s->font->cellSize.x; - rect.left = rect.right - p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = i * p.s->font->cellSize.x; + position.y = top; + size.x = p.s->font->thinLineWidth; + size.y = p.s->font->cellSize.y; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } } if (r.lines.test(GridLines::Bottom)) { - rect.top = rect.bottom - p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = left; + position.y = top + p.s->font->cellSize.y - p.s->font->thinLineWidth; + size.x = width; + size.y = p.s->font->thinLineWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } if (r.lines.test(GridLines::Underline)) { - rect.top += p.s->font->underlinePos; - rect.bottom = rect.top + p.s->font->underlineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = left; + 
position.y = top + p.s->font->underlinePos; + size.x = width; + size.y = p.s->font->underlineWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } if (r.lines.test(GridLines::HyperlinkUnderline)) { - rect.top += p.s->font->underlinePos; - rect.bottom = rect.top + p.s->font->underlineWidth; - _appendQuad(rect, r.color, ShadingType::DashedLine); + position.x = left; + position.y = top + p.s->font->underlinePos; + size.x = width; + size.y = p.s->font->underlineWidth; + _appendQuad(position, size, r.color, ShadingType::DashedLine); } if (r.lines.test(GridLines::DoubleUnderline)) { - rect.top = top + p.s->font->doubleUnderlinePos.x; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); - - rect.top = top + p.s->font->doubleUnderlinePos.y; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = left; + position.y = top + p.s->font->doubleUnderlinePos.x; + size.x = width; + size.y = p.s->font->thinLineWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); + + position.x = left; + position.y = top + p.s->font->doubleUnderlinePos.y; + size.x = width; + size.y = p.s->font->thinLineWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } if (r.lines.test(GridLines::Strikethrough)) { - rect.top = top + p.s->font->strikethroughPos; - rect.bottom = rect.top + p.s->font->strikethroughWidth; - _appendQuad(rect, r.color, ShadingType::SolidFill); + position.x = left; + position.y = top + p.s->font->strikethroughPos; + size.x = width; + size.y = p.s->font->strikethroughWidth; + _appendQuad(position, size, r.color, ShadingType::SolidFill); } } } -void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) +void BackendD3D::_drawCursorPart1(const RenderingPayload& p) { _cursorRects.clear(); if (!p.cursorRect) @@ -1094,8 +1119,8 @@ void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) return; } - const auto 
color = p.s->cursor->cursorColor; - const auto offset = gsl::narrow_cast(p.cursorRect.top * p.s->cellCount.x); + const auto cursorColor = p.s->cursor->cursorColor; + const auto offset = p.cursorRect.top * static_cast(p.s->cellCount.x); for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1) { @@ -1106,27 +1131,32 @@ void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) { } - auto& c0 = _cursorRects.emplace_back(CursorRect{ - i32r{ - p.s->font->cellSize.x * x0, - p.s->font->cellSize.y * p.cursorRect.top, - p.s->font->cellSize.x * x1, - p.s->font->cellSize.y * p.cursorRect.bottom, - }, - color == 0xffffffff ? bg ^ 0x3f3f3f : color, - }); + const i16x2 position{ + p.s->font->cellSize.x * x0, + p.s->font->cellSize.y * p.cursorRect.top, + }; + const u16x2 size{ + static_cast(p.s->font->cellSize.x * (x1 - x0)), + p.s->font->cellSize.y, + }; + const auto color = cursorColor == 0xffffffff ? bg ^ 0x3f3f3f : cursorColor; + auto& c0 = _cursorRects.emplace_back(CursorRect{ position, size, color }); switch (static_cast(p.s->cursor->cursorType)) { case CursorType::Legacy: - c0.rect.top = c0.rect.bottom - ((c0.rect.bottom - c0.rect.top) * p.s->cursor->heightPercentage + 50) / 100; + { + const auto height = (c0.size.y * p.s->cursor->heightPercentage + 50) / 100; + c0.position.y += c0.size.y - height; + c0.size.y = height; break; + } case CursorType::VerticalBar: - c0.rect.right = c0.rect.left + p.s->font->thinLineWidth; + c0.size.x = p.s->font->thinLineWidth; break; case CursorType::Underscore: - c0.rect.top += p.s->font->underlinePos; - c0.rect.bottom = c0.rect.top + p.s->font->underlineWidth; + c0.position.y += p.s->font->underlinePos; + c0.size.y = p.s->font->underlineWidth; break; case CursorType::EmptyBox: { @@ -1134,19 +1164,25 @@ void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) if (x0 == p.cursorRect.left) { auto& c = _cursorRects.emplace_back(c0); - c.rect.top += p.s->font->thinLineWidth; - c.rect.bottom -= p.s->font->thinLineWidth; - 
c.rect.right = c.rect.left + p.s->font->thinLineWidth; + // Make line a little shorter vertically so it doesn't overlap with the top/bottom horizontal lines. + c.position.y += p.s->font->thinLineWidth; + c.size.y -= 2 * p.s->font->thinLineWidth; + // The actual adjustment... + c.size.x = p.s->font->thinLineWidth; } if (x1 == p.cursorRect.right) { auto& c = _cursorRects.emplace_back(c0); - c.rect.top += p.s->font->thinLineWidth; - c.rect.bottom -= p.s->font->thinLineWidth; - c.rect.left = c.rect.right - p.s->font->thinLineWidth; + // Make line a little shorter vertically so it doesn't overlap with the top/bottom horizontal lines. + c.position.y += p.s->font->thinLineWidth; + c.size.y -= 2 * p.s->font->thinLineWidth; + // The actual adjustment... + c.position.x += c.size.x - p.s->font->thinLineWidth; + c.size.x = p.s->font->thinLineWidth; } - c0.rect.bottom = c0.rect.top + p.s->font->thinLineWidth; - c1.rect.top = c1.rect.bottom - p.s->font->thinLineWidth; + c0.size.y = p.s->font->thinLineWidth; + c1.position.y += c1.size.y - p.s->font->thinLineWidth; + c1.size.y = p.s->font->thinLineWidth; break; } case CursorType::FullBox: @@ -1154,10 +1190,10 @@ void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) case CursorType::DoubleUnderscore: { auto& c1 = _cursorRects.emplace_back(c0); - c0.rect.top += p.s->font->doubleUnderlinePos.x; - c0.rect.bottom = c0.rect.top + p.s->font->thinLineWidth; - c1.rect.top += p.s->font->doubleUnderlinePos.y; - c1.rect.bottom = c1.rect.top + p.s->font->thinLineWidth; + c0.position.y += p.s->font->doubleUnderlinePos.x; + c0.size.y = p.s->font->thinLineWidth; + c1.position.y += p.s->font->doubleUnderlinePos.y; + c1.size.y = p.s->font->thinLineWidth; break; } default: @@ -1165,17 +1201,17 @@ void BackendD3D11::_drawCursorPart1(const RenderingPayload& p) } } - if (color == 0xffffffff) + if (cursorColor == 0xffffffff) { for (auto& c : _cursorRects) { - _appendQuad(c.rect, c.color, ShadingType::SolidFill); + _appendQuad(c.position, 
c.size, c.color, ShadingType::SolidFill); c.color = 0xffffffff; } } } -void BackendD3D11::_drawCursorPart2(const RenderingPayload& p) +void BackendD3D::_drawCursorPart2(const RenderingPayload& p) { if (!p.cursorRect) { @@ -1192,7 +1228,7 @@ void BackendD3D11::_drawCursorPart2(const RenderingPayload& p) for (const auto& c : _cursorRects) { - _appendQuad(c.rect, c.color, ShadingType::SolidFill); + _appendQuad(c.position, c.size, c.color, ShadingType::SolidFill); } if (color == 0xffffffff) @@ -1202,7 +1238,7 @@ void BackendD3D11::_drawCursorPart2(const RenderingPayload& p) } } -void BackendD3D11::_drawSelection(const RenderingPayload& p) +void BackendD3D::_drawSelection(const RenderingPayload& p) { u16 y = 0; u16 lastFrom = 0; @@ -1220,13 +1256,15 @@ void BackendD3D11::_drawSelection(const RenderingPayload& p) } else { - const i32r rect{ + const i16x2 position{ p.s->font->cellSize.x * row.selectionFrom, p.s->font->cellSize.y * y, - p.s->font->cellSize.x * row.selectionTo, - p.s->font->cellSize.y * (y + 1), }; - _appendQuad(rect, p.s->misc->selectionColor, ShadingType::SolidFill); + const u16x2 size{ + (p.s->font->cellSize.x * (row.selectionTo - row.selectionFrom)), + p.s->font->cellSize.y, + }; + _appendQuad(position, size, p.s->misc->selectionColor, ShadingType::SolidFill); lastFrom = row.selectionFrom; lastTo = row.selectionTo; } @@ -1236,7 +1274,7 @@ void BackendD3D11::_drawSelection(const RenderingPayload& p) } } -void BackendD3D11::_executeCustomShader(RenderingPayload& p) +void BackendD3D::_executeCustomShader(RenderingPayload& p) { { CustomConstBuffer data; diff --git a/src/renderer/atlas/BackendD3D11.h b/src/renderer/atlas/BackendD3D.h similarity index 85% rename from src/renderer/atlas/BackendD3D11.h rename to src/renderer/atlas/BackendD3D.h index 94cca1b941b..6f238a26d3f 100644 --- a/src/renderer/atlas/BackendD3D11.h +++ b/src/renderer/atlas/BackendD3D.h @@ -7,9 +7,9 @@ namespace Microsoft::Console::Render::Atlas { - struct BackendD3D11 : IBackend + struct 
BackendD3D : IBackend { - BackendD3D11(wil::com_ptr device, wil::com_ptr deviceContext); + BackendD3D(wil::com_ptr device, wil::com_ptr deviceContext); void Render(RenderingPayload& payload) override; bool RequiresContinuousRedraw() noexcept override; @@ -52,7 +52,7 @@ namespace Microsoft::Console::Render::Atlas #pragma warning(suppress : 4324) // 'CustomConstBuffer': structure was padded due to alignment specifier }; - enum class ShadingType + enum class ShadingType : u32 { Background = 0, TextGrayscale, @@ -64,9 +64,14 @@ namespace Microsoft::Console::Render::Atlas struct QuadInstance { + // `position` might clip outside of the bounds of the viewport and so it needs to be a + // signed coordinate. i16x2 is used as the size of the instance buffer made the largest + // impact on performance and power draw. If (when?) displays with >32k resolution make their + // appearance in the future, this should be changed to f32x2. But if you do so, please change + // all other occurrences of i16x2 positions/offsets throughout the class to keep it consistent. 
alignas(sizeof(i16x2)) i16x2 position; - alignas(sizeof(i16x2)) i16x2 size; - alignas(sizeof(i16x2)) i16x2 texcoord; + alignas(sizeof(i16x2)) u16x2 size; + alignas(sizeof(i16x2)) u16x2 texcoord; alignas(sizeof(u32)) u32 shadingType = 0; alignas(sizeof(u32)) u32 color = 0; }; @@ -79,10 +84,10 @@ namespace Microsoft::Console::Render::Atlas IDWriteFontFace* fontFace = nullptr; u16 glyphIndex = 0; u16 shadingType = 0; - i32x2 offset; - i32r texcoord; + i16x2 offset; + u16x2 size; + u16x2 texcoord; }; - static_assert(sizeof(GlyphCacheEntry) == 40); struct GlyphCacheMap { @@ -111,7 +116,6 @@ namespace Microsoft::Console::Render::Atlas size_t _size = 0; }; - void _debugUpdateShaders() noexcept; __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _recreateCustomShader(const RenderingPayload& p); void _recreateCustomRenderTargetView(u16x2 targetSize); @@ -119,12 +123,13 @@ namespace Microsoft::Console::Render::Atlas void _recreateBackgroundColorBitmap(u16x2 cellCount); void _recreateConstBuffer(const RenderingPayload& p); void _setupDeviceContextState(const RenderingPayload& p); + void _debugUpdateShaders(const RenderingPayload& p) noexcept; void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); QuadInstance& _getLastQuad() noexcept; - void _appendQuad(i32r position, u32 color, ShadingType shadingType); - void _appendQuad(i32r position, i32r texcoord, u32 color, ShadingType shadingType); + void _appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType); + void _appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType); __declspec(noinline) void _bumpInstancesSize(); void _flushQuads(const RenderingPayload& p); __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); @@ -197,7 +202,8 @@ namespace Microsoft::Console::Render::Atlas // background colors on each side results in 6 lines being drawn. 
struct CursorRect { - i32r rect; + i16x2 position; + u16x2 size; u32 color = 0; }; til::small_vector _cursorRects; diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index d46028a2130..a05edf3892d 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -15,7 +15,7 @@ - + @@ -27,7 +27,7 @@ - + @@ -94,4 +94,4 @@ $(SolutionDir)\oss\stb;$(OutDir)$(ProjectName);%(AdditionalIncludeDirectories) - \ No newline at end of file + diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 41170b412e3..3c9684d5911 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -9,26 +9,6 @@ namespace Microsoft::Console::Render::Atlas { -#define ATLAS_POD_OPS(type) \ - constexpr auto operator<=>(const type&) const noexcept = default; \ - \ - constexpr bool operator==(const type& rhs) const noexcept \ - { \ - if constexpr (std::has_unique_object_representations_v) \ - { \ - return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ - } \ - else \ - { \ - return std::is_eq(*this <=> rhs); \ - } \ - } \ - \ - constexpr bool operator!=(const type& rhs) const noexcept \ - { \ - return !(*this == rhs); \ - } - #define ATLAS_FLAG_OPS(type, underlying) \ friend constexpr type operator~(type v) noexcept \ { \ @@ -59,6 +39,19 @@ namespace Microsoft::Console::Render::Atlas lhs = lhs ^ rhs; \ } +#define ATLAS_POD_OPS(type) \ + constexpr auto operator<=>(const type&) const noexcept = default; \ + \ + constexpr bool operator==(const type& rhs) const noexcept \ + { \ + return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ + } \ + \ + constexpr bool operator!=(const type& rhs) const noexcept \ + { \ + return !(*this == rhs); \ + } + template struct vec2 { @@ -89,9 +82,14 @@ namespace Microsoft::Console::Render::Atlas ATLAS_POD_OPS(rect) + constexpr bool empty() const noexcept + { + return (left >= right) || (top >= bottom); + } + constexpr bool non_empty() const noexcept { - return (left < right) & 
(top < bottom); + return (left < right) && (top < bottom); } }; @@ -320,17 +318,6 @@ namespace Microsoft::Console::Render::Atlas struct Settings { - static auto invalidated() noexcept - { - return til::generational{ - til::generation_t{ 1 }, - til::generational{ til::generation_t{ 1 } }, - til::generational{ til::generation_t{ 1 } }, - til::generational{ til::generation_t{ 1 } }, - til::generational{ til::generation_t{ 1 } }, - }; - } - til::generational target; til::generational font; til::generational cursor; diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 82049a0c3d0..15b603e1de3 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -10,14 +10,12 @@ #define SHADING_TYPE_SOLID_FILL 5 // clang-format on -// Structured Buffers are tightly packed. Nvidia recommends padding them to avoid crossing 128-bit -// cache lines: https://developer.nvidia.com/content/understanding-structured-buffer-performance struct VSData { float2 vertex : SV_Position; int2 position : position; - int2 size : size; - int2 texcoord : texcoord; + uint2 size : size; + uint2 texcoord : texcoord; uint shadingType : shadingType; float4 color : color; }; From c270284b174a501801b950c7af3696b7d2fd3b2d Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 15 Mar 2023 23:28:33 +0100 Subject: [PATCH 06/37] Better dirty rect tracking and partial rerendering (WIP) --- src/inc/til/point.h | 2 + src/renderer/atlas/AtlasEngine.cpp | 71 +++++++++++++---- src/renderer/atlas/AtlasEngine.r.cpp | 6 +- src/renderer/atlas/Backend.cpp | 39 +++------- src/renderer/atlas/BackendD2D.cpp | 25 +++++- src/renderer/atlas/BackendD3D.cpp | 111 ++++++++++++++++++--------- src/renderer/atlas/BackendD3D.h | 16 +++- src/renderer/atlas/common.h | 6 +- tools/ConsoleTypes.natvis | 40 +++++----- 9 files changed, 205 insertions(+), 111 deletions(-) diff --git a/src/inc/til/point.h b/src/inc/til/point.h index f5e4d64a5d3..332b6fd8e0b 100644 
--- a/src/inc/til/point.h +++ b/src/inc/til/point.h @@ -6,6 +6,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" { using CoordType = int32_t; + inline constexpr CoordType CoordTypeMin = INT32_MIN; + inline constexpr CoordType CoordTypeMax = INT32_MAX; namespace details { diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index b078bd7d44e..62ce8c8b416 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -91,6 +91,13 @@ try _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); } + _p.dirtyRectInPx = { + til::CoordTypeMax, + til::CoordTypeMax, + til::CoordTypeMin, + til::CoordTypeMin, + }; + // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". if (const auto offset = _api.scrollOffset) { @@ -101,7 +108,7 @@ try { // Scroll up (for instance when new text is being written at the end of the buffer). const u16 begRow = _p.s->cellCount.y + offset; - _api.invalidatedRows.x = nothingInvalid ? begRow : std::min(_api.invalidatedRows.x, begRow); + _api.invalidatedRows.x = nothingInvalid ? begRow : std::min(_api.invalidatedRows.x, begRow); _api.invalidatedRows.y = _p.s->cellCount.y; // scrollOffset/offset = -1 @@ -125,12 +132,19 @@ try d.top += deltaPx; d.bottom += deltaPx; } + + for (auto y = _api.invalidatedRows.x; y < begRow; ++y) + { + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, _p.rows[y].top); + } + _p.dirtyRectInPx.bottom = _p.s->targetSize.y; } else { // Scroll down. + const u16 endRow = offset; _api.invalidatedRows.x = 0; - _api.invalidatedRows.y = nothingInvalid ? offset : std::max(_api.invalidatedRows.y, offset); + _api.invalidatedRows.y = nothingInvalid ? 
endRow : std::max(_api.invalidatedRows.y, endRow); // scrollOffset/offset = 1 // +----------+ +----------+ @@ -153,6 +167,12 @@ try d.top += deltaPx; d.bottom += deltaPx; } + + _p.dirtyRectInPx.top = 0; + for (auto y = endRow; y < _api.invalidatedRows.y; ++y) + { + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.top, _p.rows[y].top); + } } // Scrolling the background bitmap is a lot easier because we can rely on memmove which works @@ -169,6 +189,15 @@ try memmove(dst, src, count * sizeof(u32)); } } + else + { + for (auto y = _api.invalidatedRows.x; y < _api.invalidatedRows.y; ++y) + { + const auto& r = _p.rows[y]; + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, r.top); + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, r.bottom); + } + } for (auto y = _api.invalidatedRows.x; y < _api.invalidatedRows.y; ++y) { @@ -176,7 +205,6 @@ try } _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _p.s->cellCount.x, _api.invalidatedRows.y }; - _p.dirtyRect = _api.dirtyRect; _p.cursorRect = {}; _p.scrollOffset = _api.scrollOffset; @@ -281,9 +309,7 @@ try const auto from = gsl::narrow_cast(clamp(coordTarget.x, 0, _p.s->cellCount.x - 1)); const auto to = gsl::narrow_cast(clamp(coordTarget.x + cchLine, from, _p.s->cellCount.x)); const auto fg = gsl::narrow_cast(color) | 0xff000000; - auto& row = _p.rows[y]; - - row.gridLineRanges.emplace_back(lines, fg, from, to); + _p.rows[y].gridLineRanges.emplace_back(lines, fg, from, to); return S_OK; } CATCH_RETURN() @@ -299,11 +325,15 @@ try const auto y = gsl::narrow_cast(clamp(rect.top, 0, _p.s->cellCount.y)); const auto from = gsl::narrow_cast(clamp(rect.left, 0, _p.s->cellCount.x - 1)); const auto to = gsl::narrow_cast(clamp(rect.right, from, _p.s->cellCount.x)); - auto& row = _p.rows[y]; + auto& row = _p.rows[y]; row.selectionFrom = from; row.selectionTo = to; - _p.dirtyRect |= rect; + + _p.dirtyRectInPx.left = std::min(_p.dirtyRectInPx.left, from * _p.s->font->cellSize.x); + _p.dirtyRectInPx.top = 
std::min(_p.dirtyRectInPx.top, y * _p.s->font->cellSize.y); + _p.dirtyRectInPx.right = std::max(_p.dirtyRectInPx.right, to * _p.s->font->cellSize.x); + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, _p.dirtyRectInPx.top + _p.s->font->cellSize.y); return S_OK; } CATCH_RETURN() @@ -332,7 +362,10 @@ try // Clear the previous cursor if (const auto r = _api.invalidatedCursorArea; r.non_empty()) { - _p.dirtyRect |= til::rect{ r.left, r.top, r.right, r.bottom }; + _p.dirtyRectInPx.left = std::min(_p.dirtyRectInPx.left, r.left * _p.s->font->cellSize.x); + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, r.top * _p.s->font->cellSize.y); + _p.dirtyRectInPx.right = std::max(_p.dirtyRectInPx.right, r.right * _p.s->font->cellSize.x); + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, r.bottom * _p.s->font->cellSize.y); } if (options.isOn) @@ -340,13 +373,16 @@ try const auto point = options.coordCursor; // TODO: options.coordCursor can contain invalid out of bounds coordinates when // the window is being resized and the cursor is on the last line of the viewport. 
- const auto x = gsl::narrow_cast(clamp(point.x, 0, _p.s->cellCount.x - 1)); - const auto y = gsl::narrow_cast(clamp(point.y, 0, _p.s->cellCount.y - 1)); + const auto x = gsl::narrow_cast(clamp(point.x, 0, _p.s->cellCount.x - 1)); + const auto y = gsl::narrow_cast(clamp(point.y, 0, _p.s->cellCount.y - 1)); const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)); - const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _p.s->cellCount.x - 0)); - const auto bottom = gsl::narrow_cast(y + 1); + const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _p.s->cellCount.x - 0)); + const auto bottom = gsl::narrow_cast(y + 1); _p.cursorRect = { x, y, right, bottom }; - _p.dirtyRect |= til::rect{ x, y, right, bottom }; + _p.dirtyRectInPx.left = std::min(_p.dirtyRectInPx.left, x * _p.s->font->cellSize.x); + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, y * _p.s->font->cellSize.y); + _p.dirtyRectInPx.right = std::max(_p.dirtyRectInPx.right, right * _p.s->font->cellSize.x); + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, bottom * _p.s->font->cellSize.y); } return S_OK; @@ -410,6 +446,13 @@ void AtlasEngine::_handleSettingsUpdate() } _api.invalidatedRows = invalidatedRowsAll; + + u16 y = 0; + for (auto& r : _p.rows) + { + r.clear(y, _p.s->font->cellSize.y); + ++y; + } } void AtlasEngine::_recreateFontDependentResources() diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index e0d8c0e1fec..8c5a4693ec1 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -32,7 +32,7 @@ using namespace Microsoft::Console::Render::Atlas; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - if (!_p.dirtyRect) + if (!_p.dirtyRectInPx) { return S_OK; } @@ -126,7 +126,7 @@ void AtlasEngine::_recreateBackend() auto d2dMode = debugForceD2DMode; auto deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED #ifndef NDEBUG - | 
D3D11_CREATE_DEVICE_DEBUG + //| D3D11_CREATE_DEVICE_DEBUG #endif // This flag prevents the driver from creating a large thread pool for things like shader computations // that would be advantageous for games. For us this has only a minimal performance benefit, @@ -147,8 +147,6 @@ void AtlasEngine::_recreateBackend() THROW_IF_FAILED(_p.dxgiFactory->EnumAdapters1(index++, dxgiAdapter.put())); THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); - - // If useSoftwareRendering is false we exit during the first iteration. Using the default adapter (index 0) // is the right thing to do under most circumstances, unless you _really_ want to get your hands dirty. // The alternative is to track the window rectangle in respect to all IDXGIOutputs and select the right diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 46cc143a72c..67649202320 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -23,42 +23,23 @@ wil::com_ptr SwapChainManager::GetBuffer() const void SwapChainManager::Present(const RenderingPayload& p) { - const til::rect fullRect{ 0, 0, p.s->cellCount.x, p.s->cellCount.y }; + const til::rect fullRect{ 0, 0, _targetSize.x, _targetSize.y }; DXGI_PRESENT_PARAMETERS params{}; - RECT dirtyRect{}; RECT scrollRect{}; POINT scrollOffset{}; - if (p.dirtyRect != fullRect) - { - dirtyRect = p.dirtyRect.to_win32_rect(); - dirtyRect.left *= p.s->font->cellSize.x; - dirtyRect.top *= p.s->font->cellSize.y; - dirtyRect.right *= p.s->font->cellSize.x; - dirtyRect.bottom *= p.s->font->cellSize.y; - - // This block will enlarge the dirtyRect to handle glyphs that overlap their rows vertically. - const auto actualDirtyTop = gsl::at(p.rows, p.dirtyRect.top).top; - const auto actualDirtyBottom = gsl::at(p.rows, gsl::narrow_cast(p.dirtyRect.bottom) - 1).bottom; - // Since rows might be taller than their cells, they might have drawn outside of the viewport. - // FYI using std::clamp() here would be dangerous. 
If std::clamp() is given a "min" that is greater - // than "max" it'll return min, but our calculation of .bottom wants to do the exact opposite. - dirtyRect.top = std::max(std::min(dirtyRect.top, LONG{ actualDirtyTop }), 0l); - dirtyRect.bottom = std::min(std::max(dirtyRect.bottom, LONG{ actualDirtyBottom }), LONG{ _targetSize.y }); - // The swap chain might have a different size than the TextBuffer (due to the renderer running asynchronously) and so - // we have to ensure to clamp the bottom/right coordinates into _targetSize. The above already did so for bottom. - dirtyRect.right = std::min(dirtyRect.right, LONG{ _targetSize.x }); - - // If a row of text has been changed, it's width will equal the full rect. In that case we should - // also redraw the margin on the right, as overlapping glyphs might have previously drawn into it. - if (p.dirtyRect.left == fullRect.left && p.dirtyRect.right == fullRect.right) - { - dirtyRect.right = _targetSize.x; - } + // Since rows might be taller than their cells, they might have drawn outside of the viewport. 
+ auto dirtyRect = p.dirtyRectInPx; + dirtyRect.left = std::max(dirtyRect.left, 0); + dirtyRect.top = std::max(dirtyRect.top, 0); + dirtyRect.right = std::min(dirtyRect.right, til::CoordType{ _targetSize.x }); + dirtyRect.bottom = std::min(dirtyRect.bottom, til::CoordType{ _targetSize.y }); + if (dirtyRect != fullRect) + { params.DirtyRectsCount = 1; - params.pDirtyRects = &dirtyRect; + params.pDirtyRects = dirtyRect.as_win32_rect(); if (p.scrollOffset) { diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 1e2812dc7f4..99b5ddcd475 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -142,6 +142,9 @@ void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept void BackendD2D::_drawText(RenderingPayload& p) { + til::CoordType dirtyTop = til::CoordTypeMax; + til::CoordType dirtyBottom = til::CoordTypeMin; + // It is possible to create a "_foregroundBrush" similar to how the `_backgroundBrush` is created and // use that as the brush for text rendering below. That way we wouldn't have to search `row.colors` for color // changes and could draw entire lines of text in a single call. 
Unfortunately Direct2D is not particularly @@ -152,6 +155,12 @@ void BackendD2D::_drawText(RenderingPayload& p) u16 y = 0; for (auto& row : p.rows) { + if (row.top > p.dirtyRectInPx.bottom || row.bottom < p.dirtyRectInPx.top) + { + ++y; + continue; + } + f32 baselineX = 0.0f; for (const auto& m : row.mappings) @@ -197,7 +206,17 @@ void BackendD2D::_drawText(RenderingPayload& p) } while (it != end); } - y++; + dirtyTop = std::min(dirtyTop, row.top); + dirtyBottom = std::max(dirtyBottom, row.bottom); + ++y; + } + + if (dirtyTop < dirtyBottom) + { + p.dirtyRectInPx.left = 0; + p.dirtyRectInPx.top = std::min(p.dirtyRectInPx.top, dirtyTop); + p.dirtyRectInPx.right = p.s->targetSize.x; + p.dirtyRectInPx.bottom = std::max(p.dirtyRectInPx.bottom, dirtyBottom); } } @@ -225,7 +244,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& ro const auto pxToDIP = [&](til::CoordType i) { return i * p.d.font.dipPerPixel; }; - + const auto top = rowToDIP(y); const auto bottom = top + p.d.font.cellSizeDIP.y; const auto thinLineWidth = pxToDIP(p.s->font->thinLineWidth); @@ -321,7 +340,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& ro void BackendD2D::_drawCursor(const RenderingPayload& p) { - if (!p.cursorRect) + if (p.cursorRect.empty()) { return; } diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index ba20c1419cd..d41c4db4bda 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -112,7 +112,9 @@ void BackendD3D::GlyphCacheMap::_bumpSize() { const auto newMapSize = _map.size() * 2; const auto newMapMask = newMapSize - 1; - FAIL_FAST_IF(newMapSize >= INT32_MAX); // overflow/truncation protection + + static constexpr auto sizeLimit = std::numeric_limits::max() / 2; + THROW_HR_IF_MSG(E_OUTOFMEMORY, newMapSize >= sizeLimit, "GlyphCacheMap overflow"); auto newMap = Buffer(newMapSize); @@ -275,7 +277,7 @@ void BackendD3D::Render(RenderingPayload& p) { 
_handleSettingsUpdate(p); } - + #ifndef NDEBUG _debugUpdateShaders(p); #endif @@ -684,7 +686,7 @@ try { this->*filesPS[i].target = std::move(compiledPS[i]); } - + _setupDeviceContextState(p); } CATCH_LOG() @@ -746,9 +748,10 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) { const auto surface = _glyphAtlas.query(); - D2D1_RENDER_TARGET_PROPERTIES props{}; - props.type = D2D1_RENDER_TARGET_TYPE_DEFAULT; - props.pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }; + const D2D1_RENDER_TARGET_PROPERTIES props{ + .type = D2D1_RENDER_TARGET_TYPE_DEFAULT, + .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, + }; wil::com_ptr renderTarget; THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); _d2dRenderTarget = renderTarget.query(); @@ -780,10 +783,18 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) _d2dRenderTarget->Clear(); } +void BackendD3D::_markStateChange(ID3D11BlendState* blendState) +{ + _instancesStateChanges.emplace_back(StateChange{ + .blendState = blendState, + .offset = _instancesCount, + }); +} + BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept { - assert(_instancesSize != 0); - return _instances[_instancesSize - 1]; + assert(_instancesCount != 0); + return _instances[_instancesCount - 1]; } void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType) @@ -793,12 +804,12 @@ void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType) { - if (_instancesSize >= _instances.size()) + if (_instancesCount >= _instances.size()) { _bumpInstancesSize(); } - _instances[_instancesSize++] = QuadInstance{ position, size, texcoord, static_cast(shadingType), color }; + _instances[_instancesCount++] = QuadInstance{ position, size, texcoord, 
static_cast(shadingType), color }; } void BackendD3D::_bumpInstancesSize() @@ -808,12 +819,12 @@ void BackendD3D::_bumpInstancesSize() void BackendD3D::_flushQuads(const RenderingPayload& p) { - if (!_instancesSize) + if (!_instancesCount) { return; } - if (_instancesSize > _instanceBufferSize) + if (_instancesCount > _instanceBufferCapacity) { _recreateInstanceBuffers(p); } @@ -821,7 +832,7 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) { D3D11_MAPPED_SUBRESOURCE mapped{}; THROW_IF_FAILED(_deviceContext->Map(_instanceBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - memcpy(mapped.pData, _instances.data(), _instancesSize * sizeof(QuadInstance)); + memcpy(mapped.pData, _instances.data(), _instancesCount * sizeof(QuadInstance)); _deviceContext->Unmap(_instanceBuffer.get(), 0); } @@ -843,9 +854,26 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) // instancing with ~10k instances appears to be about 50% faster and so DrawInstanced was chosen. // Instead I found that packing instance data as tightly as possible made the biggest performance difference, // and packing 16 bit integers with ID3D11InputLayout is quite a bit more convenient too. - _deviceContext->DrawIndexedInstanced(6, _instancesSize, 0, 0, 0); - _instancesSize = 0; + // This will cause the loop below to emit one final DrawIndexedInstanced() for the remainder of instances. 
+ _markStateChange(nullptr); + + size_t previousOffset = 0; + for (const auto& state : _instancesStateChanges) + { + if (const auto count = state.offset - previousOffset) + { + _deviceContext->DrawIndexedInstanced(6, count, 0, 0, previousOffset); + } + if (state.blendState) + { + _deviceContext->OMSetBlendState(state.blendState, nullptr, 0xffffffff); + } + previousOffset = state.offset; + } + + _instancesStateChanges.clear(); + _instancesCount = 0; } void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) @@ -854,16 +882,16 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) // While the viewport size of the terminal is probably a good initial estimate for the amount of instances we'll see, // I feel like we should ensure that the estimate doesn't exceed the limit for a DXGI_FORMAT_R16_UINT index buffer. const auto estimatedInstances = std::min(R16max / 4, static_cast(p.s->cellCount.x) * p.s->cellCount.y); - const auto minSize = std::max(_instancesSize, estimatedInstances); + const auto minSize = std::max(_instancesCount, estimatedInstances); // std::bit_ceil will result in a nice exponential growth curve. I don't know exactly how structured buffers are treated // by various drivers, but I'm assuming that they prefer buffer sizes that are close to power-of-2 sizes as well. 
- const auto newInstancesSize = std::bit_ceil(minSize * sizeof(QuadInstance)) / sizeof(QuadInstance); + const auto newInstancesCapacity = std::bit_ceil(minSize * sizeof(QuadInstance)) / sizeof(QuadInstance); _instanceBuffer.reset(); { D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = gsl::narrow(newInstancesSize * sizeof(QuadInstance)); + desc.ByteWidth = gsl::narrow(newInstancesCapacity * sizeof(QuadInstance)); desc.Usage = D3D11_USAGE_DYNAMIC; desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -877,7 +905,7 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) static constexpr UINT offsets[]{ 0, 0 }; _deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]); - _instanceBufferSize = newInstancesSize; + _instanceBufferCapacity = newInstancesCapacity; } void BackendD3D::_drawBackground(const RenderingPayload& p) @@ -893,14 +921,8 @@ void BackendD3D::_drawBackground(const RenderingPayload& p) } _deviceContext->Unmap(_backgroundBitmap.get(), 0); } - // In testing I found that on my AMD GPU separating the background pass out from the rest - // improves performance by ~20%, if a fullscreen triangle is used for it. However, I felt - // like the added code didn't justify the improvement (6.4% -> 5.2% GPU load at 60 FPS), - // given that AGS_PRIMITIVE_TOPOLOGY_SCREENRECTLIST and AGS_PRIMITIVE_TOPOLOGY_QUADLIST exist - // and would serve us much better. Finally, Chromium is still ~2.5x faster than us. 
- { - _appendQuad({}, p.s->targetSize, 0, ShadingType::Background); - } + + _appendQuad({}, p.s->targetSize, 0, ShadingType::Background); } void BackendD3D::_drawText(RenderingPayload& p) @@ -911,10 +933,18 @@ void BackendD3D::_drawText(RenderingPayload& p) _resetGlyphAtlasNeeded = false; } - u16 y = 0; + til::CoordType dirtyTop = til::CoordTypeMax; + til::CoordType dirtyBottom = til::CoordTypeMin; + u16 y = 0; for (auto& row : p.rows) { + if (row.top >= p.dirtyRectInPx.bottom || row.bottom <= p.dirtyRectInPx.top) + { + ++y; + continue; + } + const auto baselineY = y * p.d.font.cellSizeDIP.y + p.s->font->baselineInDIP; f32 cumulativeAdvance = 0; @@ -951,9 +981,19 @@ void BackendD3D::_drawText(RenderingPayload& p) } } + dirtyTop = std::min(dirtyTop, row.top); + dirtyBottom = std::max(dirtyBottom, row.bottom); ++y; } + if (dirtyTop < dirtyBottom) + { + p.dirtyRectInPx.left = 0; + p.dirtyRectInPx.top = std::min(p.dirtyRectInPx.top, dirtyTop); + p.dirtyRectInPx.right = p.s->targetSize.x; + p.dirtyRectInPx.bottom = std::max(p.dirtyRectInPx.bottom, dirtyBottom); + } + _d2dEndDrawing(); } @@ -1026,7 +1066,7 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& ro { // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
assert(r.lines.any()); - + const auto left = r.from * p.s->font->cellSize.x; const auto width = (r.to - r.from) * p.s->font->cellSize.x; i16x2 position; @@ -1114,7 +1154,8 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& ro void BackendD3D::_drawCursorPart1(const RenderingPayload& p) { _cursorRects.clear(); - if (!p.cursorRect) + + if (p.cursorRect.empty()) { return; } @@ -1213,7 +1254,7 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) void BackendD3D::_drawCursorPart2(const RenderingPayload& p) { - if (!p.cursorRect) + if (_cursorRects.empty()) { return; } @@ -1222,8 +1263,7 @@ void BackendD3D::_drawCursorPart2(const RenderingPayload& p) if (color == 0xffffffff) { - _flushQuads(p); - _deviceContext->OMSetBlendState(_blendStateInvert.get(), nullptr, 0xffffffff); + _markStateChange(_blendStateInvert.get()); } for (const auto& c : _cursorRects) @@ -1233,8 +1273,7 @@ void BackendD3D::_drawCursorPart2(const RenderingPayload& p) if (color == 0xffffffff) { - _flushQuads(p); - _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); + _markStateChange(_blendState.get()); } } @@ -1345,7 +1384,7 @@ void BackendD3D::_executeCustomShader(RenderingPayload& p) // With custom shaders, everything might be invalidated, so we have to // indirectly disable Present1() and its dirty rects this way. 
- p.dirtyRect = { 0, 0, p.s->cellCount.x, p.s->cellCount.y }; + p.dirtyRectInPx = { 0, 0, p.s->targetSize.x, p.s->targetSize.y }; } TIL_FAST_MATH_END diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 6f238a26d3f..e5366c44001 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -127,6 +127,7 @@ namespace Microsoft::Console::Render::Atlas void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); + void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; void _appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType); void _appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType); @@ -159,9 +160,20 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _vertexBuffer; wil::com_ptr _indexBuffer; wil::com_ptr _instanceBuffer; - size_t _instanceBufferSize = 0; + size_t _instanceBufferCapacity = 0; Buffer _instances; - size_t _instancesSize = 0; + size_t _instancesCount = 0; + + // This allows us to batch inverted cursors into the same + // _instanceBuffer upload as the rest of all other instances. + struct StateChange + { + ID3D11BlendState* blendState; + size_t offset; + }; + // 3 allows for 1 state change to _blendStateInvert, followed by 1 change back to _blendState, + // and finally 1 entry to signal the past-the-end size, as used by _flushQuads. + til::small_vector _instancesStateChanges; wil::com_ptr _customRenderTargetView; wil::com_ptr _customOffscreenTexture; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 3c9684d5911..c8bcb0e5e19 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -420,9 +420,9 @@ namespace Microsoft::Console::Render::Atlas // Parameters which change every frame. 
Buffer rows; Buffer backgroundBitmap; - til::rect dirtyRect; - til::rect cursorRect; - til::CoordType scrollOffset = 0; + til::rect dirtyRectInPx; + u16r cursorRect; + i16 scrollOffset = 0; }; struct IBackend diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index 75695240871..48f950c55c7 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -38,26 +38,6 @@ {{LT({Left}, {Top}) RB({Right}, {Bottom}) In:[{Right-Left+1} x {Bottom-Top+1}] Ex:[{Right-Left} x {Bottom-Top}]}} - - Stored Glyph, go to UnicodeStorage. - {_wch,X} Single - {_wch,X} Lead - {_wch,X} Trail - - - - - _data - - - - - {{ wrap={_wrapForced} padded={_doubleBytePadded} }} - - _data - - - {{ id={_id} width={_rowWidth} }} @@ -124,4 +104,24 @@ _ptr + + + {{ size={_size} }} + + _size + + _size + _data + + + + + + {{ top={top}, bottom={bottom} }} + + + + (empty) + {(void*)fontFace}, {glyphIndex} + From 339b8924723b55d141aa7234402fff5765964d8d Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 16 Mar 2023 17:16:29 +0100 Subject: [PATCH 07/37] Mostly fix dirty rects, Reduce memory/PCIe usage --- src/inc/til/generational.h | 7 +- src/renderer/atlas/AtlasEngine.cpp | 193 ++++++++++++++++------------- src/renderer/atlas/BackendD2D.cpp | 59 +++++---- src/renderer/atlas/BackendD2D.h | 3 +- src/renderer/atlas/BackendD3D.cpp | 173 ++++++++++++++------------ src/renderer/atlas/BackendD3D.h | 8 +- src/renderer/atlas/common.h | 17 ++- 7 files changed, 255 insertions(+), 205 deletions(-) diff --git a/src/inc/til/generational.h b/src/inc/til/generational.h index d1631133251..a77f33e2e29 100644 --- a/src/inc/til/generational.h +++ b/src/inc/til/generational.h @@ -9,6 +9,11 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" { auto operator<=>(const generation_t&) const = default; + constexpr void bump() noexcept + { + _value++; + } + uint32_t _value = 0; }; @@ -42,7 +47,7 @@ namespace til // Terminal Implementation Library. 
Also: "Today I Learned" [[nodiscard]] constexpr T* write() noexcept { - _generation = generation_t{ _generation._value + 1u }; + _generation.bump(); return &_value; } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 62ce8c8b416..925fe5b0c1c 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -91,90 +91,44 @@ try _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); } - _p.dirtyRectInPx = { - til::CoordTypeMax, - til::CoordTypeMax, - til::CoordTypeMin, - til::CoordTypeMin, - }; - // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". if (const auto offset = _api.scrollOffset) { const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; - const auto deltaPx = offset * _p.s->font->cellSize.y; if (offset < 0) { - // Scroll up (for instance when new text is being written at the end of the buffer). + // scrollOffset/offset = -1 + // +----------+ +----------+ + // | | | xxxxxxxxx| + // | xxxxxxxxx| -> |xxxxxxx | + // |xxxxxxx | | | + // +----------+ +----------+ const u16 begRow = _p.s->cellCount.y + offset; _api.invalidatedRows.x = nothingInvalid ? begRow : std::min(_api.invalidatedRows.x, begRow); _api.invalidatedRows.y = _p.s->cellCount.y; - // scrollOffset/offset = -1 - // +----------+ +----------+ - // | | | xxxxxxxxx| + dst < beg - // | xxxxxxxxx| -> |xxxxxxx | + src | < beg - offset - // |xxxxxxx | | | | v - // +----------+ +----------+ v < end - const auto beg = _p.rows.begin(); - const auto end = _p.rows.end(); - auto first = beg - offset; - auto dest = beg; - - // Same as std::move, but with std::swap to preserve std::vector allocations. - // Also, it allows to include the top/bottom adjustment. 
- for (; first != end; ++dest, ++first) - { - using std::swap; - auto& d = *dest; - swap(*first, d); - d.top += deltaPx; - d.bottom += deltaPx; - } - - for (auto y = _api.invalidatedRows.x; y < begRow; ++y) - { - _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, _p.rows[y].top); - } - _p.dirtyRectInPx.bottom = _p.s->targetSize.y; + const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin()); + std::copy_n(_p.rows.begin(), -offset, dst); } else { - // Scroll down. + // scrollOffset/offset = 1 + // +----------+ +----------+ + // | xxxxxxxxx| | | + // |xxxxxxx | -> | xxxxxxxxx| + // | | |xxxxxxx | + // +----------+ +----------+ const u16 endRow = offset; _api.invalidatedRows.x = 0; _api.invalidatedRows.y = nothingInvalid ? endRow : std::max(_api.invalidatedRows.y, endRow); - // scrollOffset/offset = 1 - // +----------+ +----------+ - // | xxxxxxxxx| | | + src < beg - // |xxxxxxx | -> | xxxxxxxxx| | ^ - // | | |xxxxxxx | v | < end - offset - // +----------+ +----------+ + dst < end - const auto beg = _p.rows.begin(); - const auto end = _p.rows.end(); - auto last = end - offset; - auto dest = end; - - // Same as std::move_backwards, but with std::swap to preserve std::vector allocations. - // Also, it allows to include the top/bottom adjustment. - while (last != beg) - { - using std::swap; - auto& d = *--dest; - swap(*--last, d); - d.top += deltaPx; - d.bottom += deltaPx; - } - - _p.dirtyRectInPx.top = 0; - for (auto y = endRow; y < _api.invalidatedRows.y; ++y) - { - _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.top, _p.rows[y].top); - } + const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin()); + std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst); } + std::swap(_p.rows, _p.rowsScratch); + // Scrolling the background bitmap is a lot easier because we can rely on memmove which works // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. 
{ @@ -187,27 +141,69 @@ try assert(dst >= beg && dst + count <= end); assert(src >= beg && src + count <= end); memmove(dst, src, count * sizeof(u32)); + _p.backgroundBitmapGeneration.bump(); } } - else + + _api.dirtyRect = { + 0, + _api.invalidatedRows.x, + _p.s->cellCount.x, + _api.invalidatedRows.y, + }; + + _p.dirtyRectInPx = { + til::CoordTypeMax, + til::CoordTypeMax, + til::CoordTypeMin, + til::CoordTypeMin, + }; + _p.cursorRect = {}; + _p.scrollOffset = _api.scrollOffset; + + if (_api.invalidatedRows.x != _api.invalidatedRows.y) { - for (auto y = _api.invalidatedRows.x; y < _api.invalidatedRows.y; ++y) + const auto deltaPx = _api.scrollOffset * _p.s->font->cellSize.y; + const til::CoordType targetSizeX = _p.s->targetSize.x; + const til::CoordType targetSizeY = _p.s->targetSize.y; + u16 y = 0; + + _p.dirtyRectInPx.left = 0; + _p.dirtyRectInPx.top = _api.invalidatedRows.x * _p.s->font->cellSize.y; + _p.dirtyRectInPx.right = targetSizeX; + _p.dirtyRectInPx.bottom = _api.invalidatedRows.y * _p.s->font->cellSize.y; + + for (const auto r : _p.rows) { - const auto& r = _p.rows[y]; - _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, r.top); - _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, r.bottom); + r->top += deltaPx; + r->bottom += deltaPx; + + if (y >= _api.invalidatedRows.x && y < _api.invalidatedRows.y) + { + const auto clampedTop = clamp(r->top, 0, targetSizeY); + const auto clampedBottom = clamp(r->bottom, 0, targetSizeY); + if (clampedTop != clampedBottom) + { + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, clampedTop); + _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, clampedBottom); + } + + r->clear(y, _p.s->font->cellSize.y); + } + + ++y; } - } - for (auto y = _api.invalidatedRows.x; y < _api.invalidatedRows.y; ++y) - { - _p.rows[y].clear(y, _p.s->font->cellSize.y); + // I feel a little bit like this is a hack, but I'm not sure how to better express this. 
+ // This ensures that we end up calling Present1() without dirty rects if the swap chain is + // recreated/resized, because DXGI requires you to then call Present1() without dirty rects. + if (_api.invalidatedRows.x == 0 && _api.invalidatedRows.y == _p.s->cellCount.y) + { + _p.dirtyRectInPx.top = 0; + _p.dirtyRectInPx.bottom = targetSizeY; + } } - _api.dirtyRect = til::rect{ 0, _api.invalidatedRows.x, _p.s->cellCount.x, _api.invalidatedRows.y }; - _p.cursorRect = {}; - _p.scrollOffset = _api.scrollOffset; - return S_OK; } CATCH_RETURN() @@ -274,11 +270,11 @@ try } const auto x = gsl::narrow_cast(clamp(coord.x, 0, _p.s->cellCount.x)); + auto column = x; // Due to the current IRenderEngine interface (that wasn't refactored yet) we need to assemble // the current buffer line first as the remaining function operates on whole lines of text. { - auto column = x; for (const auto& cluster : clusters) { for (const auto& ch : cluster.GetText()) @@ -291,13 +287,30 @@ try } _api.bufferLineColumn.emplace_back(column); + } + { std::fill(_api.colorsForeground.begin() + x, _api.colorsForeground.begin() + column, _api.currentColor.x); - std::fill_n(_p.backgroundBitmap.begin() + (static_cast(y) * _p.s->cellCount.x + x), column - x, _api.currentColor.y); } - _api.lastPaintBufferLineCoord = { x, y }; + { + const auto backgroundRow = _p.backgroundBitmap.begin() + static_cast(y) * _p.s->cellCount.x; + auto it = backgroundRow + x; + const auto end = backgroundRow + column; + const auto bg = _api.currentColor.y; + + for (; it != end; ++it) + { + if (*it != bg) + { + _p.backgroundBitmapGeneration.bump(); + std::fill(it, end, bg); + break; + } + } + } + _api.lastPaintBufferLineCoord = { x, y }; return S_OK; } CATCH_RETURN() @@ -309,7 +322,7 @@ try const auto from = gsl::narrow_cast(clamp(coordTarget.x, 0, _p.s->cellCount.x - 1)); const auto to = gsl::narrow_cast(clamp(coordTarget.x + cchLine, from, _p.s->cellCount.x)); const auto fg = gsl::narrow_cast(color) | 0xff000000; - 
_p.rows[y].gridLineRanges.emplace_back(lines, fg, from, to); + _p.rows[y]->gridLineRanges.emplace_back(lines, fg, from, to); return S_OK; } CATCH_RETURN() @@ -326,7 +339,7 @@ try const auto from = gsl::narrow_cast(clamp(rect.left, 0, _p.s->cellCount.x - 1)); const auto to = gsl::narrow_cast(clamp(rect.right, from, _p.s->cellCount.x)); - auto& row = _p.rows[y]; + auto& row = *_p.rows[y]; row.selectionFrom = from; row.selectionTo = to; @@ -448,9 +461,9 @@ void AtlasEngine::_handleSettingsUpdate() _api.invalidatedRows = invalidatedRowsAll; u16 y = 0; - for (auto& r : _p.rows) + for (const auto r : _p.rows) { - r.clear(y, _p.s->font->cellSize.y); + r->clear(y, _p.s->font->cellSize.y); ++y; } } @@ -513,8 +526,16 @@ void AtlasEngine::_recreateCellCountDependentResources() _api.glyphAdvances = Buffer{ projectedGlyphSize }; _api.glyphOffsets = Buffer{ projectedGlyphSize }; - _p.rows = Buffer(_p.s->cellCount.y); + _p.unorderedRows = Buffer(_p.s->cellCount.y); + _p.rowsScratch = Buffer(_p.s->cellCount.y); + _p.rows = Buffer(_p.s->cellCount.y); _p.backgroundBitmap = Buffer(static_cast(_p.s->cellCount.x) * _p.s->cellCount.y); + + auto it = _p.unorderedRows.data(); + for (auto& r : _p.rows) + { + r = it++; + } } void AtlasEngine::_flushBufferLine() @@ -532,7 +553,7 @@ void AtlasEngine::_flushBufferLine() // This would seriously blow us up otherwise. 
Expects(_api.bufferLineColumn.size() == _api.bufferLine.size() + 1); - auto& row = _p.rows[_api.lastPaintBufferLineCoord.y]; + auto& row = *_p.rows[_api.lastPaintBufferLineCoord.y]; wil::com_ptr mappedFontFace; diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 99b5ddcd475..6047ab455c6 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -122,6 +122,7 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) _backgroundBrush->SetExtendModeX(D2D1_EXTEND_MODE_MIRROR); _backgroundBrush->SetExtendModeY(D2D1_EXTEND_MODE_MIRROR); _backgroundBrush->SetTransform(&transform); + _backgroundBitmapGeneration = {}; } _generation = p.s.generation(); @@ -131,11 +132,16 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept { + if (_backgroundBitmapGeneration != p.backgroundBitmapGeneration) + { + _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), p.s->cellCount.x * 4); + _backgroundBitmapGeneration = p.backgroundBitmapGeneration; + } + // If the terminal was 120x30 cells and 1200x600 pixels large, this would draw the // background by upscaling a 120x30 pixel bitmap to fill the entire render target. const D2D1_RECT_F rect{ 0, 0, p.s->targetSize.x * p.d.font.dipPerPixel, p.s->targetSize.y * p.d.font.dipPerPixel }; _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_COPY); - _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), p.s->cellCount.x * 4); _renderTarget->FillRectangle(&rect, _backgroundBrush.get()); _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_SOURCE_OVER); } @@ -146,26 +152,20 @@ void BackendD2D::_drawText(RenderingPayload& p) til::CoordType dirtyBottom = til::CoordTypeMin; // It is possible to create a "_foregroundBrush" similar to how the `_backgroundBrush` is created and - // use that as the brush for text rendering below. 
That way we wouldn't have to search `row.colors` for color + // use that as the brush for text rendering below. That way we wouldn't have to search `row->colors` for color // changes and could draw entire lines of text in a single call. Unfortunately Direct2D is not particularly // smart if you do this and chooses to draw the given text into a way too small offscreen texture first and // then blends it on the screen with the given bitmap brush. While this roughly doubles the performance // when drawing lots of colors, the extra latency drops performance by >10x when drawing fewer colors. // Since fewer colors are more common, I've chosen to go with regular solid-color brushes. u16 y = 0; - for (auto& row : p.rows) + for (const auto row : p.rows) { - if (row.top > p.dirtyRectInPx.bottom || row.bottom < p.dirtyRectInPx.top) - { - ++y; - continue; - } - f32 baselineX = 0.0f; - for (const auto& m : row.mappings) + for (const auto& m : row->mappings) { - const auto colorsBegin = row.colors.begin(); + const auto colorsBegin = row->colors.begin(); auto it = colorsBegin + m.glyphsFrom; const auto end = colorsBegin + m.glyphsTo; @@ -186,9 +186,9 @@ void BackendD2D::_drawText(RenderingPayload& p) .fontFace = m.fontFace.get(), .fontEmSize = m.fontEmSize, .glyphCount = gsl::narrow_cast(count), - .glyphIndices = &row.glyphIndices[off], - .glyphAdvances = &row.glyphAdvances[off], - .glyphOffsets = &row.glyphOffsets[off], + .glyphIndices = &row->glyphIndices[off], + .glyphAdvances = &row->glyphAdvances[off], + .glyphOffsets = &row->glyphOffsets[off], }; DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); @@ -196,8 +196,8 @@ void BackendD2D::_drawText(RenderingPayload& p) const auto blackBox = GetGlyphRunBlackBox(glyphRun, baselineX, baselineY); // Add a 1px padding to avoid inaccuracies with the blackbox measurement. // It's only an estimate based on the design size after all. 
- row.top = std::min(row.top, static_cast(lround(blackBox.top - 1.5f))); - row.bottom = std::max(row.bottom, static_cast(lround(blackBox.bottom + 1.5f))); + row->top = std::min(row->top, static_cast(lround(blackBox.top - 1.5f))); + row->bottom = std::max(row->bottom, static_cast(lround(blackBox.bottom + 1.5f))); for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) { @@ -206,16 +206,18 @@ void BackendD2D::_drawText(RenderingPayload& p) } while (it != end); } - dirtyTop = std::min(dirtyTop, row.top); - dirtyBottom = std::max(dirtyBottom, row.bottom); + if (row->top < p.dirtyRectInPx.bottom && p.dirtyRectInPx.top < row->bottom) + { + dirtyTop = std::min(dirtyTop, row->top); + dirtyBottom = std::max(dirtyBottom, row->bottom); + } + ++y; } if (dirtyTop < dirtyBottom) { - p.dirtyRectInPx.left = 0; p.dirtyRectInPx.top = std::min(p.dirtyRectInPx.top, dirtyTop); - p.dirtyRectInPx.right = p.s->targetSize.x; p.dirtyRectInPx.bottom = std::max(p.dirtyRectInPx.bottom, dirtyBottom); } } @@ -223,9 +225,9 @@ void BackendD2D::_drawText(RenderingPayload& p) void BackendD2D::_drawGridlines(const RenderingPayload& p) { u16 y = 0; - for (const auto& row : p.rows) + for (const auto row : p.rows) { - if (!row.gridLineRanges.empty()) + if (!row->gridLineRanges.empty()) { _drawGridlineRow(p, row, y); } @@ -233,7 +235,7 @@ void BackendD2D::_drawGridlines(const RenderingPayload& p) } } -void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) +void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { const auto columnToDIP = [&](til::CoordType i) { return i * p.d.font.cellSizeDIP.x; @@ -249,7 +251,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& ro const auto bottom = top + p.d.font.cellSizeDIP.y; const auto thinLineWidth = pxToDIP(p.s->font->thinLineWidth); - for (const auto& r : row.gridLineRanges) + for (const auto& r : row->gridLineRanges) { // AtlasEngine.cpp shouldn't add any gridlines if 
they don't do anything. assert(r.lines.any()); @@ -345,6 +347,9 @@ void BackendD2D::_drawCursor(const RenderingPayload& p) return; } + // Inverted cursors could be implemented in the future using + // ID2D1DeviceContext::DrawImage and D2D1_COMPOSITE_MODE_MASK_INVERT. + D2D1_RECT_F rect{ p.d.font.cellSizeDIP.x * p.cursorRect.left, p.d.font.cellSizeDIP.y * p.cursorRect.top, @@ -403,12 +408,12 @@ void BackendD2D::_drawSelection(const RenderingPayload& p) u16 y = 0; for (const auto& row : p.rows) { - if (row.selectionTo > row.selectionFrom) + if (row->selectionTo > row->selectionFrom) { const D2D1_RECT_F rect{ - p.d.font.cellSizeDIP.x * row.selectionFrom, + p.d.font.cellSizeDIP.x * row->selectionFrom, p.d.font.cellSizeDIP.y * y, - p.d.font.cellSizeDIP.x * row.selectionTo, + p.d.font.cellSizeDIP.x * row->selectionTo, p.d.font.cellSizeDIP.y * (y + 1), }; _fillRectangle(rect, p.s->misc->selectionColor); diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 58aca1f6804..2df604e59be 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -17,7 +17,7 @@ namespace Microsoft::Console::Render::Atlas void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); void _drawGridlines(const RenderingPayload& p); - void _drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y); + void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursor(const RenderingPayload& p); void _drawSelection(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); @@ -34,6 +34,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _dottedStrokeStyle; wil::com_ptr _backgroundBitmap; wil::com_ptr _backgroundBrush; + til::generation_t _backgroundBitmapGeneration; u32 _brushColor = 0; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index d41c4db4bda..914a8fc0efd 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ 
b/src/renderer/atlas/BackendD3D.cpp @@ -341,7 +341,8 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) if (fontChanged) { DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); - _resetGlyphAtlasNeeded = true; + // Clearing the atlas requires BeginDraw(), which is expensive. Defer this until we need Direct2D anyways. + _fontChangedResetGlyphAtlas = true; if (_d2dRenderTarget) { @@ -546,6 +547,7 @@ void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof())); THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); + _backgroundBitmapGeneration = {}; } void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept @@ -710,16 +712,25 @@ void BackendD3D::_d2dEndDrawing() } } +void BackendD3D::_handleFontChangedResetGlyphAtlas(RenderingPayload& p) +{ + _fontChangedResetGlyphAtlas = false; + _resetGlyphAtlasAndBeginDraw(p); +} + void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) { - // This block of code calculates the size of a power-of-2 texture that has an area larger than the targetSize - // of the swap chain. In other words for a 985x1946 pixel swap chain (area = 1916810) it would result in a u/v - // of 2048x1024 (area = 2097152). This has 2 benefits: GPUs like power-of-2 textures and it ensures that we don't - // resize the texture every time you resize the window by a pixel. Instead it only grows/shrinks by a factor of 2. - auto area = static_cast(p.s->targetSize.x) * static_cast(p.s->targetSize.y); - // The index returned by _BitScanReverse is undefined when the input is 0. We can simultaneously - // guard against this and avoid unreasonably small textures, by clamping the min. texture size. 
- area = std::max(uint32_t{ 256 * 256 }, area); + // The index returned by _BitScanReverse is undefined when the input is 0. We can simultaneously guard + // against that and avoid unreasonably small textures, by clamping the min. texture size to `minArea`. + static constexpr u32 minArea = 128 * 128; + const auto minAreaByFont = static_cast(p.s->font->cellSize.x) * p.s->font->cellSize.y * 64; + const auto minAreaByGrowth = static_cast(_rectPacker.width) * _rectPacker.height * 2; + const auto maxArea = static_cast(p.s->targetSize.x) * static_cast(p.s->targetSize.y); + const auto area = std::min(maxArea, std::max(minArea, std::max(minAreaByFont, minAreaByGrowth))); + // This block of code calculates the size of a power-of-2 texture that has an area larger than the given `area`. + // For instance, for an area of 985x1946 = 1916810 it would result in a u/v of 2048x1024 (area = 2097152). + // This has 2 benefits: GPUs like power-of-2 textures and it ensures that we don't resize the texture + // every time you resize the window by a pixel. Instead it only grows/shrinks by a factor of 2. 
unsigned long index; _BitScanReverse(&index, area - 1); const auto u = ::base::saturated_cast(1u << ((index + 2) / 2)); @@ -910,16 +921,20 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) void BackendD3D::_drawBackground(const RenderingPayload& p) { + if (_backgroundBitmapGeneration != p.backgroundBitmapGeneration) { D3D11_MAPPED_SUBRESOURCE mapped{}; THROW_IF_FAILED(_deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + auto data = static_cast(mapped.pData); for (size_t i = 0; i < p.s->cellCount.y; ++i) { memcpy(data, p.backgroundBitmap.data() + i * p.s->cellCount.x, p.s->cellCount.x * sizeof(u32)); data += mapped.RowPitch; } + _deviceContext->Unmap(_backgroundBitmap.get(), 0); + _backgroundBitmapGeneration = p.backgroundBitmapGeneration; } _appendQuad({}, p.s->targetSize, 0, ShadingType::Background); @@ -927,77 +942,61 @@ void BackendD3D::_drawBackground(const RenderingPayload& p) void BackendD3D::_drawText(RenderingPayload& p) { - if (_resetGlyphAtlasNeeded) + if (_fontChangedResetGlyphAtlas) { - _resetGlyphAtlasAndBeginDraw(p); - _resetGlyphAtlasNeeded = false; + _handleFontChangedResetGlyphAtlas(p); } til::CoordType dirtyTop = til::CoordTypeMax; til::CoordType dirtyBottom = til::CoordTypeMin; u16 y = 0; - for (auto& row : p.rows) + for (const auto row : p.rows) { - if (row.top >= p.dirtyRectInPx.bottom || row.bottom <= p.dirtyRectInPx.top) - { - ++y; - continue; - } - const auto baselineY = y * p.d.font.cellSizeDIP.y + p.s->font->baselineInDIP; f32 cumulativeAdvance = 0; - for (const auto& m : row.mappings) + for (const auto& m : row->mappings) { - for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) + for (auto x = m.glyphsFrom; x < m.glyphsTo; cumulativeAdvance += row->glyphAdvances[x], ++x) { bool inserted = false; - auto& entry = _glyphCache.FindOrInsert(m.fontFace.get(), row.glyphIndices[x], inserted); + auto& entry = _glyphCache.FindOrInsert(m.fontFace.get(), row->glyphIndices[x], inserted); if 
(inserted) { - _d2dBeginDrawing(); - - if (!_drawGlyph(p, entry, m.fontEmSize)) - { - _d2dEndDrawing(); - _flushQuads(p); - _resetGlyphAtlasAndBeginDraw(p); - --x; - continue; // retry - } + _drawGlyph(p, entry, m.fontEmSize); } if (entry.shadingType) { - const auto l = static_cast((cumulativeAdvance + row.glyphOffsets[x].advanceOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.x; - const auto t = static_cast((baselineY - row.glyphOffsets[x].ascenderOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.y; - row.top = std::min(row.top, t); - row.bottom = std::max(row.bottom, t + entry.size.y); - _appendQuad({ static_cast(l), static_cast(t) }, entry.size, entry.texcoord, row.colors[x], static_cast(entry.shadingType)); + const auto l = static_cast((cumulativeAdvance + row->glyphOffsets[x].advanceOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.x; + const auto t = static_cast((baselineY - row->glyphOffsets[x].ascenderOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.y; + row->top = std::min(row->top, t); + row->bottom = std::max(row->bottom, t + entry.size.y); + _appendQuad({ static_cast(l), static_cast(t) }, entry.size, entry.texcoord, row->colors[x], static_cast(entry.shadingType)); } - - cumulativeAdvance += row.glyphAdvances[x]; } } - dirtyTop = std::min(dirtyTop, row.top); - dirtyBottom = std::max(dirtyBottom, row.bottom); + if (row->top < p.dirtyRectInPx.bottom && p.dirtyRectInPx.top < row->bottom) + { + dirtyTop = std::min(dirtyTop, row->top); + dirtyBottom = std::max(dirtyBottom, row->bottom); + } + ++y; } if (dirtyTop < dirtyBottom) { - p.dirtyRectInPx.left = 0; p.dirtyRectInPx.top = std::min(p.dirtyRectInPx.top, dirtyTop); - p.dirtyRectInPx.right = p.s->targetSize.x; p.dirtyRectInPx.bottom = std::max(p.dirtyRectInPx.bottom, dirtyBottom); } _d2dEndDrawing(); } -bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) +void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 
fontEmSize) { DWRITE_GLYPH_RUN glyphRun{}; glyphRun.fontFace = entry.fontFace; @@ -1010,47 +1009,57 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f { // This will indicate to BackendD3D::_drawText that this glyph is whitespace. entry.shadingType = 0; - return true; + return; } - // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs - // from overlapping, since the blackbox measurement is only an estimate. - // We need to use round (and not ceil/floor) to ensure we pixel-snap individual - // glyphs correctly and form a consistent baseline across an entire run of glyphs. + bool retry = false; + for (;;) + { + // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs + // from overlapping, since the blackbox measurement is only an estimate. + // We need to use round (and not ceil/floor) to ensure we pixel-snap individual + // glyphs correctly and form a consistent baseline across an entire run of glyphs. + const auto l = lround(box.left * p.d.font.pixelPerDIP) - 1; + const auto t = lround(box.top * p.d.font.pixelPerDIP) - 1; + const auto r = lround(box.right * p.d.font.pixelPerDIP) + 1; + const auto b = lround(box.bottom * p.d.font.pixelPerDIP) + 1; + + stbrp_rect rect{}; + rect.w = r - l; + rect.h = b - t; + if (stbrp_pack_rects(&_rectPacker, &rect, 1)) + { + _d2dBeginDrawing(); + + const D2D1_POINT_2F baseline{ (rect.x - l) * p.d.font.dipPerPixel, (rect.y - t) * p.d.font.dipPerPixel }; + const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); + const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
ShadingType::TextClearType : ShadingType::TextGrayscale); + + entry.shadingType = static_cast(shadingType); + entry.offset.x = l; + entry.offset.y = t; + entry.size.x = rect.w; + entry.size.y = rect.h; + entry.texcoord.x = rect.x; + entry.texcoord.y = rect.y; + return; + } - const auto l = lround(box.left * p.d.font.pixelPerDIP) - 1; - const auto t = lround(box.top * p.d.font.pixelPerDIP) - 1; - const auto r = lround(box.right * p.d.font.pixelPerDIP) + 1; - const auto b = lround(box.bottom * p.d.font.pixelPerDIP) + 1; + THROW_HR_IF_MSG(E_UNEXPECTED, retry, "BackendD3D::_drawGlyph deadlock"); - stbrp_rect rect{}; - rect.w = r - l; - rect.h = b - t; - if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) - { - return false; + _d2dEndDrawing(); + _flushQuads(p); + _resetGlyphAtlasAndBeginDraw(p); + retry = true; } - - const D2D1_POINT_2F baseline{ (rect.x - l) * p.d.font.dipPerPixel, (rect.y - t) * p.d.font.dipPerPixel }; - const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); - const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
ShadingType::TextClearType : ShadingType::TextGrayscale); - - entry.shadingType = static_cast(shadingType); - entry.offset.x = l; - entry.offset.y = t; - entry.size.x = rect.w; - entry.size.y = rect.h; - entry.texcoord.x = rect.x; - entry.texcoord.y = rect.y; - return true; } void BackendD3D::_drawGridlines(const RenderingPayload& p) { u16 y = 0; - for (const auto& row : p.rows) + for (const auto row : p.rows) { - if (!row.gridLineRanges.empty()) + if (!row->gridLineRanges.empty()) { _drawGridlineRow(p, row, y); } @@ -1058,11 +1067,11 @@ void BackendD3D::_drawGridlines(const RenderingPayload& p) } } -void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y) +void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { const auto top = p.s->font->cellSize.y * y; - for (const auto& r : row.gridLineRanges) + for (const auto& r : row->gridLineRanges) { // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. assert(r.lines.any()); @@ -1285,27 +1294,27 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) for (const auto& row : p.rows) { - if (row.selectionTo > row.selectionFrom) + if (row->selectionTo > row->selectionFrom) { // If the current selection line matches the previous one, we can just extend the previous quad downwards. // The way this is implemented isn't very smart, but we also don't have very many rows to iterate through. 
- if (row.selectionFrom == lastFrom && row.selectionTo == lastTo) + if (row->selectionFrom == lastFrom && row->selectionTo == lastTo) { _getLastQuad().size.y += p.s->font->cellSize.y; } else { const i16x2 position{ - p.s->font->cellSize.x * row.selectionFrom, + p.s->font->cellSize.x * row->selectionFrom, p.s->font->cellSize.y * y, }; const u16x2 size{ - (p.s->font->cellSize.x * (row.selectionTo - row.selectionFrom)), + (p.s->font->cellSize.x * (row->selectionTo - row->selectionFrom)), p.s->font->cellSize.y, }; _appendQuad(position, size, p.s->misc->selectionColor, ShadingType::SolidFill); - lastFrom = row.selectionFrom; - lastTo = row.selectionTo; + lastFrom = row->selectionFrom; + lastTo = row->selectionTo; } } diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index e5366c44001..68ba7d3dfc3 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -126,6 +126,7 @@ namespace Microsoft::Console::Render::Atlas void _debugUpdateShaders(const RenderingPayload& p) noexcept; void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); + void _handleFontChangedResetGlyphAtlas(RenderingPayload& p); void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; @@ -136,9 +137,9 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); - bool _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); + __declspec(noinline) void _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); void _drawGridlines(const RenderingPayload& p); - void _drawGridlineRow(const RenderingPayload& p, const ShapedRow& row, u16 y); + void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursorPart1(const RenderingPayload& p); 
void _drawCursorPart2(const RenderingPayload& p); void _drawSelection(const RenderingPayload& p); @@ -186,6 +187,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _backgroundBitmap; wil::com_ptr _backgroundBitmapView; + til::generation_t _backgroundBitmapGeneration; wil::com_ptr _glyphAtlas; wil::com_ptr _glyphAtlasView; @@ -197,7 +199,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _d2dRenderTarget4; // Optional. Supported since Windows 10 14393. wil::com_ptr _brush; bool _d2dBeganDrawing = false; - bool _resetGlyphAtlasNeeded = false; + bool _fontChangedResetGlyphAtlas = false; float _gamma = 0; float _cleartypeEnhancedContrast = 0; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index c8bcb0e5e19..d9d2a73bf43 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -399,7 +399,7 @@ namespace Microsoft::Console::Render::Atlas struct RenderingPayload { - // Parameters which are constant across backends. + //// Parameters which are constant across backends. wil::com_ptr d2dFactory; wil::com_ptr dwriteFactory; wil::com_ptr dwriteFactory4; // optional, might be nullptr @@ -410,16 +410,23 @@ namespace Microsoft::Console::Render::Atlas std::function warningCallback; std::function swapChainChangedCallback; - // Parameters which are constant for the existence of the backend. + //// Parameters which are constant for the existence of the backend. wil::com_ptr dxgiFactory; - // Parameters which change seldom. + //// Parameters which change seldom. til::generational s; Dependents d; - // Parameters which change every frame. - Buffer rows; + //// Parameters which change every frame. + // This is the backing buffer for `rows`. + Buffer unorderedRows; + // This is used as a scratch buffer during scrolling. 
+ Buffer rowsScratch; + Buffer rows; Buffer backgroundBitmap; + // 1 ensures that the backends redraw the background, even if the background + // is entirely black, just like `backgroundBitmap` after it gets created. + til::generation_t backgroundBitmapGeneration{ 1 }; til::rect dirtyRectInPx; u16r cursorRect; i16 scrollOffset = 0; From 694daa7f24e07abd7b6709c984dbc0c3374a36ce Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 20 Mar 2023 20:09:59 +0100 Subject: [PATCH 08/37] Fix dirty area calculation, Add ATLAS_DEBUG_SHOW_DIRTY --- src/host/CursorBlinker.cpp | 4 +- src/renderer/atlas/AtlasEngine.cpp | 2 +- src/renderer/atlas/AtlasEngine.r.cpp | 4 +- src/renderer/atlas/Backend.cpp | 94 +++++-------------- src/renderer/atlas/Backend.h | 14 ++- src/renderer/atlas/BackendD2D.cpp | 73 ++++++++++++++- src/renderer/atlas/BackendD2D.h | 5 + src/renderer/atlas/BackendD3D.cpp | 109 +++++++++++++++++----- src/renderer/atlas/BackendD3D.h | 8 ++ src/renderer/atlas/DWriteTextAnalysis.cpp | 3 + src/renderer/atlas/colorbrewer.h | 35 +++++++ src/renderer/atlas/common.h | 54 ++++++++++- src/renderer/atlas/custom_shader_ps.hlsl | 3 + src/renderer/atlas/custom_shader_vs.hlsl | 3 + src/renderer/atlas/stb_rect_pack.cpp | 3 + tools/ConsoleTypes.natvis | 7 ++ 16 files changed, 307 insertions(+), 114 deletions(-) create mode 100644 src/renderer/atlas/colorbrewer.h diff --git a/src/host/CursorBlinker.cpp b/src/host/CursorBlinker.cpp index 78f072ccc94..014d4529c74 100644 --- a/src/host/CursorBlinker.cpp +++ b/src/host/CursorBlinker.cpp @@ -55,7 +55,7 @@ void CursorBlinker::SettingsChanged() noexcept { KillCaretTimer(); _uCaretBlinkTime = dwCaretBlinkTime; - SetCaretTimer(); + //SetCaretTimer(); } } @@ -66,7 +66,7 @@ void CursorBlinker::FocusEnd() const noexcept void CursorBlinker::FocusStart() const noexcept { - SetCaretTimer(); + //SetCaretTimer(); } // Routine Description: diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 
925fe5b0c1c..2891172d0a9 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -69,7 +69,7 @@ try _handleSettingsUpdate(); } - if constexpr (debugDisablePartialInvalidation) + if constexpr (ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION) { _api.invalidatedRows = invalidatedRowsAll; _api.scrollOffset = 0; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 8c5a4693ec1..3d6c168df00 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -73,7 +73,7 @@ CATCH_RETURN() [[nodiscard]] bool AtlasEngine::RequiresContinuousRedraw() noexcept { - return debugContinuousRedraw || (_b && _b->RequiresContinuousRedraw()); + return ATLAS_DEBUG_CONTINUOUS_REDRAW || (_b && _b->RequiresContinuousRedraw()); } void AtlasEngine::WaitUntilCanRender() noexcept @@ -123,7 +123,7 @@ void AtlasEngine::_recreateBackend() // IID_PPV_ARGS doesn't work here for some reason. THROW_IF_FAILED(CreateDXGIFactory2(flags, __uuidof(_p.dxgiFactory), _p.dxgiFactory.put_void())); - auto d2dMode = debugForceD2DMode; + auto d2dMode = ATLAS_DEBUG_FORCE_D2D_MODE; auto deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED #ifndef NDEBUG //| D3D11_CREATE_DEVICE_DEBUG diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 67649202320..72d3e27bada 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ #include "pch.h" #include "Backend.h" @@ -36,24 +39,27 @@ void SwapChainManager::Present(const RenderingPayload& p) dirtyRect.right = std::min(dirtyRect.right, til::CoordType{ _targetSize.x }); dirtyRect.bottom = std::min(dirtyRect.bottom, til::CoordType{ _targetSize.y }); - if (dirtyRect != fullRect) + if constexpr (!ATLAS_DEBUG_SHOW_DIRTY) { - params.DirtyRectsCount = 1; - params.pDirtyRects = dirtyRect.as_win32_rect(); - - if (p.scrollOffset) + if (dirtyRect != fullRect) { - const auto offsetInPx = p.scrollOffset * p.s->font->cellSize.y; - const auto width = p.s->targetSize.x; - const auto height = p.s->cellCount.y * p.s->font->cellSize.y; - const auto top = std::max(0, offsetInPx); - const auto bottom = height + std::min(0, offsetInPx); + params.DirtyRectsCount = 1; + params.pDirtyRects = dirtyRect.as_win32_rect(); + + if (p.scrollOffset) + { + const auto offsetInPx = p.scrollOffset * p.s->font->cellSize.y; + const auto width = p.s->targetSize.x; + const auto height = p.s->cellCount.y * p.s->font->cellSize.y; + const auto top = std::max(0, offsetInPx); + const auto bottom = height + std::min(0, offsetInPx); - scrollRect = { 0, top, width, bottom }; - scrollOffset = { 0, offsetInPx }; + scrollRect = { 0, top, width, bottom }; + scrollOffset = { 0, offsetInPx }; - params.pScrollRect = &scrollRect; - params.pScrollOffset = &scrollOffset; + params.pScrollRect = &scrollRect; + params.pScrollOffset = &scrollOffset; + } } } @@ -66,7 +72,7 @@ void SwapChainManager::WaitUntilCanRender() noexcept // IDXGISwapChain2::GetFrameLatencyWaitableObject returns an auto-reset event. // Once we've waited on the event, waiting on it again will block until the timeout elapses. // _waitForPresentation guards against this. 
- if constexpr (!debugDisableFrameLatencyWaitableObject) + if constexpr (!ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT) { if (_waitForPresentation) { @@ -155,64 +161,6 @@ void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) const } } -// Returns the theoretical/design design size of the given `DWRITE_GLYPH_RUN`, relative the the given baseline origin. -f32r Microsoft::Console::Render::Atlas::GetGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) -{ - DWRITE_FONT_METRICS fontMetrics; - glyphRun.fontFace->GetMetrics(&fontMetrics); - - std::unique_ptr glyphRunMetricsHeap; - std::array glyphRunMetricsStack; - DWRITE_GLYPH_METRICS* glyphRunMetrics = glyphRunMetricsStack.data(); - - if (glyphRun.glyphCount > glyphRunMetricsStack.size()) - { - glyphRunMetricsHeap = std::make_unique_for_overwrite(glyphRun.glyphCount); - glyphRunMetrics = glyphRunMetricsHeap.get(); - } - - glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, glyphRunMetrics, false); - - f32 const fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; - f32r accumulatedBounds{ - FLT_MAX, - FLT_MAX, - FLT_MIN, - FLT_MIN, - }; - - for (uint32_t i = 0; i < glyphRun.glyphCount; ++i) - { - const auto& glyphMetrics = glyphRunMetrics[i]; - const auto glyphAdvance = glyphRun.glyphAdvances ? 
glyphRun.glyphAdvances[i] : glyphMetrics.advanceWidth * fontScale; - - const auto left = static_cast(glyphMetrics.leftSideBearing) * fontScale; - const auto top = static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale; - const auto right = static_cast(gsl::narrow_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale; - const auto bottom = static_cast(gsl::narrow_cast(glyphMetrics.advanceHeight) - glyphMetrics.bottomSideBearing - glyphMetrics.verticalOriginY) * fontScale; - - if (left < right && top < bottom) - { - auto glyphX = baselineX; - auto glyphY = baselineY; - if (glyphRun.glyphOffsets) - { - glyphX += glyphRun.glyphOffsets[i].advanceOffset; - glyphY -= glyphRun.glyphOffsets[i].ascenderOffset; - } - - accumulatedBounds.left = std::min(accumulatedBounds.left, left + glyphX); - accumulatedBounds.top = std::min(accumulatedBounds.top, top + glyphY); - accumulatedBounds.right = std::max(accumulatedBounds.right, right + glyphX); - accumulatedBounds.bottom = std::max(accumulatedBounds.bottom, bottom + glyphY); - } - - baselineX += glyphAdvance; - } - - return accumulatedBounds; -} - // Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. // `d2dRenderTarget4` and `dwriteFactory4` are optional and used to draw colored glyphs. // Returns true if the `DWRITE_GLYPH_RUN` contained a color glyph. diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index a4da5a982d7..8abd023d06e 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -1,13 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ #pragma once #include "common.h" namespace Microsoft::Console::Render::Atlas { - inline constexpr bool debugContinuousRedraw = false; - inline constexpr bool debugDisableFrameLatencyWaitableObject = false; - inline constexpr bool debugDisablePartialInvalidation = false; - inline constexpr bool debugForceD2DMode = false; +#define ATLAS_DEBUG_CONTINUOUS_REDRAW 0 +#define ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT 0 +#define ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION 0 +#define ATLAS_DEBUG_FORCE_D2D_MODE 0 +#define ATLAS_DEBUG_SHOW_DIRTY 0 struct SwapChainManager { @@ -39,7 +43,7 @@ namespace Microsoft::Console::Render::Atlas void _createSwapChain(const RenderingPayload& p, IUnknown* device); void _updateMatrixTransform(const RenderingPayload& p) const; - static constexpr DXGI_SWAP_CHAIN_FLAG flags = debugDisableFrameLatencyWaitableObject ? DXGI_SWAP_CHAIN_FLAG{} : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; + static constexpr DXGI_SWAP_CHAIN_FLAG flags = ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT ? DXGI_SWAP_CHAIN_FLAG{} : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; wil::com_ptr _swapChain; wil::unique_handle _swapChainHandle; diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 6047ab455c6..20517803800 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + #include "pch.h" #include "BackendD2D.h" @@ -193,11 +196,11 @@ void BackendD2D::_drawText(RenderingPayload& p) DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); - const auto blackBox = GetGlyphRunBlackBox(glyphRun, baselineX, baselineY); + const auto blackBox = _getGlyphRunBlackBox(glyphRun, baselineX, baselineY); // Add a 1px padding to avoid inaccuracies with the blackbox measurement. // It's only an estimate based on the design size after all. 
- row->top = std::min(row->top, static_cast(lround(blackBox.top - 1.5f))); - row->bottom = std::max(row->bottom, static_cast(lround(blackBox.bottom + 1.5f))); + row->top = std::min(row->top, static_cast(lround(blackBox.top) - 1)); + row->bottom = std::max(row->bottom, static_cast(lround(blackBox.bottom) + 1)); for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) { @@ -205,8 +208,8 @@ void BackendD2D::_drawText(RenderingPayload& p) } } while (it != end); } - - if (row->top < p.dirtyRectInPx.bottom && p.dirtyRectInPx.top < row->bottom) + + if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { dirtyTop = std::min(dirtyTop, row->top); dirtyBottom = std::max(dirtyBottom, row->bottom); @@ -222,6 +225,66 @@ void BackendD2D::_drawText(RenderingPayload& p) } } +// Returns the theoretical/design design size of the given `DWRITE_GLYPH_RUN`, relative the the given baseline origin. +// This algorithm replicates what DirectWrite does internally to provide `IDWriteTextLayout::GetMetrics`. +f32r BackendD2D::_getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) +{ + DWRITE_FONT_METRICS fontMetrics; + glyphRun.fontFace->GetMetrics(&fontMetrics); + + if (glyphRun.glyphCount > _glyphMetrics.size()) + { + // Growth factor 1.5x. + auto size = _glyphMetrics.size(); + size = size + (size >> 1); + size = std::max(size, glyphRun.glyphCount); + // Overflow check. + Expects(size > _glyphMetrics.size()); + _glyphMetrics = Buffer{ size }; + } + + glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, _glyphMetrics.data(), false); + + const f32 fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; + f32r accumulatedBounds{ + FLT_MAX, + FLT_MAX, + FLT_MIN, + FLT_MIN, + }; + + for (uint32_t i = 0; i < glyphRun.glyphCount; ++i) + { + const auto& glyphMetrics = _glyphMetrics[i]; + const auto glyphAdvance = glyphRun.glyphAdvances ? 
glyphRun.glyphAdvances[i] : glyphMetrics.advanceWidth * fontScale; + + const auto left = static_cast(glyphMetrics.leftSideBearing) * fontScale; + const auto top = static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale; + const auto right = static_cast(gsl::narrow_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale; + const auto bottom = static_cast(gsl::narrow_cast(glyphMetrics.advanceHeight) - glyphMetrics.bottomSideBearing - glyphMetrics.verticalOriginY) * fontScale; + + if (left < right && top < bottom) + { + auto glyphX = baselineX; + auto glyphY = baselineY; + if (glyphRun.glyphOffsets) + { + glyphX += glyphRun.glyphOffsets[i].advanceOffset; + glyphY -= glyphRun.glyphOffsets[i].ascenderOffset; + } + + accumulatedBounds.left = std::min(accumulatedBounds.left, left + glyphX); + accumulatedBounds.top = std::min(accumulatedBounds.top, top + glyphY); + accumulatedBounds.right = std::max(accumulatedBounds.right, right + glyphX); + accumulatedBounds.bottom = std::max(accumulatedBounds.bottom, bottom + glyphY); + } + + baselineX += glyphAdvance; + } + + return accumulatedBounds; +} + void BackendD2D::_drawGridlines(const RenderingPayload& p) { u16 y = 0; diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 2df604e59be..64f6ea4b0ee 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ #pragma once #include "Backend.h" @@ -16,6 +19,7 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); + f32r _getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursor(const RenderingPayload& p); @@ -36,6 +40,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _backgroundBrush; til::generation_t _backgroundBitmapGeneration; + Buffer _glyphMetrics; u32 _brushColor = 0; til::generation_t _generation; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 914a8fc0efd..db45984e8d3 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + #include "pch.h" #include "BackendD3D.h" @@ -10,6 +13,8 @@ #include "dwrite.h" +#include "colorbrewer.h" + TIL_FAST_MATH_BEGIN // This code packs various data into smaller-than-int types to save both CPU and GPU memory. 
This warning would force @@ -291,6 +296,31 @@ void BackendD3D::Render(RenderingPayload& p) _drawGridlines(p); _drawCursorPart2(p); _drawSelection(p); + +#if ATLAS_DEBUG_SHOW_DIRTY + { + _presentRects[_presentRectsPos] = p.dirtyRectInPx; + _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); + + for (size_t i = 0; i < std::size(_presentRects); ++i) + { + if (const auto& rect = _presentRects[i]) + { + const i16x2 position{ + static_cast(rect.left), + static_cast(rect.top), + }; + const u16x2 size{ + static_cast(rect.right - rect.left), + static_cast(rect.bottom - rect.top), + }; + const auto color = 0x3f000000 | colorbrewer::pastel1[i]; + _appendQuad(position, size, color, ShadingType::SolidFill); + } + } + } +#endif + _flushQuads(p); if (_customPixelShader) @@ -373,6 +403,11 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) _miscGeneration = p.s->misc.generation(); _targetSize = p.s->targetSize; _cellCount = p.s->cellCount; + +#if ATLAS_DEBUG_SHOW_DIRTY + std::ranges::fill(_presentRects, til::rect{}); + _presentRectsPos = 0; +#endif } void BackendD3D::_recreateCustomShader(const RenderingPayload& p) @@ -835,6 +870,7 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) return; } + // TODO: Shrink instances buffer if (_instancesCount > _instanceBufferCapacity) { _recreateInstanceBuffers(p); @@ -889,20 +925,20 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) { - static constexpr size_t R16max = 1 << 16; - // While the viewport size of the terminal is probably a good initial estimate for the amount of instances we'll see, - // I feel like we should ensure that the estimate doesn't exceed the limit for a DXGI_FORMAT_R16_UINT index buffer. 
- const auto estimatedInstances = std::min(R16max / 4, static_cast(p.s->cellCount.x) * p.s->cellCount.y); - const auto minSize = std::max(_instancesCount, estimatedInstances); - // std::bit_ceil will result in a nice exponential growth curve. I don't know exactly how structured buffers are treated - // by various drivers, but I'm assuming that they prefer buffer sizes that are close to power-of-2 sizes as well. - const auto newInstancesCapacity = std::bit_ceil(minSize * sizeof(QuadInstance)) / sizeof(QuadInstance); + // We use the viewport size of the terminal as the initial estimate for the amount of instances we'll see. + const auto minCapacity = static_cast(p.s->cellCount.x) * p.s->cellCount.y; + auto newCapacity = std::max(_instancesCount, minCapacity); + auto newSize = newCapacity * sizeof(QuadInstance); + // Round up to multiples of 64kB to avoid reallocating too often. + // 64kB is the minimum alignment for committed resources in D3D12. + newSize = (newSize + 0xffff) & ~size_t{ 0xffff }; + newCapacity = newSize / sizeof(QuadInstance); _instanceBuffer.reset(); { D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = gsl::narrow(newInstancesCapacity * sizeof(QuadInstance)); + desc.ByteWidth = gsl::narrow(newSize); desc.Usage = D3D11_USAGE_DYNAMIC; desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; @@ -916,7 +952,7 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) static constexpr UINT offsets[]{ 0, 0 }; _deviceContext->IASetVertexBuffers(0, 2, &vertexBuffers[0], &strides[0], &offsets[0]); - _instanceBufferCapacity = newInstancesCapacity; + _instanceBufferCapacity = newCapacity; } void BackendD3D::_drawBackground(const RenderingPayload& p) @@ -978,7 +1014,7 @@ void BackendD3D::_drawText(RenderingPayload& p) } } - if (row->top < p.dirtyRectInPx.bottom && p.dirtyRectInPx.top < row->bottom) + if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { dirtyTop = std::min(dirtyTop, row->top); dirtyBottom = 
std::max(dirtyBottom, row->bottom); @@ -996,18 +1032,42 @@ void BackendD3D::_drawText(RenderingPayload& p) _d2dEndDrawing(); } +#pragma warning(disable : 4189) + void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) { - DWRITE_GLYPH_RUN glyphRun{}; - glyphRun.fontFace = entry.fontFace; - glyphRun.fontEmSize = fontEmSize; - glyphRun.glyphCount = 1; - glyphRun.glyphIndices = &entry.glyphIndex; + const DWRITE_GLYPH_RUN glyphRun{ + .fontFace = entry.fontFace, + .fontEmSize = fontEmSize, + .glyphCount = 1, + .glyphIndices = &entry.glyphIndex, + }; + + DWRITE_FONT_METRICS fontMetrics; + glyphRun.fontFace->GetMetrics(&fontMetrics); + + DWRITE_GLYPH_METRICS glyphMetrics; + glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, &glyphMetrics, false); + + // This calculates the black box of the glyph, or in other words, it's extents/size relative to its baseline origin (at 0,0). + // The algorithm below is a reverse engineered variant of `IDWriteTextLayout::GetMetrics`. The coordinates will be in pixel + // and the positive direction will be bottom/right. A `.left` of -3px would indicate that the glyph overlaps it's bounding box + // by 3px to the left and would thus overlap it's neighbor to the left by 3px. `.bottom` is the same but for the descender. + // `.right` and `.top` are not overlaps per se, but rather the distance to the right/top edge relative to the baseline origin. + // The width of the glyph for instance is thus `.right - .left`. 
+ const f32 fontScale = p.d.font.pixelPerDIP * glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; + const f32r box{ + static_cast(glyphMetrics.leftSideBearing) * fontScale, + static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale, + static_cast(static_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale, + static_cast(static_cast(glyphMetrics.advanceHeight) - glyphMetrics.bottomSideBearing - glyphMetrics.verticalOriginY) * fontScale, + }; - const auto box = GetGlyphRunBlackBox(glyphRun, 0, 0); + // box may be empty if the glyph is whitespace. if (box.empty()) { - // This will indicate to BackendD3D::_drawText that this glyph is whitespace. + // This will indicate to `BackendD3D::_drawText` that this glyph is whitespace. It's important to set this member, + // because `GlyphCacheMap` does not zero out inserted entries and `shadingType` might still contain "garbage". entry.shadingType = 0; return; } @@ -1015,14 +1075,15 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f bool retry = false; for (;;) { - // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs - // from overlapping, since the blackbox measurement is only an estimate. + // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs from overlapping, + // since the blackbox measurement is only an estimate based on the design metrics. // We need to use round (and not ceil/floor) to ensure we pixel-snap individual // glyphs correctly and form a consistent baseline across an entire run of glyphs. - const auto l = lround(box.left * p.d.font.pixelPerDIP) - 1; - const auto t = lround(box.top * p.d.font.pixelPerDIP) - 1; - const auto r = lround(box.right * p.d.font.pixelPerDIP) + 1; - const auto b = lround(box.bottom * p.d.font.pixelPerDIP) + 1; + // Also, ClearType might draw (rounded) up to 1.2px away from the design outline. 
+ const auto l = lround(box.left) - 1; + const auto t = lround(box.top) - 1; + const auto r = lround(box.right) + 1; + const auto b = lround(box.bottom) + 1; stbrp_rect rect{}; rect.w = r - l; diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 68ba7d3dfc3..a1769b12f5c 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + #pragma once #include @@ -223,6 +226,11 @@ namespace Microsoft::Console::Render::Atlas til::small_vector _cursorRects; bool _requiresContinuousRedraw = false; + +#if ATLAS_DEBUG_SHOW_DIRTY + til::rect _presentRects[9]{}; + size_t _presentRectsPos = 0; +#endif #ifndef NDEBUG std::filesystem::path _sourceDirectory; diff --git a/src/renderer/atlas/DWriteTextAnalysis.cpp b/src/renderer/atlas/DWriteTextAnalysis.cpp index d9456c4ddf9..94764c59a10 100644 --- a/src/renderer/atlas/DWriteTextAnalysis.cpp +++ b/src/renderer/atlas/DWriteTextAnalysis.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + #include "pch.h" #include "DWriteTextAnalysis.h" diff --git a/src/renderer/atlas/colorbrewer.h b/src/renderer/atlas/colorbrewer.h new file mode 100644 index 00000000000..990ce4638a6 --- /dev/null +++ b/src/renderer/atlas/colorbrewer.h @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +namespace Microsoft::Console::Render::Atlas::colorbrewer { + // The following list of colors is only used as a debug aid and not part of the final product. + // They're licensed under: + // + // Apache-Style Software License for ColorBrewer software and ColorBrewer Color Schemes + // + // Copyright (c) 2002 Cynthia Brewer, Mark Harrower, and The Pennsylvania State University. + // + // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
+ // You may obtain a copy of the License at + // + // http://www.apache.org/licenses/LICENSE-2.0 + // + // Unless required by applicable law or agreed to in writing, software distributed + // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + // CONDITIONS OF ANY KIND, either express or implied. See the License for the + // specific language governing permissions and limitations under the License. + // + inline constexpr u32 pastel1[]{ + 0xfbb4ae, + 0xb3cde3, + 0xccebc5, + 0xdecbe4, + 0xfed9a6, + 0xffffcc, + 0xe5d8bd, + 0xfddaec, + 0xf2f2f2, + }; +} diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index d9d2a73bf43..ac7f4d06724 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + #pragma once #include @@ -93,6 +96,15 @@ namespace Microsoft::Console::Render::Atlas } }; + template + struct range + { + T start{}; + T end{}; + + ATLAS_POD_OPS(range) + }; + using u8 = uint8_t; using u16 = uint16_t; @@ -146,7 +158,7 @@ namespace Microsoft::Console::Render::Atlas // be a good future extension, but not to improve security here. // You can trivially construct std::span's from invalid ranges. // Until then the raw-pointer style is more practical. -#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '3' that may be unsafe [...]. +#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. std::uninitialized_copy_n(data, size, _data); } @@ -161,7 +173,6 @@ namespace Microsoft::Console::Render::Atlas { } -#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). 
Buffer& operator=(Buffer&& other) noexcept { destroy(); @@ -170,6 +181,40 @@ namespace Microsoft::Console::Render::Atlas return *this; } +#if 0 + Buffer(const Buffer& other) noexcept : + _data{ allocate(other._size) }, + _size{ other._size } + { +#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. + std::uninitialized_copy_n(other._data, other._size, _data); + } + + Buffer& operator=(const Buffer& other) noexcept + { + destroy(); + _data = nullptr; + _size = 0; + + _data = allocate(other._size); + _size = other._size; + +#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. + std::uninitialized_copy_n(other._data, other._size, _data); + return *this; + } + + bool operator==(const Buffer& other) const + { + return memcmp(_data, other._data, _size * sizeof(T)) == 0; + } + + bool operator!=(const Buffer& other) const + { + return memcmp(_data, other._data, _size * sizeof(T)) != 0; + } +#endif + explicit operator bool() const noexcept { return _data != nullptr; @@ -234,6 +279,10 @@ namespace Microsoft::Console::Render::Atlas #pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique instead (r.11). static T* allocate(size_t size) { + if (!size) + { + return nullptr; + } if constexpr (Alignment <= __STDCPP_DEFAULT_NEW_ALIGNMENT__) { return static_cast(::operator new(size * sizeof(T))); @@ -428,6 +477,7 @@ namespace Microsoft::Console::Render::Atlas // is entirely black, just like `backgroundBitmap` after it gets created. 
til::generation_t backgroundBitmapGeneration{ 1 }; til::rect dirtyRectInPx; + u16x2 invalidatedRows; u16r cursorRect; i16 scrollOffset = 0; }; diff --git a/src/renderer/atlas/custom_shader_ps.hlsl b/src/renderer/atlas/custom_shader_ps.hlsl index 0073f2ca87c..221be267b42 100644 --- a/src/renderer/atlas/custom_shader_ps.hlsl +++ b/src/renderer/atlas/custom_shader_ps.hlsl @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + // The original retro pixel shader Texture2D shaderTexture; SamplerState samplerState; diff --git a/src/renderer/atlas/custom_shader_vs.hlsl b/src/renderer/atlas/custom_shader_vs.hlsl index 5bb9fbff70b..97b51c4c23e 100644 --- a/src/renderer/atlas/custom_shader_vs.hlsl +++ b/src/renderer/atlas/custom_shader_vs.hlsl @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + struct VS_OUTPUT { float4 pos : SV_POSITION; diff --git a/src/renderer/atlas/stb_rect_pack.cpp b/src/renderer/atlas/stb_rect_pack.cpp index 7cba8a5330b..306f747a7f5 100644 --- a/src/renderer/atlas/stb_rect_pack.cpp +++ b/src/renderer/atlas/stb_rect_pack.cpp @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ #include "pch.h" #define STB_RECT_PACK_IMPLEMENTATION diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index 48f950c55c7..317371f7d52 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -105,6 +105,13 @@ + + {{ generation={_generation._value}}} + + _value + + + {{ size={_size} }} From badbd49e2ccf7cc6845e7a155f46f8dcabd4c77d Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 21 Mar 2023 01:15:00 +0100 Subject: [PATCH 09/37] Finally fix broken rendering in BackendD3D --- src/host/CursorBlinker.cpp | 4 +- src/inc/til/small_vector.h | 2 + src/renderer/atlas/AtlasEngine.r.cpp | 4 ++ src/renderer/atlas/Backend.h | 21 +++++- src/renderer/atlas/BackendD2D.cpp | 30 ++++++++- src/renderer/atlas/BackendD2D.h | 6 ++ src/renderer/atlas/BackendD3D.cpp | 98 +++++++++++++++++++--------- src/renderer/atlas/BackendD3D.h | 11 +++- src/renderer/atlas/colorbrewer.h | 3 +- src/renderer/atlas/common.h | 35 +--------- src/renderer/atlas/wic.h | 59 +++++++++++++++++ 11 files changed, 202 insertions(+), 71 deletions(-) create mode 100644 src/renderer/atlas/wic.h diff --git a/src/host/CursorBlinker.cpp b/src/host/CursorBlinker.cpp index 014d4529c74..78f072ccc94 100644 --- a/src/host/CursorBlinker.cpp +++ b/src/host/CursorBlinker.cpp @@ -55,7 +55,7 @@ void CursorBlinker::SettingsChanged() noexcept { KillCaretTimer(); _uCaretBlinkTime = dwCaretBlinkTime; - //SetCaretTimer(); + SetCaretTimer(); } } @@ -66,7 +66,7 @@ void CursorBlinker::FocusEnd() const noexcept void CursorBlinker::FocusStart() const noexcept { - //SetCaretTimer(); + SetCaretTimer(); } // Routine Description: diff --git a/src/inc/til/small_vector.h b/src/inc/til/small_vector.h index 127795e131d..9c0d4fc87ec 100644 --- a/src/inc/til/small_vector.h +++ b/src/inc/til/small_vector.h @@ -16,6 +16,8 @@ #pragma warning(disable : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe ... (stl.1). 
// small_vector::_data references potentially uninitialized data and so we can't pass it regular iterators which reference initialized data. #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). +// small_vector::_buffer is explicitly uninitialized, because we manage it's initialization manually. +#pragma warning(disable : 26495) // Variable '...' is uninitialized. Always initialize a member variable (type.6). namespace til { diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 3d6c168df00..65a57594374 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -82,6 +82,10 @@ void AtlasEngine::WaitUntilCanRender() noexcept { _b->WaitUntilCanRender(); } + if constexpr (ATLAS_DEBUG_RENDER_DELAY) + { + Sleep(ATLAS_DEBUG_RENDER_DELAY); + } } #pragma endregion diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 8abd023d06e..21498fa522b 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -7,12 +7,31 @@ namespace Microsoft::Console::Render::Atlas { + // If set to 1, this will cause the entire viewport to be invalidated at all times. + // Helpful for benchmarking our text shaping code based on DirectWrite. +#define ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION 0 + + // Redraw at display refresh rate at all times. This helps with shader debugging. #define ATLAS_DEBUG_CONTINUOUS_REDRAW 0 + + // Disables the use of DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT. + // This helps with benchmarking the application as it'll run beyond display refresh rate. #define ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT 0 -#define ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION 0 + + // Forces the use of Direct2D for text rendering (= BackendD2D). #define ATLAS_DEBUG_FORCE_D2D_MODE 0 + + // Adds an artificial delay before every render pass. In milliseconds. 
+#define ATLAS_DEBUG_RENDER_DELAY 0 + + // Shows the dirty rects as given to IDXGISwapChain2::Present1() during each frame. #define ATLAS_DEBUG_SHOW_DIRTY 0 + // Dumps the contents of the swap chain on each render pass into the given directory as PNG. + // I highly recommend setting ATLAS_DEBUG_RENDER_DELAY to 250 or similar if this is used. +#define ATLAS_DEBUG_DUMP_RENDER_TARGET 0 +#define ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH LR"(%USERPROFILE%\Downloads\AtlasEngine)" + struct SwapChainManager { void UpdateSwapChainSettings(const RenderingPayload& p, IUnknown* device, auto&& prepareRecreate, auto&& prepareResize) diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 20517803800..e62ae5b887a 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -4,6 +4,10 @@ #include "pch.h" #include "BackendD2D.h" +#if ATLAS_DEBUG_SHOW_DIRTY +#include "colorbrewer.h" +#endif + TIL_FAST_MATH_BEGIN // Disable a bunch of warnings which get in the way of writing performant code. 
@@ -34,6 +38,9 @@ void BackendD2D::Render(RenderingPayload& p) _drawGridlines(p); _drawCursor(p); _drawSelection(p); +#if ATLAS_DEBUG_SHOW_DIRTY + _debugShowDirty(p); +#endif THROW_IF_FAILED(_renderTarget->EndDraw()); _swapChainManager.Present(p); @@ -208,7 +215,7 @@ void BackendD2D::_drawText(RenderingPayload& p) } } while (it != end); } - + if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { dirtyTop = std::min(dirtyTop, row->top); @@ -486,6 +493,27 @@ void BackendD2D::_drawSelection(const RenderingPayload& p) } } +void BackendD2D::_debugShowDirty(RenderingPayload& p) +{ + _presentRects[_presentRectsPos] = p.dirtyRectInPx; + _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); + + for (size_t i = 0; i < std::size(_presentRects); ++i) + { + if (const auto& rect = _presentRects[i]) + { + const D2D1_RECT_F rectF{ + static_cast(rect.left), + static_cast(rect.top), + static_cast(rect.right), + static_cast(rect.bottom), + }; + const auto color = 0x1f000000 | colorbrewer::pastel1[i]; + _fillRectangle(rectF, color); + } + } +} + ID2D1Brush* BackendD2D::_brushWithColor(u32 color) { if (_brushColor != color) diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 64f6ea4b0ee..428b12f8535 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -24,6 +24,7 @@ namespace Microsoft::Console::Render::Atlas void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursor(const RenderingPayload& p); void _drawSelection(const RenderingPayload& p); + void _debugShowDirty(RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); void _fillRectangle(const D2D1_RECT_F& rect, u32 color); @@ -46,5 +47,10 @@ namespace Microsoft::Console::Render::Atlas til::generation_t _generation; til::generation_t _fontGeneration; u16x2 _cellCount; + +#if ATLAS_DEBUG_SHOW_DIRTY + til::rect _presentRects[9]{}; + size_t _presentRectsPos = 0; +#endif }; } diff --git 
a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index db45984e8d3..ce4cb2c64a1 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -13,7 +13,13 @@ #include "dwrite.h" +#if ATLAS_DEBUG_SHOW_DIRTY #include "colorbrewer.h" +#endif + +#if ATLAS_DEBUG_DUMP_RENDER_TARGET +#include "wic.h" +#endif TIL_FAST_MATH_BEGIN @@ -290,37 +296,23 @@ void BackendD3D::Render(RenderingPayload& p) // After a Present() the render target becomes unbound. _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); + // Invalidating the render target helps with spotting invalid quad instances and Present1() bugs. +#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_DUMP_RENDER_TARGET + { + static constexpr f32 clearColor[4]{}; + _deviceContext->ClearView(_renderTargetView.get(), &clearColor[0], nullptr, 0); + } +#endif + _drawBackground(p); _drawCursorPart1(p); _drawText(p); _drawGridlines(p); _drawCursorPart2(p); _drawSelection(p); - #if ATLAS_DEBUG_SHOW_DIRTY - { - _presentRects[_presentRectsPos] = p.dirtyRectInPx; - _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); - - for (size_t i = 0; i < std::size(_presentRects); ++i) - { - if (const auto& rect = _presentRects[i]) - { - const i16x2 position{ - static_cast(rect.left), - static_cast(rect.top), - }; - const u16x2 size{ - static_cast(rect.right - rect.left), - static_cast(rect.bottom - rect.top), - }; - const auto color = 0x3f000000 | colorbrewer::pastel1[i]; - _appendQuad(position, size, color, ShadingType::SolidFill); - } - } - } + _debugShowDirty(p); #endif - _flushQuads(p); if (_customPixelShader) @@ -328,6 +320,9 @@ void BackendD3D::Render(RenderingPayload& p) _executeCustomShader(p); } +#if ATLAS_DEBUG_DUMP_RENDER_TARGET + _debugDumpRenderTarget(p); +#endif _swapChainManager.Present(p); } @@ -403,11 +398,6 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) _miscGeneration = p.s->misc.generation(); _targetSize = 
p.s->targetSize; _cellCount = p.s->cellCount; - -#if ATLAS_DEBUG_SHOW_DIRTY - std::ranges::fill(_presentRects, til::rect{}); - _presentRectsPos = 0; -#endif } void BackendD3D::_recreateCustomShader(const RenderingPayload& p) @@ -747,7 +737,7 @@ void BackendD3D::_d2dEndDrawing() } } -void BackendD3D::_handleFontChangedResetGlyphAtlas(RenderingPayload& p) +void BackendD3D::_handleFontChangedResetGlyphAtlas(const RenderingPayload& p) { _fontChangedResetGlyphAtlas = false; _resetGlyphAtlasAndBeginDraw(p); @@ -860,7 +850,13 @@ void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 col void BackendD3D::_bumpInstancesSize() { - _instances = Buffer{ std::max(1024, _instances.size() << 1) }; + const auto newSize = std::max(256, _instances.size() * 2); + Expects(newSize > _instances.size()); + + auto newInstances = Buffer{ newSize }; + std::copy_n(_instances.data(), _instances.size(), newInstances.data()); + + _instances = std::move(newInstances); } void BackendD3D::_flushQuads(const RenderingPayload& p) @@ -1383,6 +1379,48 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) } } +#if ATLAS_DEBUG_SHOW_DIRTY +void BackendD3D::_debugShowDirty(RenderingPayload& p) +{ + _presentRects[_presentRectsPos] = p.dirtyRectInPx; + _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); + + for (size_t i = 0; i < std::size(_presentRects); ++i) + { + if (const auto& rect = _presentRects[i]) + { + const i16x2 position{ + static_cast(rect.left), + static_cast(rect.top), + }; + const u16x2 size{ + static_cast(rect.right - rect.left), + static_cast(rect.bottom - rect.top), + }; + const auto color = 0x1f000000 | colorbrewer::pastel1[i]; + _appendQuad(position, size, color, ShadingType::SolidFill); + } + } +} +#endif + +#if ATLAS_DEBUG_DUMP_RENDER_TARGET +void BackendD3D::_debugDumpRenderTarget(RenderingPayload& p) +{ + const auto n = _dumpRenderTargetCounter.fetch_add(1, std::memory_order_relaxed); + + if (n == 0) + { + 
ExpandEnvironmentStringsW(ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH, &_dumpRenderTargetBasePath[0], std::size(_dumpRenderTargetBasePath)); + std::filesystem::create_directories(_dumpRenderTargetBasePath); + } + + wchar_t path[MAX_PATH]; + swprintf_s(path, L"%s\\%u_%08u.png", &_dumpRenderTargetBasePath[0], GetCurrentProcessId(), n); + SaveTextureToPNG(_deviceContext.get(), _swapChainManager.GetBuffer().get(), p.s->font->dpi, &path[0]); +} +#endif + void BackendD3D::_executeCustomShader(RenderingPayload& p) { { diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index a1769b12f5c..6777404ef35 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -127,9 +127,11 @@ namespace Microsoft::Console::Render::Atlas void _recreateConstBuffer(const RenderingPayload& p); void _setupDeviceContextState(const RenderingPayload& p); void _debugUpdateShaders(const RenderingPayload& p) noexcept; + void _debugShowDirty(RenderingPayload& p); + void _debugDumpRenderTarget(RenderingPayload& p); void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); - void _handleFontChangedResetGlyphAtlas(RenderingPayload& p); + void _handleFontChangedResetGlyphAtlas(const RenderingPayload& p); void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; @@ -226,12 +228,17 @@ namespace Microsoft::Console::Render::Atlas til::small_vector _cursorRects; bool _requiresContinuousRedraw = false; - + #if ATLAS_DEBUG_SHOW_DIRTY til::rect _presentRects[9]{}; size_t _presentRectsPos = 0; #endif +#if ATLAS_DEBUG_DUMP_RENDER_TARGET + std::atomic _dumpRenderTargetCounter; + wchar_t _dumpRenderTargetBasePath[MAX_PATH]; +#endif + #ifndef NDEBUG std::filesystem::path _sourceDirectory; wil::unique_folder_change_reader_nothrow _sourceCodeWatcher; diff --git a/src/renderer/atlas/colorbrewer.h b/src/renderer/atlas/colorbrewer.h index 990ce4638a6..5724d2d37da 100644 --- 
a/src/renderer/atlas/colorbrewer.h +++ b/src/renderer/atlas/colorbrewer.h @@ -3,7 +3,8 @@ #pragma once -namespace Microsoft::Console::Render::Atlas::colorbrewer { +namespace Microsoft::Console::Render::Atlas::colorbrewer +{ // The following list of colors is only used as a debug aid and not part of the final product. // They're licensed under: // diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index ac7f4d06724..42874c1f1da 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -173,6 +173,7 @@ namespace Microsoft::Console::Render::Atlas { } +#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). Buffer& operator=(Buffer&& other) noexcept { destroy(); @@ -181,40 +182,6 @@ namespace Microsoft::Console::Render::Atlas return *this; } -#if 0 - Buffer(const Buffer& other) noexcept : - _data{ allocate(other._size) }, - _size{ other._size } - { -#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. - std::uninitialized_copy_n(other._data, other._size, _data); - } - - Buffer& operator=(const Buffer& other) noexcept - { - destroy(); - _data = nullptr; - _size = 0; - - _data = allocate(other._size); - _size = other._size; - -#pragma warning(suppress : 26459) // You called an STL function '...' with a raw pointer parameter at position '...' that may be unsafe [...]. 
- std::uninitialized_copy_n(other._data, other._size, _data); - return *this; - } - - bool operator==(const Buffer& other) const - { - return memcmp(_data, other._data, _size * sizeof(T)) == 0; - } - - bool operator!=(const Buffer& other) const - { - return memcmp(_data, other._data, _size * sizeof(T)) != 0; - } -#endif - explicit operator bool() const noexcept { return _data != nullptr; diff --git a/src/renderer/atlas/wic.h b/src/renderer/atlas/wic.h new file mode 100644 index 00000000000..1f08eded553 --- /dev/null +++ b/src/renderer/atlas/wic.h @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include + +inline void SaveTextureToPNG(ID3D11DeviceContext* deviceContext, ID3D11Resource* source, double dpi, const wchar_t* fileName) +{ + __assume(deviceContext != nullptr); + __assume(source != nullptr); + + wil::com_ptr texture; + THROW_IF_FAILED(source->QueryInterface(IID_PPV_ARGS(texture.addressof()))); + + wil::com_ptr d3dDevice; + deviceContext->GetDevice(d3dDevice.addressof()); + + D3D11_TEXTURE2D_DESC desc{}; + texture->GetDesc(&desc); + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.Usage = D3D11_USAGE_STAGING; + + wil::com_ptr staging; + THROW_IF_FAILED(d3dDevice->CreateTexture2D(&desc, nullptr, staging.put())); + + deviceContext->CopyResource(staging.get(), source); + + static const auto wicFactory = []() { + THROW_IF_FAILED(::CoInitializeEx(nullptr, COINIT_MULTITHREADED)); + return wil::CoCreateInstance(CLSID_WICImagingFactory2); + }(); + + wil::com_ptr stream; + THROW_IF_FAILED(wicFactory->CreateStream(stream.addressof())); + THROW_IF_FAILED(stream->InitializeFromFilename(fileName, GENERIC_WRITE)); + + wil::com_ptr encoder; + THROW_IF_FAILED(wicFactory->CreateEncoder(GUID_ContainerFormatPng, nullptr, encoder.addressof())); + THROW_IF_FAILED(encoder->Initialize(stream.get(), WICBitmapEncoderNoCache)); + + wil::com_ptr frame; + wil::com_ptr props; + 
THROW_IF_FAILED(encoder->CreateNewFrame(frame.addressof(), props.addressof())); + THROW_IF_FAILED(frame->Initialize(props.get())); + THROW_IF_FAILED(frame->SetSize(desc.Width, desc.Height)); + THROW_IF_FAILED(frame->SetResolution(dpi, dpi)); + auto pixelFormat = GUID_WICPixelFormat32bppBGRA; + THROW_IF_FAILED(frame->SetPixelFormat(&pixelFormat)); + + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(deviceContext->Map(staging.get(), 0, D3D11_MAP_READ, 0, &mapped)); + THROW_IF_FAILED(frame->WritePixels(desc.Height, mapped.RowPitch, mapped.RowPitch * desc.Height, static_cast(mapped.pData))); + deviceContext->Unmap(staging.get(), 0); + + THROW_IF_FAILED(frame->Commit()); + THROW_IF_FAILED(encoder->Commit()); +} From 0d44fe4410ed96e7b6aba244c9006bd43357f2cc Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 21 Mar 2023 19:04:56 +0100 Subject: [PATCH 10/37] Fix dirty rects in BackendD2D, Investigate broken support for hinted glyphs --- src/renderer/atlas/AtlasEngine.api.cpp | 11 +- src/renderer/atlas/AtlasEngine.cpp | 21 ++- src/renderer/atlas/Backend.cpp | 4 +- src/renderer/atlas/Backend.h | 4 + src/renderer/atlas/BackendD2D.cpp | 157 +++++++++++++++-------- src/renderer/atlas/BackendD2D.h | 9 +- src/renderer/atlas/BackendD3D.cpp | 170 ++++++++++++++----------- src/renderer/atlas/BackendD3D.h | 16 ++- src/renderer/atlas/common.h | 10 +- 9 files changed, 240 insertions(+), 162 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index bf99768474d..fb40bf34510 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -278,7 +278,7 @@ CATCH_RETURN() /* fontWeight */ static_cast(_api.s->font->fontWeight), /* fontStyle */ DWRITE_FONT_STYLE_NORMAL, /* fontStretch */ DWRITE_FONT_STRETCH_NORMAL, - /* fontSize */ _api.s->font->fontSizeInDIP, + /* fontSize */ _api.s->font->fontSize, /* localeName */ L"", /* textFormat */ textFormat.put())); @@ -289,8 +289,7 @@ CATCH_RETURN() 
RETURN_IF_FAILED(textLayout->GetMetrics(&metrics)); const auto minWidth = (_api.s->font->cellSize.x * 1.2f); - const auto width = metrics.width * GetScaling(); - *pResult = width > minWidth; + *pResult = metrics.width > minWidth; return S_OK; } @@ -629,7 +628,6 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo // (including by OpenType), whereas DirectWrite uses 96 DPI. // Since we want the height in px we multiply by the display's DPI. const auto dpi = static_cast(_api.s->font->dpi); - const auto fontSizeInDIP = fontSize / 72.0f * 96.0f; const auto fontSizeInPx = fontSize / 72.0f * dpi; const auto designUnitsPerPx = fontSizeInPx / static_cast(metrics.designUnitsPerEm); @@ -703,6 +701,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo { std::wstring fontName{ requestedFaceName }; const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); + const auto baselineU16 = gsl::narrow_cast(baseline); const auto underlinePosU16 = gsl::narrow_cast(underlinePos); const auto underlineWidthU16 = gsl::narrow_cast(underlineWidth); const auto strikethroughPosU16 = gsl::narrow_cast(strikethroughPos); @@ -717,11 +716,11 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontCollection = std::move(fontCollection); fontMetrics->fontFamily = std::move(fontFamily); fontMetrics->fontName = std::move(fontName); - fontMetrics->baselineInDIP = baseline / dpi * 96.0f; - fontMetrics->fontSizeInDIP = fontSizeInDIP; + fontMetrics->fontSize = fontSizeInPx; fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize = { cellWidth, cellHeight }; fontMetrics->fontWeight = fontWeightU16; + fontMetrics->baseline = baselineU16; fontMetrics->underlinePos = underlinePosU16; fontMetrics->underlineWidth = underlineWidthU16; fontMetrics->strikethroughPos = strikethroughPosU16; diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 
2891172d0a9..2744746ec1f 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -363,7 +363,7 @@ try const CursorSettings cachedOptions{ .cursorColor = gsl::narrow_cast(options.fUseColor ? options.cursorColor | 0xff000000 : INVALID_COLOR), .cursorType = gsl::narrow_cast(options.cursorType), - .heightPercentage = gsl::narrow_cast(options.ulCursorHeightPercent), + .heightPercentage = gsl::narrow_cast(options.ulCursorHeightPercent), }; if (*_api.s->cursor != cachedOptions) { @@ -470,11 +470,6 @@ void AtlasEngine::_handleSettingsUpdate() void AtlasEngine::_recreateFontDependentResources() { - _p.d.font.dipPerPixel = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(_p.s->font->dpi); - _p.d.font.pixelPerDIP = static_cast(_p.s->font->dpi) / static_cast(USER_DEFAULT_SCREEN_DPI); - _p.d.font.cellSizeDIP.x = static_cast(_p.s->font->cellSize.x) * _p.d.font.dipPerPixel; - _p.d.font.cellSizeDIP.y = static_cast(_p.s->font->cellSize.y) * _p.d.font.dipPerPixel; - if (!_p.s->font->fontAxisValues.empty()) { // See AtlasEngine::UpdateFont. 
@@ -597,9 +592,9 @@ void AtlasEngine::_flushBufferLine() const auto col1 = _api.bufferLineColumn[idx + i + 0]; const auto fg = _api.colorsForeground[col1]; const auto col2 = _api.bufferLineColumn[idx + i + 1]; - const auto glyphAdvance = (col2 - col1) * _p.d.font.cellSizeDIP.x; + const auto glyphAdvance = (col2 - col1) * _p.s->font->cellSize.x; row.glyphIndices.emplace_back(_api.glyphIndices[i]); - row.glyphAdvances.emplace_back(glyphAdvance); + row.glyphAdvances.emplace_back(static_cast(glyphAdvance)); row.glyphOffsets.emplace_back(); row.colors.emplace_back(fg); } @@ -613,7 +608,7 @@ void AtlasEngine::_flushBufferLine() const auto indicesCount = row.glyphIndices.size(); if (indicesCount > initialIndicesCount) { - row.mappings.emplace_back(std::move(mappedFontFace), _p.s->font->fontSizeInDIP * scale, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + row.mappings.emplace_back(std::move(mappedFontFace), _p.s->font->fontSize * scale, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); } } } @@ -761,7 +756,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng /* glyphProps */ _api.glyphProps.data(), /* glyphCount */ actualGlyphCount, /* fontFace */ mappedFontFace, - /* fontEmSize */ _p.s->font->fontSizeInDIP, + /* fontEmSize */ _p.s->font->fontSize, /* isSideways */ false, /* isRightToLeft */ a.bidiLevel & 1, /* scriptAnalysis */ &scriptAnalysis, @@ -789,7 +784,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng const auto col2 = _api.bufferLineColumn[a.textPosition + i]; const auto fg = _api.colorsForeground[col1]; - const auto expectedAdvance = (col2 - col1) * _p.d.font.cellSizeDIP.x; + const auto expectedAdvance = (col2 - col1) * _p.s->font->cellSize.x; f32 actualAdvance = 0; for (auto j = prevCluster; j < nextCluster; ++j) { @@ -841,9 +836,9 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) const auto col1 = 
_api.bufferLineColumn[to]; const auto cols = gsl::narrow_cast(col1 - col0); row.glyphIndices.insert(row.glyphIndices.end(), cols, _api.replacementCharacterGlyphIndex); - row.glyphAdvances.insert(row.glyphAdvances.end(), cols, _p.d.font.cellSizeDIP.x); + row.glyphAdvances.insert(row.glyphAdvances.end(), cols, _p.s->font->cellSize.x); row.glyphOffsets.insert(row.glyphOffsets.end(), cols, DWRITE_GLYPH_OFFSET{}); row.colors.insert(row.colors.end(), _api.colorsForeground.begin() + col0, _api.colorsForeground.begin() + col1); - row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSizeInDIP * 0.5f, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size())); + row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size())); } } diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 72d3e27bada..b0fc1366a0d 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -154,8 +154,8 @@ void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) const if (_fontGeneration != p.s->font.generation() && !p.s->target->hwnd) { const DXGI_MATRIX_3X2_F matrix{ - ._11 = p.d.font.dipPerPixel, - ._22 = p.d.font.dipPerPixel, + ._11 = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(p.s->font->dpi), + ._22 = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(p.s->font->dpi), }; THROW_IF_FAILED(_swapChain->SetMatrixTransform(&matrix)); } diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 21498fa522b..fdd1fdc2761 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -32,6 +32,10 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DUMP_RENDER_TARGET 0 #define ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH LR"(%USERPROFILE%\Downloads\AtlasEngine)" + // Draws a background behind each glyph placed into the BackendD3D glyph 
atlas. + // This can be helpful when debugging bugs in the algorithm that measures the size of a glyph. +#define ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS 0 + struct SwapChainManager { void UpdateSwapChainSettings(const RenderingPayload& p, IUnknown* device, auto&& prepareRecreate, auto&& prepareResize) diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index e62ae5b887a..a860436782c 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -4,10 +4,16 @@ #include "pch.h" #include "BackendD2D.h" +#include "wic.h" + #if ATLAS_DEBUG_SHOW_DIRTY #include "colorbrewer.h" #endif +#if ATLAS_DEBUG_DUMP_RENDER_TARGET +#include "wic.h" +#endif + TIL_FAST_MATH_BEGIN // Disable a bunch of warnings which get in the way of writing performant code. @@ -33,6 +39,10 @@ void BackendD2D::Render(RenderingPayload& p) } _renderTarget->BeginDraw(); +#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_DUMP_RENDER_TARGET + // Invalidating the render target helps with spotting Present1() bugs. 
+ _renderTarget->Clear(); +#endif _drawBackground(p); _drawText(p); _drawGridlines(p); @@ -43,6 +53,9 @@ void BackendD2D::Render(RenderingPayload& p) #endif THROW_IF_FAILED(_renderTarget->EndDraw()); +#if ATLAS_DEBUG_DUMP_RENDER_TARGET + _debugDumpRenderTarget(p); +#endif _swapChainManager.Present(p); } @@ -88,6 +101,8 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); _renderTarget = renderTarget.query(); _renderTarget4 = renderTarget.try_query(); + + _renderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); _renderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); } { @@ -121,7 +136,10 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) .dpiX = static_cast(p.s->font->dpi), .dpiY = static_cast(p.s->font->dpi), }; - const D2D1_SIZE_U size{ p.s->cellCount.x, p.s->cellCount.y }; + const D2D1_SIZE_U size{ + p.s->cellCount.x, + p.s->cellCount.y, + }; const D2D1_MATRIX_3X2_F transform{ .m11 = static_cast(p.s->font->cellSize.x), .m22 = static_cast(p.s->font->cellSize.y), @@ -150,7 +168,7 @@ void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept // If the terminal was 120x30 cells and 1200x600 pixels large, this would draw the // background by upscaling a 120x30 pixel bitmap to fill the entire render target. 
- const D2D1_RECT_F rect{ 0, 0, p.s->targetSize.x * p.d.font.dipPerPixel, p.s->targetSize.y * p.d.font.dipPerPixel }; + const D2D1_RECT_F rect{ 0, 0, static_cast(p.s->targetSize.x), static_cast(p.s->targetSize.y) }; _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_COPY); _renderTarget->FillRectangle(&rect, _backgroundBrush.get()); _renderTarget->SetPrimitiveBlend(D2D1_PRIMITIVE_BLEND_SOURCE_OVER); @@ -191,7 +209,7 @@ void BackendD2D::_drawText(RenderingPayload& p) const auto count = it - beg; const auto brush = _brushWithColor(fg); - const auto baselineY = p.d.font.cellSizeDIP.y * y + p.s->font->baselineInDIP; + const auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); const DWRITE_GLYPH_RUN glyphRun{ .fontFace = m.fontFace.get(), .fontEmSize = m.fontEmSize, @@ -254,10 +272,10 @@ f32r BackendD2D::_getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 base const f32 fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; f32r accumulatedBounds{ - FLT_MAX, - FLT_MAX, - FLT_MIN, - FLT_MIN, + baselineX, + baselineY, + baselineX, + baselineY, }; for (uint32_t i = 0; i < glyphRun.glyphCount; ++i) @@ -308,18 +326,14 @@ void BackendD2D::_drawGridlines(const RenderingPayload& p) void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { const auto columnToDIP = [&](til::CoordType i) { - return i * p.d.font.cellSizeDIP.x; + return i * p.s->font->cellSize.x; }; const auto rowToDIP = [&](til::CoordType i) { - return i * p.d.font.cellSizeDIP.y; - }; - const auto pxToDIP = [&](til::CoordType i) { - return i * p.d.font.dipPerPixel; + return i * p.s->font->cellSize.y; }; const auto top = rowToDIP(y); - const auto bottom = top + p.d.font.cellSizeDIP.y; - const auto thinLineWidth = pxToDIP(p.s->font->thinLineWidth); + const auto bottom = top + p.s->font->cellSize.y; for (const auto& r : row->gridLineRanges) { @@ -328,7 +342,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* 
ro const auto left = columnToDIP(r.from); const auto right = columnToDIP(r.to); - D2D1_RECT_F rect{}; + til::rect rect; if (r.lines.test(GridLines::Left)) { @@ -337,7 +351,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro for (auto i = r.from; i < r.to; ++i) { rect.left = columnToDIP(i); - rect.right = rect.left + thinLineWidth; + rect.right = rect.left + p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } } @@ -346,7 +360,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro rect.left = left; rect.top = top; rect.right = right; - rect.bottom = rect.top + thinLineWidth; + rect.bottom = rect.top + p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } if (r.lines.test(GridLines::Right)) @@ -356,14 +370,14 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro for (auto i = r.to; i > r.from; --i) { rect.right = columnToDIP(i); - rect.left = rect.right - thinLineWidth; + rect.left = rect.right - p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } } if (r.lines.test(GridLines::Bottom)) { rect.left = left; - rect.top = bottom - thinLineWidth; + rect.top = bottom - p.s->font->thinLineWidth; rect.right = right; rect.bottom = bottom; _fillRectangle(rect, r.color); @@ -371,40 +385,40 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro if (r.lines.test(GridLines::Underline)) { rect.left = left; - rect.top = top + pxToDIP(p.s->font->underlinePos); + rect.top = top + p.s->font->underlinePos; rect.right = right; - rect.bottom = rect.top + pxToDIP(p.s->font->underlineWidth); + rect.bottom = rect.top + p.s->font->underlineWidth; _fillRectangle(rect, r.color); } if (r.lines.test(GridLines::HyperlinkUnderline)) { - const auto w = pxToDIP(p.s->font->underlineWidth); - const auto centerY = top + pxToDIP(p.s->font->underlinePos) + w * 0.5f; + const auto w = p.s->font->underlineWidth; + const auto centerY = (top + p.s->font->underlinePos) + 
w * 0.5f; const auto brush = _brushWithColor(r.color); - const D2D1_POINT_2F point0{ left, centerY }; - const D2D1_POINT_2F point1{ right, centerY }; + const D2D1_POINT_2F point0{ static_cast(left), centerY }; + const D2D1_POINT_2F point1{ static_cast(right), centerY }; _renderTarget->DrawLine(point0, point1, brush, w, _dottedStrokeStyle.get()); } if (r.lines.test(GridLines::DoubleUnderline)) { rect.left = left; - rect.top = top + pxToDIP(p.s->font->doubleUnderlinePos.x); + rect.top = top + p.s->font->doubleUnderlinePos.x; rect.right = right; - rect.bottom = rect.top + thinLineWidth; + rect.bottom = rect.top + p.s->font->thinLineWidth; _fillRectangle(rect, r.color); rect.left = left; - rect.top = top + pxToDIP(p.s->font->doubleUnderlinePos.y); + rect.top = top + p.s->font->doubleUnderlinePos.y; rect.right = right; - rect.bottom = rect.top + thinLineWidth; + rect.bottom = rect.top + p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } if (r.lines.test(GridLines::Strikethrough)) { rect.left = left; - rect.top = top + pxToDIP(p.s->font->strikethroughPos); + rect.top = top + p.s->font->strikethroughPos; rect.right = right; - rect.bottom = rect.top + pxToDIP(p.s->font->strikethroughWidth); + rect.bottom = rect.top + p.s->font->strikethroughWidth; _fillRectangle(rect, r.color); } } @@ -420,38 +434,40 @@ void BackendD2D::_drawCursor(const RenderingPayload& p) // Inverted cursors could be implemented in the future using // ID2D1DeviceContext::DrawImage and D2D1_COMPOSITE_MODE_MASK_INVERT. 
- D2D1_RECT_F rect{ - p.d.font.cellSizeDIP.x * p.cursorRect.left, - p.d.font.cellSizeDIP.y * p.cursorRect.top, - p.d.font.cellSizeDIP.x * p.cursorRect.right, - p.d.font.cellSizeDIP.y * p.cursorRect.bottom, + til::rect rect{ + p.s->font->cellSize.x * p.cursorRect.left, + p.s->font->cellSize.y * p.cursorRect.top, + p.s->font->cellSize.x * p.cursorRect.right, + p.s->font->cellSize.y * p.cursorRect.bottom, }; switch (static_cast(p.s->cursor->cursorType)) { case CursorType::Legacy: - rect.top = rect.bottom - (rect.bottom - rect.top) * static_cast(p.s->cursor->heightPercentage) / 100.0f; + rect.top = rect.bottom - (p.s->font->cellSize.y * p.s->cursor->heightPercentage + 50) / 100; _fillRectangle(rect, p.s->cursor->cursorColor); break; case CursorType::VerticalBar: - rect.right = rect.left + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + rect.right = rect.left + p.s->font->thinLineWidth; _fillRectangle(rect, p.s->cursor->cursorColor); break; case CursorType::Underscore: - rect.top += p.s->font->underlinePos * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->underlineWidth * p.d.font.dipPerPixel; + rect.top += p.s->font->underlinePos; + rect.bottom = rect.top + p.s->font->underlineWidth; _fillRectangle(rect, p.s->cursor->cursorColor); break; case CursorType::EmptyBox: { const auto brush = _brushWithColor(p.s->cursor->cursorColor); - const auto w = p.s->font->thinLineWidth * p.d.font.dipPerPixel; + const auto w = p.s->font->thinLineWidth; const auto wh = w / 2.0f; - rect.left += wh; - rect.top += wh; - rect.right += wh; - rect.bottom += wh; - _renderTarget->DrawRectangle(&rect, brush, w, nullptr); + const D2D1_RECT_F rectF{ + rect.left + wh, + rect.top + wh, + rect.right - wh, + rect.bottom - wh, + }; + _renderTarget->DrawRectangle(&rectF, brush, w, nullptr); break; } case CursorType::FullBox: @@ -460,11 +476,11 @@ void BackendD2D::_drawCursor(const RenderingPayload& p) case CursorType::DoubleUnderscore: { auto rect2 = rect; - rect2.top = rect.top + 
p.s->font->doubleUnderlinePos.x * p.d.font.dipPerPixel; - rect2.bottom = rect2.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + rect2.top = rect.top + p.s->font->doubleUnderlinePos.x; + rect2.bottom = rect2.top + p.s->font->thinLineWidth; _fillRectangle(rect2, p.s->cursor->cursorColor); - rect.top = rect.top + p.s->font->doubleUnderlinePos.y * p.d.font.dipPerPixel; - rect.bottom = rect.top + p.s->font->thinLineWidth * p.d.font.dipPerPixel; + rect.top = rect.top + p.s->font->doubleUnderlinePos.y; + rect.bottom = rect.top + p.s->font->thinLineWidth; _fillRectangle(rect, p.s->cursor->cursorColor); break; } @@ -481,10 +497,10 @@ void BackendD2D::_drawSelection(const RenderingPayload& p) if (row->selectionTo > row->selectionFrom) { const D2D1_RECT_F rect{ - p.d.font.cellSizeDIP.x * row->selectionFrom, - p.d.font.cellSizeDIP.y * y, - p.d.font.cellSizeDIP.x * row->selectionTo, - p.d.font.cellSizeDIP.y * (y + 1), + static_cast(p.s->font->cellSize.x * row->selectionFrom), + static_cast(p.s->font->cellSize.y * y), + static_cast(p.s->font->cellSize.x * row->selectionTo), + static_cast(p.s->font->cellSize.y * (y + 1)), }; _fillRectangle(rect, p.s->misc->selectionColor); } @@ -493,7 +509,8 @@ void BackendD2D::_drawSelection(const RenderingPayload& p) } } -void BackendD2D::_debugShowDirty(RenderingPayload& p) +#if ATLAS_DEBUG_SHOW_DIRTY +void BackendD2D::_debugShowDirty(const RenderingPayload& p) { _presentRects[_presentRectsPos] = p.dirtyRectInPx; _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); @@ -508,11 +525,28 @@ void BackendD2D::_debugShowDirty(RenderingPayload& p) static_cast(rect.right), static_cast(rect.bottom), }; - const auto color = 0x1f000000 | colorbrewer::pastel1[i]; + const auto color = colorbrewer::pastel1[i] | 0x1f000000; _fillRectangle(rectF, color); } } } +#endif + +#if ATLAS_DEBUG_DUMP_RENDER_TARGET +void BackendD2D::_debugDumpRenderTarget(const RenderingPayload& p) +{ + if (_dumpRenderTargetCounter == 0) + { + 
ExpandEnvironmentStringsW(ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH, &_dumpRenderTargetBasePath[0], gsl::narrow_cast(std::size(_dumpRenderTargetBasePath))); + std::filesystem::create_directories(_dumpRenderTargetBasePath); + } + + wchar_t path[MAX_PATH]; + swprintf_s(path, L"%s\\%u_%08zu.png", &_dumpRenderTargetBasePath[0], GetCurrentProcessId(), _dumpRenderTargetCounter); + SaveTextureToPNG(_deviceContext.get(), _swapChainManager.GetBuffer().get(), p.s->font->dpi, &path[0]); + _dumpRenderTargetCounter++; +} +#endif ID2D1Brush* BackendD2D::_brushWithColor(u32 color) { @@ -525,6 +559,17 @@ ID2D1Brush* BackendD2D::_brushWithColor(u32 color) return _brush.get(); } +void BackendD2D::_fillRectangle(const til::rect& rect, u32 color) +{ + const D2D1_RECT_F rectF{ + static_cast(rect.left), + static_cast(rect.top), + static_cast(rect.right), + static_cast(rect.bottom), + }; + _fillRectangle(rectF, color); +} + void BackendD2D::_fillRectangle(const D2D1_RECT_F& rect, u32 color) { const auto brush = _brushWithColor(color); diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 428b12f8535..8b0ded86056 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -24,8 +24,10 @@ namespace Microsoft::Console::Render::Atlas void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursor(const RenderingPayload& p); void _drawSelection(const RenderingPayload& p); - void _debugShowDirty(RenderingPayload& p); + void _debugShowDirty(const RenderingPayload& p); + void _debugDumpRenderTarget(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); + void _fillRectangle(const til::rect& rect, u32 color); void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager _swapChainManager; @@ -52,5 +54,10 @@ namespace Microsoft::Console::Render::Atlas til::rect _presentRects[9]{}; size_t _presentRectsPos = 0; #endif + +#if ATLAS_DEBUG_DUMP_RENDER_TARGET + wchar_t 
_dumpRenderTargetBasePath[MAX_PATH]{}; + size_t _dumpRenderTargetCounter = 0; +#endif }; } diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index ce4cb2c64a1..5cc637a703b 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -13,7 +13,7 @@ #include "dwrite.h" -#if ATLAS_DEBUG_SHOW_DIRTY +#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS #include "colorbrewer.h" #endif @@ -507,23 +507,25 @@ void BackendD3D::_recreateCustomShader(const RenderingPayload& p) THROW_IF_FAILED(_device->CreateVertexShader(&custom_shader_vs[0], sizeof(custom_shader_vs), nullptr, _customVertexShader.put())); { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = sizeof(CustomConstBuffer); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + static constexpr D3D11_BUFFER_DESC desc{ + .ByteWidth = sizeof(CustomConstBuffer), + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _customShaderConstantBuffer.put())); } { - D3D11_SAMPLER_DESC desc{}; - desc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; - desc.AddressU = D3D11_TEXTURE_ADDRESS_BORDER; - desc.AddressV = D3D11_TEXTURE_ADDRESS_BORDER; - desc.AddressW = D3D11_TEXTURE_ADDRESS_BORDER; - desc.MaxAnisotropy = 1; - desc.ComparisonFunc = D3D11_COMPARISON_ALWAYS; - desc.MaxLOD = D3D11_FLOAT32_MAX; + static constexpr D3D11_SAMPLER_DESC desc{ + .Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR, + .AddressU = D3D11_TEXTURE_ADDRESS_BORDER, + .AddressV = D3D11_TEXTURE_ADDRESS_BORDER, + .AddressW = D3D11_TEXTURE_ADDRESS_BORDER, + .MaxAnisotropy = 1, + .ComparisonFunc = D3D11_COMPARISON_ALWAYS, + .MaxLOD = D3D11_FLOAT32_MAX, + }; THROW_IF_FAILED(_device->CreateSamplerState(&desc, _customShaderSamplerState.put())); } @@ -541,14 +543,15 @@ void 
BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) // `_customRenderTargetView` to render into the swap chain using the custom (user provided) shader. _customRenderTargetView = std::move(_renderTargetView); - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = targetSize.x; - desc.Height = targetSize.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + const D3D11_TEXTURE2D_DESC desc{ + .Width = targetSize.x, + .Height = targetSize.y, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .SampleDesc = { 1, 0 }, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, + }; THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _customOffscreenTexture.addressof())); THROW_IF_FAILED(_device->CreateShaderResourceView(_customOffscreenTexture.get(), nullptr, _customOffscreenTextureView.addressof())); THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); @@ -560,28 +563,29 @@ void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) _backgroundBitmap.reset(); _backgroundBitmapView.reset(); - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = cellCount.x; - desc.Height = cellCount.y; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; + const D3D11_TEXTURE2D_DESC desc{ + .Width = cellCount.x, + .Height = cellCount.y, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_R8G8B8A8_UNORM, + .SampleDesc = { 1, 0 }, + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof())); 
THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); _backgroundBitmapGeneration = {}; } -void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept +void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept { _d2dRenderTarget->SetDpi(font.dpi, font.dpi); _d2dRenderTarget->SetTextAntialiasMode(static_cast(font.antialiasingMode)); } -void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) +void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const { { VSConstBuffer data; @@ -769,14 +773,15 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) _glyphAtlasView.reset(); { - D3D11_TEXTURE2D_DESC desc{}; - desc.Width = u; - desc.Height = v; - desc.MipLevels = 1; - desc.ArraySize = 1; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc = { 1, 0 }; - desc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET; + const D3D11_TEXTURE2D_DESC desc{ + .Width = u, + .Height = v, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .SampleDesc = { 1, 0 }, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, + }; THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _glyphAtlas.addressof())); THROW_IF_FAILED(_device->CreateShaderResourceView(_glyphAtlas.get(), nullptr, _glyphAtlasView.addressof())); } @@ -784,7 +789,7 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) { const auto surface = _glyphAtlas.query(); - const D2D1_RENDER_TARGET_PROPERTIES props{ + static constexpr D2D1_RENDER_TARGET_PROPERTIES props{ .type = D2D1_RENDER_TARGET_TYPE_DEFAULT, .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, }; @@ -793,6 +798,7 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) _d2dRenderTarget = renderTarget.query(); _d2dRenderTarget4 = renderTarget.try_query(); + 
_d2dRenderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); // We don't really use D2D for anything except DWrite, but it // can't hurt to ensure that everything it does is pixel aligned. _d2dRenderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); @@ -933,12 +939,13 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) _instanceBuffer.reset(); { - D3D11_BUFFER_DESC desc{}; - desc.ByteWidth = gsl::narrow(newSize); - desc.Usage = D3D11_USAGE_DYNAMIC; - desc.BindFlags = D3D11_BIND_VERTEX_BUFFER; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE; - desc.StructureByteStride = sizeof(QuadInstance); + const D3D11_BUFFER_DESC desc{ + .ByteWidth = gsl::narrow(newSize), + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + .StructureByteStride = sizeof(QuadInstance), + }; THROW_IF_FAILED(_device->CreateBuffer(&desc, nullptr, _instanceBuffer.addressof())); } @@ -985,7 +992,7 @@ void BackendD3D::_drawText(RenderingPayload& p) u16 y = 0; for (const auto row : p.rows) { - const auto baselineY = y * p.d.font.cellSizeDIP.y + p.s->font->baselineInDIP; + const auto baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; f32 cumulativeAdvance = 0; for (const auto& m : row->mappings) @@ -1001,8 +1008,8 @@ void BackendD3D::_drawText(RenderingPayload& p) if (entry.shadingType) { - const auto l = static_cast((cumulativeAdvance + row->glyphOffsets[x].advanceOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.x; - const auto t = static_cast((baselineY - row->glyphOffsets[x].ascenderOffset) * p.d.font.pixelPerDIP + 0.5f) + entry.offset.y; + const auto l = static_cast(cumulativeAdvance + row->glyphOffsets[x].advanceOffset + 0.5f) + entry.offset.x; + const auto t = static_cast(baselineY - row->glyphOffsets[x].ascenderOffset + 0.5f) + entry.offset.y; row->top = std::min(row->top, t); row->bottom = std::max(row->bottom, t + entry.size.y); _appendQuad({ static_cast(l), static_cast(t) }, entry.size, 
entry.texcoord, row->colors[x], static_cast(entry.shadingType)); @@ -1051,7 +1058,7 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f // by 3px to the left and would thus overlap it's neighbor to the left by 3px. `.bottom` is the same but for the descender. // `.right` and `.top` are not overlaps per se, but rather the distance to the right/top edge relative to the baseline origin. // The width of the glyph for instance is thus `.right - .left`. - const f32 fontScale = p.d.font.pixelPerDIP * glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; + const f32 fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; const f32r box{ static_cast(glyphMetrics.leftSideBearing) * fontScale, static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale, @@ -1081,14 +1088,33 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f const auto r = lround(box.right) + 1; const auto b = lround(box.bottom) + 1; - stbrp_rect rect{}; - rect.w = r - l; - rect.h = b - t; + stbrp_rect rect{ + .w = r - l, + .h = b - t, + }; if (stbrp_pack_rects(&_rectPacker, &rect, 1)) { _d2dBeginDrawing(); - const D2D1_POINT_2F baseline{ (rect.x - l) * p.d.font.dipPerPixel, (rect.y - t) * p.d.font.dipPerPixel }; +#if ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS + { + const auto d2dColor = colorFromU32(colorbrewer::pastel1[_colorizeGlyphAtlasCounter] | 0x3f000000); + _colorizeGlyphAtlasCounter = (_colorizeGlyphAtlasCounter + 1) % std::size(colorbrewer::pastel1); + + wil::com_ptr brush; + THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, brush.addressof())); + + const D2D1_RECT_F rectF{ + static_cast(rect.x), + static_cast(rect.y), + static_cast(rect.x + rect.w), + static_cast(rect.y + rect.h), + }; + _d2dRenderTarget->FillRectangle(&rectF, brush.get()); + } +#endif + + const D2D1_POINT_2F baseline{ static_cast(rect.x - l), static_cast(rect.y - t) }; const auto colorGlyph = 
DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? ShadingType::TextClearType : ShadingType::TextGrayscale); @@ -1380,7 +1406,7 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) } #if ATLAS_DEBUG_SHOW_DIRTY -void BackendD3D::_debugShowDirty(RenderingPayload& p) +void BackendD3D::_debugShowDirty(const RenderingPayload& p) { _presentRects[_presentRectsPos] = p.dirtyRectInPx; _presentRectsPos = (_presentRectsPos + 1) % std::size(_presentRects); @@ -1397,7 +1423,7 @@ void BackendD3D::_debugShowDirty(RenderingPayload& p) static_cast(rect.right - rect.left), static_cast(rect.bottom - rect.top), }; - const auto color = 0x1f000000 | colorbrewer::pastel1[i]; + const auto color = colorbrewer::pastel1[i] | 0x1f000000; _appendQuad(position, size, color, ShadingType::SolidFill); } } @@ -1405,31 +1431,33 @@ void BackendD3D::_debugShowDirty(RenderingPayload& p) #endif #if ATLAS_DEBUG_DUMP_RENDER_TARGET -void BackendD3D::_debugDumpRenderTarget(RenderingPayload& p) +void BackendD3D::_debugDumpRenderTarget(const RenderingPayload& p) { - const auto n = _dumpRenderTargetCounter.fetch_add(1, std::memory_order_relaxed); - - if (n == 0) + if (_dumpRenderTargetCounter == 0) { - ExpandEnvironmentStringsW(ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH, &_dumpRenderTargetBasePath[0], std::size(_dumpRenderTargetBasePath)); + ExpandEnvironmentStringsW(ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH, &_dumpRenderTargetBasePath[0], gsl::narrow_cast(std::size(_dumpRenderTargetBasePath))); std::filesystem::create_directories(_dumpRenderTargetBasePath); } wchar_t path[MAX_PATH]; - swprintf_s(path, L"%s\\%u_%08u.png", &_dumpRenderTargetBasePath[0], GetCurrentProcessId(), n); + swprintf_s(path, L"%s\\%u_%08zu.png", &_dumpRenderTargetBasePath[0], GetCurrentProcessId(), _dumpRenderTargetCounter); 
SaveTextureToPNG(_deviceContext.get(), _swapChainManager.GetBuffer().get(), p.s->font->dpi, &path[0]); + _dumpRenderTargetCounter++; } #endif void BackendD3D::_executeCustomShader(RenderingPayload& p) { { - CustomConstBuffer data; - data.time = std::chrono::duration(std::chrono::steady_clock::now() - _customShaderStartTime).count(); - data.scale = p.d.font.pixelPerDIP; - data.resolution.x = static_cast(_cellCount.x * p.s->font->cellSize.x); - data.resolution.y = static_cast(_cellCount.y * p.s->font->cellSize.y); - data.background = colorFromU32(p.s->misc->backgroundColor); + const CustomConstBuffer data{ + .time = std::chrono::duration(std::chrono::steady_clock::now() - _customShaderStartTime).count(), + .scale = static_cast(p.s->font->dpi) / static_cast(USER_DEFAULT_SCREEN_DPI), + .resolution = { + static_cast(_cellCount.x * p.s->font->cellSize.x), + static_cast(_cellCount.y * p.s->font->cellSize.y), + }, + .background = colorFromU32(p.s->misc->backgroundColor), + }; D3D11_MAPPED_SUBRESOURCE mapped{}; THROW_IF_FAILED(_deviceContext->Map(_customShaderConstantBuffer.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 6777404ef35..723bc69c25d 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -122,13 +122,13 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _recreateCustomShader(const RenderingPayload& p); void _recreateCustomRenderTargetView(u16x2 targetSize); - void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) noexcept; + void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept; void _recreateBackgroundColorBitmap(u16x2 cellCount); - void _recreateConstBuffer(const RenderingPayload& p); + void _recreateConstBuffer(const RenderingPayload& p) const; void _setupDeviceContextState(const RenderingPayload& p); void _debugUpdateShaders(const 
RenderingPayload& p) noexcept; - void _debugShowDirty(RenderingPayload& p); - void _debugDumpRenderTarget(RenderingPayload& p); + void _debugShowDirty(const RenderingPayload& p); + void _debugDumpRenderTarget(const RenderingPayload& p); void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); void _handleFontChangedResetGlyphAtlas(const RenderingPayload& p); @@ -235,8 +235,12 @@ namespace Microsoft::Console::Render::Atlas #endif #if ATLAS_DEBUG_DUMP_RENDER_TARGET - std::atomic _dumpRenderTargetCounter; - wchar_t _dumpRenderTargetBasePath[MAX_PATH]; + wchar_t _dumpRenderTargetBasePath[MAX_PATH]{}; + size_t _dumpRenderTargetCounter = 0; +#endif + +#if ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS + size_t _colorizeGlyphAtlasCounter = 0; #endif #ifndef NDEBUG diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 42874c1f1da..92cbfcf4b05 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -299,11 +299,11 @@ namespace Microsoft::Console::Render::Atlas std::wstring fontName; std::vector fontFeatures; std::vector fontAxisValues; - f32 baselineInDIP = 0.0f; - f32 fontSizeInDIP = 0.0f; + f32 fontSize = 0; f32 advanceScale = 0; u16x2 cellSize; u16 fontWeight = 0; + u16 baseline = 0; u16 underlinePos = 0; u16 underlineWidth = 0; u16 strikethroughPos = 0; @@ -320,8 +320,7 @@ namespace Microsoft::Console::Render::Atlas u32 cursorColor = 0xffffffff; u16 cursorType = 0; - u8 heightPercentage = 20; - u8 _padding = 0; + u16 heightPercentage = 20; }; struct MiscellaneousSettings @@ -345,9 +344,6 @@ namespace Microsoft::Console::Render::Atlas struct FontDependents { Buffer textFormatAxes[2][2]; - f32 dipPerPixel = 1.0f; // caches USER_DEFAULT_SCREEN_DPI / dpi - f32 pixelPerDIP = 1.0f; // caches dpi / USER_DEFAULT_SCREEN_DPI - f32x2 cellSizeDIP; // caches cellSize in DIP }; struct Dependents From 6232dfddf3b9f13927f1a504af51624abbadffab Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 23 Mar 2023 16:57:19 +0100 Subject: [PATCH 11/37] 
Implement line renditions for BackendD2D --- src/renderer/atlas/AtlasEngine.cpp | 19 +++-- src/renderer/atlas/AtlasEngine.h | 2 + src/renderer/atlas/AtlasEngine.r.cpp | 2 +- src/renderer/atlas/Backend.h | 2 +- src/renderer/atlas/BackendD2D.cpp | 110 ++++++++++++++++++++++----- src/renderer/atlas/BackendD2D.h | 7 +- src/renderer/atlas/BackendD3D.cpp | 8 +- src/renderer/atlas/common.h | 28 ++----- 8 files changed, 123 insertions(+), 55 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 2744746ec1f..399c71e9f5a 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -175,13 +175,13 @@ try for (const auto r : _p.rows) { - r->top += deltaPx; - r->bottom += deltaPx; + r->dirtyTop += deltaPx; + r->dirtyBottom += deltaPx; if (y >= _api.invalidatedRows.x && y < _api.invalidatedRows.y) { - const auto clampedTop = clamp(r->top, 0, targetSizeY); - const auto clampedBottom = clamp(r->bottom, 0, targetSizeY); + const auto clampedTop = clamp(r->dirtyTop, 0, targetSizeY); + const auto clampedBottom = clamp(r->dirtyBottom, 0, targetSizeY); if (clampedTop != clampedBottom) { _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, clampedTop); @@ -244,6 +244,9 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PrepareLineTransform(const LineRendition lineRendition, const til::CoordType targetRow, const til::CoordType viewportLeft) noexcept { + const auto y = gsl::narrow_cast(clamp(targetRow, 0, _p.s->cellCount.y)); + _p.rows[y]->lineRendition = lineRendition; + _api.lineRendition = lineRendition; return S_OK; } @@ -294,9 +297,10 @@ try } { + const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 
1 : 0; const auto backgroundRow = _p.backgroundBitmap.begin() + static_cast(y) * _p.s->cellCount.x; auto it = backgroundRow + x; - const auto end = backgroundRow + column; + const auto end = backgroundRow + (column << shift); const auto bg = _api.currentColor.y; for (; it != end; ++it) @@ -318,9 +322,10 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PaintBufferGridLines(const GridLineSet lines, const COLORREF color, const size_t cchLine, const til::point coordTarget) noexcept try { + const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 1 : 0; const auto y = gsl::narrow_cast(clamp(coordTarget.y, 0, _p.s->cellCount.y)); - const auto from = gsl::narrow_cast(clamp(coordTarget.x, 0, _p.s->cellCount.x - 1)); - const auto to = gsl::narrow_cast(clamp(coordTarget.x + cchLine, from, _p.s->cellCount.x)); + const auto from = gsl::narrow_cast(clamp(coordTarget.x << shift, 0, _p.s->cellCount.x - 1)); + const auto to = gsl::narrow_cast(clamp((coordTarget.x + cchLine) << shift, from, _p.s->cellCount.x)); const auto fg = gsl::narrow_cast(color) | 0xff000000; _p.rows[y]->gridLineRanges.emplace_back(lines, fg, from, to); return S_OK; diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 83a2c2b74f2..9066cd562d6 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -142,6 +142,8 @@ namespace Microsoft::Console::Render::Atlas u16 replacementCharacterGlyphIndex = 0; bool replacementCharacterLookedUp = false; + // PrepareLineTransform() + LineRendition lineRendition = LineRendition::SingleWidth; // UpdateDrawingBrushes() u32 backgroundOpaqueMixin = 0xff000000; u32x2 currentColor; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 65a57594374..ecfc0a459c7 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -130,7 +130,7 @@ void AtlasEngine::_recreateBackend() auto d2dMode = ATLAS_DEBUG_FORCE_D2D_MODE; auto 
deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED #ifndef NDEBUG - //| D3D11_CREATE_DEVICE_DEBUG + | D3D11_CREATE_DEVICE_DEBUG #endif // This flag prevents the driver from creating a large thread pool for things like shader computations // that would be advantageous for games. For us this has only a minimal performance benefit, diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index fdd1fdc2761..70915972aa4 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -19,7 +19,7 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT 0 // Forces the use of Direct2D for text rendering (= BackendD2D). -#define ATLAS_DEBUG_FORCE_D2D_MODE 0 +#define ATLAS_DEBUG_FORCE_D2D_MODE 1 // Adds an artificial delay before every render pass. In milliseconds. #define ATLAS_DEBUG_RENDER_DELAY 0 diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index a860436782c..fcedb0b147a 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -4,8 +4,6 @@ #include "pch.h" #include "BackendD2D.h" -#include "wic.h" - #if ATLAS_DEBUG_SHOW_DIRTY #include "colorbrewer.h" #endif @@ -86,7 +84,11 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) _deviceContext->ClearState(); }); - if (!_renderTarget) + const auto renderTargetChanged = !_renderTarget; + const auto fontChanged = _fontGeneration != p.s->font.generation(); + const auto cellCountChanged = _cellCount != p.s->cellCount; + + if (renderTargetChanged) { { const auto surface = _swapChainManager.GetBuffer().query(); @@ -119,17 +121,14 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) THROW_IF_FAILED(p.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _dottedStrokeStyle.addressof())); } - const auto fontChanged = _fontGeneration != p.s->font.generation(); - const auto cellCountChanged = _cellCount != p.s->cellCount; - - if (fontChanged) + if 
(renderTargetChanged || fontChanged) { const auto dpi = static_cast(p.s->font->dpi); _renderTarget->SetDpi(dpi, dpi); _renderTarget->SetTextAntialiasMode(static_cast(p.s->font->antialiasingMode)); } - if (fontChanged || cellCountChanged) + if (renderTargetChanged || fontChanged || cellCountChanged) { const D2D1_BITMAP_PROPERTIES props{ .pixelFormat = { DXGI_FORMAT_R8G8B8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, @@ -189,7 +188,47 @@ void BackendD2D::_drawText(RenderingPayload& p) u16 y = 0; for (const auto row : p.rows) { - f32 baselineX = 0.0f; + auto baselineX = 0.0f; + auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); + + for (const auto& m : row->mappings) + { + const DWRITE_GLYPH_RUN glyphRun{ + .fontFace = m.fontFace.get(), + .fontEmSize = m.fontEmSize, + .glyphCount = gsl::narrow_cast(m.glyphsTo - m.glyphsFrom), + .glyphIndices = &row->glyphIndices[m.glyphsFrom], + .glyphAdvances = &row->glyphAdvances[m.glyphsFrom], + .glyphOffsets = &row->glyphOffsets[m.glyphsFrom], + }; + + // _getGlyphRunBlackBox returns a rectangle based on the design metrics of the + // glyphs, which doesn't take antialiasing nor font hinting into consideration. + // Especially the latter can technically move the rasterized result around + // however it pleases. A padding of 5px should hopefully avoid most issues. 
+ const auto blackBox = _getGlyphRunBlackBox(glyphRun, baselineX, baselineY); + if (row->lineRendition != LineRendition::DoubleHeightTop) + { + row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lround(blackBox.bottom) + 5)); + } + if (row->lineRendition != LineRendition::DoubleHeightBottom) + { + row->dirtyTop = std::min(row->dirtyTop, static_cast(lround(blackBox.top) - 5)); + } + } + + const D2D1_RECT_F clipRect{ + 0, + static_cast(row->dirtyTop), + static_cast(p.s->targetSize.x), + static_cast(row->dirtyBottom), + }; + _renderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); + + if (row->lineRendition != LineRendition::SingleWidth) + { + baselineY = _drawTextPrepareLineRendition(p, baselineY, row->lineRendition); + } for (const auto& m : row->mappings) { @@ -197,7 +236,7 @@ void BackendD2D::_drawText(RenderingPayload& p) auto it = colorsBegin + m.glyphsFrom; const auto end = colorsBegin + m.glyphsTo; - do + while (it != end) { const auto beg = it; const auto off = it - colorsBegin; @@ -209,7 +248,6 @@ void BackendD2D::_drawText(RenderingPayload& p) const auto count = it - beg; const auto brush = _brushWithColor(fg); - const auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); const DWRITE_GLYPH_RUN glyphRun{ .fontFace = m.fontFace.get(), .fontEmSize = m.fontEmSize, @@ -221,23 +259,24 @@ void BackendD2D::_drawText(RenderingPayload& p) DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); - const auto blackBox = _getGlyphRunBlackBox(glyphRun, baselineX, baselineY); - // Add a 1px padding to avoid inaccuracies with the blackbox measurement. - // It's only an estimate based on the design size after all. 
- row->top = std::min(row->top, static_cast(lround(blackBox.top) - 1)); - row->bottom = std::max(row->bottom, static_cast(lround(blackBox.bottom) + 1)); - for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) { baselineX += glyphRun.glyphAdvances[i]; } - } while (it != end); + } + } + + if (row->lineRendition != LineRendition::SingleWidth) + { + _drawTextResetLineRendition(); } + _renderTarget->PopAxisAlignedClip(); + if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { - dirtyTop = std::min(dirtyTop, row->top); - dirtyBottom = std::max(dirtyBottom, row->bottom); + dirtyTop = std::min(dirtyTop, row->dirtyTop); + dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); } ++y; @@ -250,6 +289,37 @@ void BackendD2D::_drawText(RenderingPayload& p) } } +f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const +{ + const auto descender = static_cast(p.s->font->cellSize.y - p.s->font->baseline); + D2D1_MATRIX_3X2_F transform{ + .m11 = 2.0f, + .m22 = 1.0f, + }; + + if (lineRendition >= LineRendition::DoubleHeightTop) + { + transform.m22 = 2.0f; + transform.dy = -1.0f * (baselineY + descender); + + if (lineRendition == LineRendition::DoubleHeightTop) + { + const auto delta = static_cast(p.s->font->cellSize.y); + baselineY += delta; + transform.dy -= delta; + } + } + + _renderTarget->SetTransform(&transform); + return baselineY; +} + +void BackendD2D::_drawTextResetLineRendition() const +{ + static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1.f, .m22 = 1.f }; + _renderTarget->SetTransform(&identity); +} + // Returns the theoretical/design design size of the given `DWRITE_GLYPH_RUN`, relative the the given baseline origin. // This algorithm replicates what DirectWrite does internally to provide `IDWriteTextLayout::GetMetrics`. 
f32r BackendD2D::_getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 8b0ded86056..ad9947bed05 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -19,6 +19,8 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); + f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const; + void _drawTextResetLineRendition() const; f32r _getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); @@ -37,15 +39,16 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _renderTarget; wil::com_ptr _renderTarget4; // Optional. Supported since Windows 10 14393. 
- wil::com_ptr _brush; wil::com_ptr _dottedStrokeStyle; wil::com_ptr _backgroundBitmap; wil::com_ptr _backgroundBrush; til::generation_t _backgroundBitmapGeneration; - Buffer _glyphMetrics; + wil::com_ptr _brush; u32 _brushColor = 0; + Buffer _glyphMetrics; + til::generation_t _generation; til::generation_t _fontGeneration; u16x2 _cellCount; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 5cc637a703b..6ee09cf059c 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1010,8 +1010,8 @@ void BackendD3D::_drawText(RenderingPayload& p) { const auto l = static_cast(cumulativeAdvance + row->glyphOffsets[x].advanceOffset + 0.5f) + entry.offset.x; const auto t = static_cast(baselineY - row->glyphOffsets[x].ascenderOffset + 0.5f) + entry.offset.y; - row->top = std::min(row->top, t); - row->bottom = std::max(row->bottom, t + entry.size.y); + row->dirtyTop = std::min(row->dirtyTop, t); + row->dirtyBottom = std::max(row->dirtyBottom, t + entry.size.y); _appendQuad({ static_cast(l), static_cast(t) }, entry.size, entry.texcoord, row->colors[x], static_cast(entry.shadingType)); } } @@ -1019,8 +1019,8 @@ void BackendD3D::_drawText(RenderingPayload& p) if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { - dirtyTop = std::min(dirtyTop, row->top); - dirtyBottom = std::max(dirtyBottom, row->bottom); + dirtyTop = std::min(dirtyTop, row->dirtyTop); + dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); } ++y; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 92cbfcf4b05..854c7001576 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -377,24 +377,11 @@ namespace Microsoft::Console::Render::Atlas glyphOffsets.clear(); colors.clear(); gridLineRanges.clear(); + lineRendition = LineRendition::SingleWidth; selectionFrom = 0; selectionTo = 0; - top = y * cellHeight; - bottom = top + cellHeight; - } - - friend void swap(ShapedRow& lhs, ShapedRow& rhs) 
noexcept - { - std::swap(lhs.mappings, rhs.mappings); - std::swap(lhs.glyphIndices, rhs.glyphIndices); - std::swap(lhs.glyphAdvances, rhs.glyphAdvances); - std::swap(lhs.glyphOffsets, rhs.glyphOffsets); - std::swap(lhs.colors, rhs.colors); - std::swap(lhs.gridLineRanges, rhs.gridLineRanges); - std::swap(lhs.selectionFrom, rhs.selectionFrom); - std::swap(lhs.selectionTo, rhs.selectionTo); - std::swap(lhs.top, rhs.top); - std::swap(lhs.bottom, rhs.bottom); + dirtyTop = y * cellHeight; + dirtyBottom = dirtyTop + cellHeight; } std::vector mappings; @@ -403,10 +390,11 @@ namespace Microsoft::Console::Render::Atlas std::vector glyphOffsets; // same size as glyphIndices std::vector colors; // same size as glyphIndices std::vector gridLineRanges; + LineRendition lineRendition = LineRendition::SingleWidth; u16 selectionFrom = 0; u16 selectionTo = 0; - til::CoordType top = 0; - til::CoordType bottom = 0; + til::CoordType dirtyTop = 0; + til::CoordType dirtyBottom = 0; }; struct RenderingPayload @@ -436,8 +424,8 @@ namespace Microsoft::Console::Render::Atlas Buffer rowsScratch; Buffer rows; Buffer backgroundBitmap; - // 1 ensures that the backends redraw the background, even if the background - // is entirely black, just like `backgroundBitmap` after it gets created. + // 1 ensures that the backends redraw the background, even if the background is + // entirely black, just like `backgroundBitmap` is all black after it gets created.
til::generation_t backgroundBitmapGeneration{ 1 }; til::rect dirtyRectInPx; u16x2 invalidatedRows; From da40a01ac0844decbaf4258ea4f450864d749c6b Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sat, 25 Mar 2023 23:06:47 +0100 Subject: [PATCH 12/37] Fix D2D emoji rendering, Add support for line renditions --- src/renderer/atlas/AtlasEngine.api.cpp | 30 +- src/renderer/atlas/AtlasEngine.cpp | 1 + src/renderer/atlas/Backend.cpp | 17 +- src/renderer/atlas/Backend.h | 2 +- src/renderer/atlas/BackendD2D.cpp | 67 +++-- src/renderer/atlas/BackendD2D.h | 2 +- src/renderer/atlas/BackendD3D.cpp | 396 +++++++++++++++++-------- src/renderer/atlas/BackendD3D.h | 57 ++-- src/renderer/atlas/common.h | 3 +- src/renderer/atlas/stb_rect_pack.cpp | 7 + src/tools/RenderingTests/main.cpp | 171 ++++++++--- tools/ConsoleTypes.natvis | 6 +- 12 files changed, 507 insertions(+), 252 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index fb40bf34510..75b279d6c77 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -700,15 +700,6 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo if (fontMetrics) { std::wstring fontName{ requestedFaceName }; - const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); - const auto baselineU16 = gsl::narrow_cast(baseline); - const auto underlinePosU16 = gsl::narrow_cast(underlinePos); - const auto underlineWidthU16 = gsl::narrow_cast(underlineWidth); - const auto strikethroughPosU16 = gsl::narrow_cast(strikethroughPos); - const auto strikethroughWidthU16 = gsl::narrow_cast(strikethroughWidth); - const auto doubleUnderlinePosTopU16 = gsl::narrow_cast(doubleUnderlinePosTop); - const auto doubleUnderlinePosBottomU16 = gsl::narrow_cast(doubleUnderlinePosBottom); - const auto thinLineWidthU16 = gsl::narrow_cast(thinLineWidth); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause 
_api to be in an inconsistent state otherwise. @@ -718,14 +709,17 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontName = std::move(fontName); fontMetrics->fontSize = fontSizeInPx; fontMetrics->advanceScale = cellWidth / advanceWidth; - fontMetrics->cellSize = { cellWidth, cellHeight }; - fontMetrics->fontWeight = fontWeightU16; - fontMetrics->baseline = baselineU16; - fontMetrics->underlinePos = underlinePosU16; - fontMetrics->underlineWidth = underlineWidthU16; - fontMetrics->strikethroughPos = strikethroughPosU16; - fontMetrics->strikethroughWidth = strikethroughWidthU16; - fontMetrics->doubleUnderlinePos = { doubleUnderlinePosTopU16, doubleUnderlinePosBottomU16 }; - fontMetrics->thinLineWidth = thinLineWidthU16; + fontMetrics->cellSize.x = cellWidth; + fontMetrics->cellSize.y = cellHeight; + fontMetrics->fontWeight = gsl::narrow_cast(requestedWeight); + fontMetrics->baseline = static_cast(baseline); + fontMetrics->descender = static_cast(cellHeight - fontMetrics->baseline); + fontMetrics->underlinePos = static_cast(underlinePos); + fontMetrics->underlineWidth = static_cast(underlineWidth); + fontMetrics->strikethroughPos = static_cast(strikethroughPos); + fontMetrics->strikethroughWidth = static_cast(strikethroughWidth); + fontMetrics->doubleUnderlinePos.x = static_cast(doubleUnderlinePosTop); + fontMetrics->doubleUnderlinePos.y = static_cast(doubleUnderlinePosBottom); + fontMetrics->thinLineWidth = static_cast(thinLineWidth); } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 399c71e9f5a..e58fdb8c4d5 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -158,6 +158,7 @@ try til::CoordTypeMin, til::CoordTypeMin, }; + _p.invalidatedRows = _api.invalidatedRows; _p.cursorRect = {}; _p.scrollOffset = _api.scrollOffset; diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index b0fc1366a0d..24a511e23a8 100644 
--- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -184,12 +184,6 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRend if (d2dRenderTarget4) { - D2D_MATRIX_3X2_F transform; - d2dRenderTarget4->GetTransform(&transform); - f32 dpiX, dpiY; - d2dRenderTarget4->GetDpi(&dpiX, &dpiY); - transform = transform * D2D1::Matrix3x2F::Scale(dpiX, dpiY); - // Support for ID2D1DeviceContext4 implies support for IDWriteFactory4. // ID2D1DeviceContext4 is required for drawing below. hr = dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, measuringMode, nullptr, 0, &enumerator); @@ -241,6 +235,11 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRend runBrush = solidBrush.get(); } + const D2D1_POINT_2F runOrigin{ + colorGlyphRun->baselineOriginX, + colorGlyphRun->baselineOriginY, + }; + switch (colorGlyphRun->glyphImageFormat) { case DWRITE_GLYPH_IMAGE_FORMATS_NONE: @@ -249,13 +248,13 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRend case DWRITE_GLYPH_IMAGE_FORMATS_JPEG: case DWRITE_GLYPH_IMAGE_FORMATS_TIFF: case DWRITE_GLYPH_IMAGE_FORMATS_PREMULTIPLIED_B8G8R8A8: - d2dRenderTarget4->DrawColorBitmapGlyphRun(colorGlyphRun->glyphImageFormat, baselineOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->measuringMode, D2D1_COLOR_BITMAP_GLYPH_SNAP_OPTION_DEFAULT); + d2dRenderTarget4->DrawColorBitmapGlyphRun(colorGlyphRun->glyphImageFormat, runOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->measuringMode, D2D1_COLOR_BITMAP_GLYPH_SNAP_OPTION_DEFAULT); break; case DWRITE_GLYPH_IMAGE_FORMATS_SVG: - d2dRenderTarget4->DrawSvgGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, runBrush, nullptr, 0, colorGlyphRun->measuringMode); + d2dRenderTarget4->DrawSvgGlyphRun(runOrigin, &colorGlyphRun->glyphRun, runBrush, nullptr, 0, colorGlyphRun->measuringMode); break; default: - d2dRenderTarget4->DrawGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, 
colorGlyphRun->glyphRunDescription, runBrush, colorGlyphRun->measuringMode); + d2dRenderTarget4->DrawGlyphRun(runOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->glyphRunDescription, runBrush, colorGlyphRun->measuringMode); break; } } diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 70915972aa4..fdd1fdc2761 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -19,7 +19,7 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT 0 // Forces the use of Direct2D for text rendering (= BackendD2D). -#define ATLAS_DEBUG_FORCE_D2D_MODE 1 +#define ATLAS_DEBUG_FORCE_D2D_MODE 0 // Adds an artificial delay before every render pass. In milliseconds. #define ATLAS_DEBUG_RENDER_DELAY 0 diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index fcedb0b147a..321dabb7488 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -191,30 +191,45 @@ void BackendD2D::_drawText(RenderingPayload& p) auto baselineX = 0.0f; auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); - for (const auto& m : row->mappings) + if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) { - const DWRITE_GLYPH_RUN glyphRun{ - .fontFace = m.fontFace.get(), - .fontEmSize = m.fontEmSize, - .glyphCount = gsl::narrow_cast(m.glyphsTo - m.glyphsFrom), - .glyphIndices = &row->glyphIndices[m.glyphsFrom], - .glyphAdvances = &row->glyphAdvances[m.glyphsFrom], - .glyphOffsets = &row->glyphOffsets[m.glyphsFrom], - }; - - // _getGlyphRunBlackBox returns a rectangle based on the design metrics of the - // glyphs, which doesn't take antialiasing nor font hinting into consideration. - // Especially the latter can technically move the rasterized result around - // however it pleases. A padding of 5px should hopefully avoid most issues. 
- const auto blackBox = _getGlyphRunBlackBox(glyphRun, baselineX, baselineY); - if (row->lineRendition != LineRendition::DoubleHeightTop) - { - row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lround(blackBox.bottom) + 5)); - } - if (row->lineRendition != LineRendition::DoubleHeightBottom) + for (const auto& m : row->mappings) { - row->dirtyTop = std::min(row->dirtyTop, static_cast(lround(blackBox.top) - 5)); + const DWRITE_GLYPH_RUN glyphRun{ + .fontFace = m.fontFace.get(), + .fontEmSize = m.fontEmSize, + .glyphCount = gsl::narrow_cast(m.glyphsTo - m.glyphsFrom), + .glyphIndices = &row->glyphIndices[m.glyphsFrom], + .glyphAdvances = &row->glyphAdvances[m.glyphsFrom], + .glyphOffsets = &row->glyphOffsets[m.glyphsFrom], + }; + + // _getGlyphRunBlackBox returns a rectangle based on the design metrics of the + // glyphs, which doesn't take antialiasing nor font hinting into consideration. + // Especially the latter can technically move the rasterized result around + // however it pleases. A padding of 5px should hopefully avoid most issues. + // + // Technically ID2D1DeviceContext::GetGlyphRunWorldBounds would be the proper approach + // here, because it returns the exact (pixel accurate) boundaries of the glyph run. + // But that function is way, *way* more expensive than anything else, to the point + // its useless. To put numbers on it, it's about >20x more costly to call than + // DrawGlyphRun below, even though the function puts like half a million glyphs per + // second on the screen, filling hundreds of millions of pixels in the process. + const auto blackBox = _getGlyphRunDesignBounds(glyphRun, 0.0f, baselineY); + // We exclude setting the top/bottom dirty height for DECDHL double height rows, + // because DECDHL intentionally cuts off their bottom/top half respectively. 
+ if (row->lineRendition != LineRendition::DoubleHeightTop) + { + row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lround(blackBox.bottom) + 5)); + } + if (row->lineRendition != LineRendition::DoubleHeightBottom) + { + row->dirtyTop = std::min(row->dirtyTop, static_cast(lround(blackBox.top) - 5)); + } } + + dirtyTop = std::min(dirtyTop, row->dirtyTop); + dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); } const D2D1_RECT_F clipRect{ @@ -273,12 +288,6 @@ void BackendD2D::_drawText(RenderingPayload& p) _renderTarget->PopAxisAlignedClip(); - if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) - { - dirtyTop = std::min(dirtyTop, row->dirtyTop); - dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); - } - ++y; } @@ -316,13 +325,13 @@ f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 bas void BackendD2D::_drawTextResetLineRendition() const { - static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1.f, .m22 = 1.f }; + static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; _renderTarget->SetTransform(&identity); } // Returns the theoretical/design design size of the given `DWRITE_GLYPH_RUN`, relative the the given baseline origin. // This algorithm replicates what DirectWrite does internally to provide `IDWriteTextLayout::GetMetrics`. 
-f32r BackendD2D::_getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) +f32r BackendD2D::_getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY) { DWRITE_FONT_METRICS fontMetrics; glyphRun.fontFace->GetMetrics(&fontMetrics); diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index ad9947bed05..b7a53b41f7b 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -21,7 +21,7 @@ namespace Microsoft::Console::Render::Atlas void _drawText(RenderingPayload& p); f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const; void _drawTextResetLineRendition() const; - f32r _getGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); + __declspec(noinline) f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursor(const RenderingPayload& p); diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 6ee09cf059c..ffec4223252 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -47,7 +47,7 @@ BackendD3D::GlyphCacheMap::~GlyphCacheMap() BackendD3D::GlyphCacheMap& BackendD3D::GlyphCacheMap::operator=(GlyphCacheMap&& other) noexcept { _map = std::exchange(other._map, {}); - _mapMask = std::exchange(other._mapMask, 0); + _mask = std::exchange(other._mask, 0); _capacity = std::exchange(other._capacity, 0); _size = std::exchange(other._size, 0); return *this; @@ -55,89 +55,129 @@ BackendD3D::GlyphCacheMap& BackendD3D::GlyphCacheMap::operator=(GlyphCacheMap&& void BackendD3D::GlyphCacheMap::Clear() noexcept { - if (_size) + for (auto& entry : _map) { - for (auto& entry : _map) + if (entry.key.fontFace) { - if (entry.fontFace) - { - // I'm pretty 
sure Release() doesn't throw exceptions. + // I'm pretty sure Release() doesn't throw exceptions. #pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Release()' which may throw exceptions (f.6). - entry.fontFace->Release(); - entry.fontFace = nullptr; - } + entry.key.fontFace->Release(); + entry.key.fontFace = nullptr; } } + + _size = 0; } -BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted) +BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const GlyphCacheKey& key, bool& inserted) { - const auto hash = _hash(fontFace, glyphIndex); + // Putting this into the Find() path is a little pessimistic, but it + // allows us to default-construct this hashmap with a size of 0. + if (_size >= _capacity) + { + _bumpSize(); + } + const auto hash = _hash(key); for (auto i = hash;; ++i) { - auto& entry = _map[i & _mapMask]; - if (entry.fontFace == fontFace && entry.glyphIndex == glyphIndex) + auto& entry = _map[i & _mask]; + if (entry.key == key) { inserted = false; return entry; } - if (!entry.fontFace) + if (!entry.key.fontFace) { + ++_size; + entry.key = key; + entry.key.fontFace->AddRef(); inserted = true; - return _insert(fontFace, glyphIndex, hash); + return entry; } } } -size_t BackendD3D::GlyphCacheMap::_hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept +size_t BackendD3D::GlyphCacheMap::_hash(const GlyphCacheKey& key) noexcept { - // MSVC 19.33 produces surprisingly good assembly for this without stack allocation. 
- const uintptr_t data[2]{ std::bit_cast(fontFace), glyphIndex }; - return til::hash(&data[0], sizeof(data)); + //auto h = UINT64_C(0xcafef00dd15ea5e5); + //h = (h ^ key.hashField1) * UINT64_C(6364136223846793005) + UINT64_C(1442695040888963407); + //h = (h ^ key.hashField2) * UINT64_C(6364136223846793005) + UINT64_C(1442695040888963407); + //const int r = h & 63; + //const auto x = static_cast(h >> 32) ^ static_cast(h); + //return _rotl(x, r); + return til::hash(&key, GlyphCacheKeyDataSize); } -BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::_insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash) +void BackendD3D::GlyphCacheMap::_bumpSize() { - if (_size >= _capacity) + // The following block of code may be used to assess the quality of the hash function. + // The displacement is the distance between the ideal slot the hash value points at to + // the slot the value actually ended up in. A low displacement is not everything however, + // and the size and performance of the hash function is just as important. +#if 0 + if (_size) { - _bumpSize(); - } - - ++_size; + size_t displacementMax = 0; + size_t displacementTotal = 0; + size_t actualSlot = 0; - for (auto i = hash;; ++i) - { - auto& entry = _map[i & _mapMask]; - if (!entry.fontFace) + for (const auto& entry : _map) { - entry.fontFace = fontFace; - entry.glyphIndex = glyphIndex; - entry.fontFace->AddRef(); - return entry; + if (entry.key.fontFace) + { + const auto idealSlot = _hash(entry.key) & _mask; + size_t displacement = actualSlot - idealSlot; + + // A hash near the end of the map may wrap around to the beginning. + // This if condition will fix the displacement in that case. 
+ if (actualSlot < idealSlot) + { + displacement += _map.size(); + } + + if (displacement > displacementMax) + { + displacementMax = displacement; + displacementTotal += displacement; + } + } + + actualSlot++; } + + const auto displacementAvg = static_cast(displacementTotal) / static_cast(_size); + wchar_t buffer[128]; + swprintf_s(buffer, L"GlyphCacheMap resize at %zu, max. displacement: %zu, avg. displacement: %f%%\r\n", _map.size(), displacementMax, displacementAvg); + OutputDebugStringW(&buffer[0]); } -} +#endif -void BackendD3D::GlyphCacheMap::_bumpSize() -{ - const auto newMapSize = _map.size() * 2; - const auto newMapMask = newMapSize - 1; + const auto newSize = std::max(256, _map.size() * 2); + const auto newMask = newSize - 1; static constexpr auto sizeLimit = std::numeric_limits::max() / 2; - THROW_HR_IF_MSG(E_OUTOFMEMORY, newMapSize >= sizeLimit, "GlyphCacheMap overflow"); + THROW_HR_IF_MSG(E_OUTOFMEMORY, newSize >= sizeLimit, "GlyphCacheMap overflow"); - auto newMap = Buffer(newMapSize); + auto newMap = Buffer(newSize); - for (const auto& entry : _map) + for (const auto& oldEntry : _map) { - const auto newHash = _hash(entry.fontFace, entry.glyphIndex); - newMap[newHash & newMapMask] = entry; + const auto hash = _hash(oldEntry.key); + for (auto i = hash;; ++i) + { + auto& newEntry = newMap[i & newMask]; + if (!newEntry.key.fontFace) + { + newEntry = oldEntry; + break; + } + } } _map = std::move(newMap); - _mapMask = newMapMask; - _capacity = newMapSize / 2; + _mask = newMask; + _capacity = newSize / 2; } BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptr deviceContext) : @@ -365,14 +405,7 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) if (fontChanged) { - DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); - // Clearing the atlas requires BeginDraw(), which is expensive. Defer this until we need Direct2D anyways. 
- _fontChangedResetGlyphAtlas = true; - - if (_d2dRenderTarget) - { - _d2dRenderTargetUpdateFontSettings(*p.s->font); - } + _updateFontDependents(p); } if (cellCountChanged) @@ -400,6 +433,19 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) _cellCount = p.s->cellCount; } +void BackendD3D::_updateFontDependents(const RenderingPayload& p) +{ + DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); + // Clearing the atlas requires BeginDraw(), which is expensive. Defer this until we need Direct2D anyways. + _fontChangedResetGlyphAtlas = true; + _textShadingType = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? ShadingType::TextClearType : ShadingType::TextGrayscale; + + if (_d2dRenderTarget) + { + _d2dRenderTargetUpdateFontSettings(*p.s->font); + } +} + void BackendD3D::_recreateCustomShader(const RenderingPayload& p) { _customRenderTargetView.reset(); @@ -851,7 +897,7 @@ void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 col _bumpInstancesSize(); } - _instances[_instancesCount++] = QuadInstance{ position, size, texcoord, static_cast(shadingType), color }; + _instances[_instancesCount++] = QuadInstance{ position, size, texcoord, shadingType, color }; } void BackendD3D::_bumpInstancesSize() @@ -992,28 +1038,39 @@ void BackendD3D::_drawText(RenderingPayload& p) u16 y = 0; for (const auto row : p.rows) { + f32 baselineX = 0; const auto baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; - f32 cumulativeAdvance = 0; + GlyphCacheKey key{ .lineRendition = static_cast(row->lineRendition) }; + const auto lineRenditionScale = static_cast(row->lineRendition != LineRendition::SingleWidth); for (const auto& m : row->mappings) { - for (auto x = m.glyphsFrom; x < m.glyphsTo; cumulativeAdvance += row->glyphAdvances[x], ++x) + key.fontFace = m.fontFace.get(); + + for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) { + 
key.glyphIndex = row->glyphIndices[x]; bool inserted = false; - auto& entry = _glyphCache.FindOrInsert(m.fontFace.get(), row->glyphIndices[x], inserted); + auto& entry = _glyphCache.FindOrInsert(key, inserted); if (inserted) { _drawGlyph(p, entry, m.fontEmSize); } - if (entry.shadingType) + if (entry.data.shadingType != ShadingType::Default) { - const auto l = static_cast(cumulativeAdvance + row->glyphOffsets[x].advanceOffset + 0.5f) + entry.offset.x; - const auto t = static_cast(baselineY - row->glyphOffsets[x].ascenderOffset + 0.5f) + entry.offset.y; + auto l = static_cast(baselineX + row->glyphOffsets[x].advanceOffset + 0.5f) + entry.data.offset.x; + const auto t = static_cast(baselineY - row->glyphOffsets[x].ascenderOffset + 0.5f) + entry.data.offset.y; + + l <<= lineRenditionScale; + row->dirtyTop = std::min(row->dirtyTop, t); - row->dirtyBottom = std::max(row->dirtyBottom, t + entry.size.y); - _appendQuad({ static_cast(l), static_cast(t) }, entry.size, entry.texcoord, row->colors[x], static_cast(entry.shadingType)); + row->dirtyBottom = std::max(row->dirtyBottom, t + entry.data.size.y); + + _appendQuad({ static_cast(l), static_cast(t) }, entry.data.size, entry.data.texcoord, row->colors[x], entry.data.shadingType); } + + baselineX += row->glyphAdvances[x]; } } @@ -1035,15 +1092,13 @@ void BackendD3D::_drawText(RenderingPayload& p) _d2dEndDrawing(); } -#pragma warning(disable : 4189) - void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) { const DWRITE_GLYPH_RUN glyphRun{ - .fontFace = entry.fontFace, + .fontFace = entry.key.fontFace, .fontEmSize = fontEmSize, .glyphCount = 1, - .glyphIndices = &entry.glyphIndex, + .glyphIndices = &entry.key.glyphIndex, }; DWRITE_FONT_METRICS fontMetrics; @@ -1052,14 +1107,26 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f DWRITE_GLYPH_METRICS glyphMetrics; glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, 
&glyphMetrics, false); - // This calculates the black box of the glyph, or in other words, it's extents/size relative to its baseline origin (at 0,0). - // The algorithm below is a reverse engineered variant of `IDWriteTextLayout::GetMetrics`. The coordinates will be in pixel - // and the positive direction will be bottom/right. A `.left` of -3px would indicate that the glyph overlaps it's bounding box - // by 3px to the left and would thus overlap it's neighbor to the left by 3px. `.bottom` is the same but for the descender. - // `.right` and `.top` are not overlaps per se, but rather the distance to the right/top edge relative to the baseline origin. - // The width of the glyph for instance is thus `.right - .left`. + // This calculates the black box of the glyph, or in other words, it's extents/size relative to its baseline + // origin (at 0,0). The algorithm below is a reverse engineered variant of `IDWriteTextLayout::GetMetrics`. + // The coordinates will be in pixels and the positive direction will be bottom/right. + // + // box.top --------++-----######--+ + // (-7) || ############ + // ||#### #### + // |### ##### + // baseline _____ |### #####| + // origin \ |############# | + // (= 0,0) \||########### | + // ++-------###---+ + // ## ### | + // box.bottom -----+#########-----+ + // (+2) | | + // box.left box.right + // (-1) (+14) + // const f32 fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; - const f32r box{ + f32r box{ static_cast(glyphMetrics.leftSideBearing) * fontScale, static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale, static_cast(static_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale, @@ -1071,69 +1138,152 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f { // This will indicate to `BackendD3D::_drawText` that this glyph is whitespace. 
It's important to set this member, // because `GlyphCacheMap` does not zero out inserted entries and `shadingType` might still contain "garbage". - entry.shadingType = 0; + entry.data.shadingType = ShadingType::Default; return; } - bool retry = false; - for (;;) + const auto isDoubleHeight = static_cast(entry.key.lineRendition) >= LineRendition::DoubleHeightTop; + + std::optional transform; + if (entry.key.lineRendition) { - // We'll add a 1px padding on all 4 sides to avoid neighboring glyphs from overlapping, - // since the blackbox measurement is only an estimate based on the design metrics. - // We need to use round (and not ceil/floor) to ensure we pixel-snap individual - // glyphs correctly and form a consistent baseline across an entire run of glyphs. - // Also, ClearType might draw (rounded) up to 1.2px away from the design outline. - const auto l = lround(box.left) - 1; - const auto t = lround(box.top) - 1; - const auto r = lround(box.right) + 1; - const auto b = lround(box.bottom) + 1; + auto& t = transform.emplace(); + t.m11 = 2.0f; + t.m22 = isDoubleHeight ? 2.0f : 1.0f; + + box.left *= t.m11; + box.top *= t.m22; + box.right *= t.m11; + box.bottom *= t.m22; + } - stbrp_rect rect{ - .w = r - l, - .h = b - t, - }; - if (stbrp_pack_rects(&_rectPacker, &rect, 1)) - { - _d2dBeginDrawing(); + // To take anti-aliasing of the borders into account, we'll add a 1px padding on all 4 sides. + // This doesn't work however if font hinting causes the pixels to be offset from the design outline. + // We need to use round (and not ceil/floor) to ensure we pixel-snap individual + // glyphs correctly and form a consistent baseline across an entire run of glyphs. 
+ const auto bl = lround(box.left) - 1; + const auto bt = lround(box.top) - 1; + const auto br = lround(box.right) + 1; + const auto bb = lround(box.bottom) + 1; + + stbrp_rect rect{ + .w = br - bl, + .h = bb - bt, + }; + if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) + { + _drawGlyphRetry(p, rect); + } -#if ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS - { - const auto d2dColor = colorFromU32(colorbrewer::pastel1[_colorizeGlyphAtlasCounter] | 0x3f000000); - _colorizeGlyphAtlasCounter = (_colorizeGlyphAtlasCounter + 1) % std::size(colorbrewer::pastel1); + _d2dBeginDrawing(); - wil::com_ptr brush; - THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, brush.addressof())); + D2D1_POINT_2F baseline{ + static_cast(rect.x - bl), + static_cast(rect.y - bt), + }; + D2D1_RECT_F clipRect{ + static_cast(rect.x), + static_cast(rect.y), + static_cast(rect.x + rect.w), + static_cast(rect.y + rect.h), + }; + _d2dRenderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); - const D2D1_RECT_F rectF{ - static_cast(rect.x), - static_cast(rect.y), - static_cast(rect.x + rect.w), - static_cast(rect.y + rect.h), - }; - _d2dRenderTarget->FillRectangle(&rectF, brush.get()); - } +#if ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS + { + const auto d2dColor = colorFromU32(colorbrewer::pastel1[_colorizeGlyphAtlasCounter] | 0x3f000000); + _colorizeGlyphAtlasCounter = (_colorizeGlyphAtlasCounter + 1) % std::size(colorbrewer::pastel1); + wil::com_ptr brush; + THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, brush.addressof())); + _d2dRenderTarget->FillRectangle(&clipRect, brush.get()); + } #endif - const D2D1_POINT_2F baseline{ static_cast(rect.x - l), static_cast(rect.y - t) }; - const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); - const auto shadingType = colorGlyph ? ShadingType::Passthrough : (p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? 
ShadingType::TextClearType : ShadingType::TextGrayscale); - - entry.shadingType = static_cast(shadingType); - entry.offset.x = l; - entry.offset.y = t; - entry.size.x = rect.w; - entry.size.y = rect.h; - entry.texcoord.x = rect.x; - entry.texcoord.y = rect.y; - return; + if (transform) + { + auto& t = *transform; + t.dx = (1.0f - t.m11) * baseline.x; + t.dy = (1.0f - t.m22) * baseline.y; + _d2dRenderTarget->SetTransform(&t); + } + + const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); + + entry.data.offset.x = bl; + entry.data.offset.y = bt; + entry.data.size.x = rect.w; + entry.data.size.y = rect.h; + entry.data.texcoord.x = rect.x; + entry.data.texcoord.y = rect.y; + entry.data.shadingType = colorGlyph ? ShadingType::Passthrough : _textShadingType; + + if (transform) + { + static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; + _d2dRenderTarget->SetTransform(&identity); + + if (isDoubleHeight) + { + _splitDoubleHeightGlyph(p, entry); } + } + + _d2dRenderTarget->PopAxisAlignedClip(); +} + +void BackendD3D::_drawGlyphRetry(const RenderingPayload& p, stbrp_rect& rect) +{ + _d2dEndDrawing(); + _flushQuads(p); + _resetGlyphAtlasAndBeginDraw(p); + + if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) + { + THROW_HR_MSG(E_UNEXPECTED, "BackendD3D::_drawGlyph deadlock"); + } +} + +// If this is a double-height glyph (DECDHL), we need to split it into 2 glyph entries: +// One for the top half and one for the bottom half, because that's how DECDHL works. +// This will clip `entry` to only contain the top/bottom half (as specified by `entry.key.lineRendition`) +// and create a second entry in our glyph cache hashmap that contains the other half. 
+void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) +{ + static constexpr auto lrTop = static_cast(LineRendition::DoubleHeightTop); + static constexpr auto lrBottom = static_cast(LineRendition::DoubleHeightBottom); + + // Twice the line height, twice the descender gap. For both. + entry.data.offset.y -= p.s->font->descender; + + const auto isTop = entry.key.lineRendition == lrTop; + const auto altRendition = isTop ? lrBottom : lrTop; + const auto topSize = clamp(-entry.data.offset.y - p.s->font->baseline, 0, static_cast(entry.data.size.y)); - THROW_HR_IF_MSG(E_UNEXPECTED, retry, "BackendD3D::_drawGlyph deadlock"); + bool inserted; + auto key2 = entry.key; + key2.lineRendition = altRendition; + auto& entry2 = _glyphCache.FindOrInsert(key2, inserted); + entry2.data = entry.data; - _d2dEndDrawing(); - _flushQuads(p); - _resetGlyphAtlasAndBeginDraw(p); - retry = true; + auto& top = isTop ? entry : entry2; + auto& bottom = isTop ? entry2 : entry; + + top.data.offset.y += p.s->font->cellSize.y; + top.data.size.y = topSize; + + bottom.data.offset.y += topSize; + bottom.data.size.y = std::max(0, bottom.data.size.y - topSize); + bottom.data.texcoord.y += topSize; + + // Things like diacritics might be so small that they only exist on either half of the + // double-height row. This effectively turns the other (unneeded) side into whitespace. 
+ if (!top.data.size.y) + { + top.data.shadingType = ShadingType::Default; + } + if (!bottom.data.size.y) + { + bottom.data.shadingType = ShadingType::Default; } } diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 723bc69c25d..dce80dd67fb 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -57,12 +57,13 @@ namespace Microsoft::Console::Render::Atlas enum class ShadingType : u32 { + Default = 0, Background = 0, - TextGrayscale, - TextClearType, - Passthrough, - DashedLine, - SolidFill, + TextGrayscale = 1, + TextClearType = 2, + Passthrough = 3, + DashedLine = 4, + SolidFill = 5, }; struct QuadInstance @@ -75,21 +76,42 @@ namespace Microsoft::Console::Render::Atlas alignas(sizeof(i16x2)) i16x2 position; alignas(sizeof(i16x2)) u16x2 size; alignas(sizeof(i16x2)) u16x2 texcoord; - alignas(sizeof(u32)) u32 shadingType = 0; + alignas(sizeof(u32)) ShadingType shadingType = ShadingType::Default; alignas(sizeof(u32)) u32 color = 0; }; - struct GlyphCacheEntry + struct GlyphCacheKey { // BODGY: The IDWriteFontFace results from us calling IDWriteFontFallback::MapCharacters // which at the time of writing returns the same IDWriteFontFace as long as someone is // holding a reference / the reference count doesn't drop to 0 (see ActiveFaceCache). + // This allows us to hash the value of the pointer as if it was uniquely identifying the font face. IDWriteFontFace* fontFace = nullptr; - u16 glyphIndex = 0; - u16 shadingType = 0; + u16 glyphIndex; + u16 lineRendition; + + constexpr bool operator==(const GlyphCacheKey& rhs) const noexcept + { + return __builtin_memcmp(this, &rhs, GlyphCacheKeyDataSize) == 0; + } + }; + + // Due to padding on 64-Bit systems, sizeof(GlyphCacheKey) will be 16, + // but the actual contents of the struct still only be 12 bytes. 
+ static constexpr size_t GlyphCacheKeyDataSize = sizeof(GlyphCacheKey::fontFace) + 2 * sizeof(u16); + + struct GlyphCacheData + { i16x2 offset; u16x2 size; u16x2 texcoord; + ShadingType shadingType = ShadingType::Default; + }; + + struct GlyphCacheEntry + { + GlyphCacheKey key; + GlyphCacheData data; }; struct GlyphCacheMap @@ -104,22 +126,20 @@ namespace Microsoft::Console::Render::Atlas GlyphCacheMap& operator=(GlyphCacheMap&& other) noexcept; void Clear() noexcept; - GlyphCacheEntry& FindOrInsert(IDWriteFontFace* fontFace, u16 glyphIndex, bool& inserted); + GlyphCacheEntry& FindOrInsert(const GlyphCacheKey& key, bool& inserted); private: - static size_t _hash(IDWriteFontFace* fontFace, u16 glyphIndex) noexcept; - GlyphCacheEntry& _insert(IDWriteFontFace* fontFace, u16 glyphIndex, size_t hash); + static size_t _hash(const GlyphCacheKey& key) noexcept; void _bumpSize(); - static constexpr u32 initialSize = 256; - - Buffer _map{ initialSize }; - size_t _mapMask = initialSize - 1; - size_t _capacity = initialSize / 2; + Buffer _map; + size_t _mask = 0; + size_t _capacity = 0; size_t _size = 0; }; __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); + void _updateFontDependents(const RenderingPayload& p); void _recreateCustomShader(const RenderingPayload& p); void _recreateCustomRenderTargetView(u16x2 targetSize); void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept; @@ -143,6 +163,8 @@ namespace Microsoft::Console::Render::Atlas void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); __declspec(noinline) void _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); + __declspec(noinline) void _drawGlyphRetry(const RenderingPayload& p, stbrp_rect& rect); + void _splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void 
_drawCursorPart1(const RenderingPayload& p); @@ -216,6 +238,7 @@ namespace Microsoft::Console::Render::Atlas til::generation_t _miscGeneration; u16x2 _targetSize; u16x2 _cellCount; + ShadingType _textShadingType = ShadingType::Default; // An empty-box cursor spanning a wide glyph that has different // background colors on each side results in 6 lines being drawn. diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 854c7001576..eee789072f5 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -43,8 +43,6 @@ namespace Microsoft::Console::Render::Atlas } #define ATLAS_POD_OPS(type) \ - constexpr auto operator<=>(const type&) const noexcept = default; \ - \ constexpr bool operator==(const type& rhs) const noexcept \ { \ return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ @@ -304,6 +302,7 @@ namespace Microsoft::Console::Render::Atlas u16x2 cellSize; u16 fontWeight = 0; u16 baseline = 0; + u16 descender = 0; u16 underlinePos = 0; u16 underlineWidth = 0; u16 strikethroughPos = 0; diff --git a/src/renderer/atlas/stb_rect_pack.cpp b/src/renderer/atlas/stb_rect_pack.cpp index 306f747a7f5..e90d9b988f9 100644 --- a/src/renderer/atlas/stb_rect_pack.cpp +++ b/src/renderer/atlas/stb_rect_pack.cpp @@ -3,5 +3,12 @@ #include "pch.h" +// AtlasEngine doesn't use stb_rect_pack efficiently right now and packs rectangles one by one, +// because this simplifies the text rendering implementation quite a bit. On the flip side however, +// this allows us to skip sorting rectangles, because sorting arrays of size 1 is pointless. 
+#pragma warning(disable : 4505) // '...': unreferenced function with internal linkage has been removed +#define STBRP_SORT(_Base, _NumOfElements, _SizeOfElements, _CompareFunction) \ + assert(_NumOfElements == 1) + #define STB_RECT_PACK_IMPLEMENTATION #include "stb_rect_pack.h" diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp index e64d4c131c0..e64fd7511f0 100644 --- a/src/tools/RenderingTests/main.cpp +++ b/src/tools/RenderingTests/main.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -54,45 +55,35 @@ namespace #define defer const auto _DEFER_CONCAT(_defer_, __LINE__) = ::detail::scope_guard_helper() + [&]() } +static void printUTF16(const wchar_t* str) +{ + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), str, static_cast(wcslen(str)), nullptr, nullptr); +} + // wprintf() in the uCRT prints every single wchar_t individually and thus breaks surrogate // pairs apart which Windows Terminal treats as invalid input and replaces it with U+FFFD. static void printfUTF16(_In_z_ _Printf_format_string_ wchar_t const* const format, ...) 
{ - wchar_t buffer[128]; + std::array buffer; va_list args; va_start(args, format); - const auto length = _vsnwprintf_s(buffer, _countof(buffer), _TRUNCATE, format, args); + const auto length = _vsnwprintf_s(buffer.data(), buffer.size(), _TRUNCATE, format, args); va_end(args); - WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), buffer, length, nullptr, nullptr); -} - -static void writeAsciiWithAttribute(WORD attribute, const wchar_t* text) -{ - const auto outputHandle = GetStdHandle(STD_OUTPUT_HANDLE); - const auto length = static_cast(wcslen(text)); - - CONSOLE_SCREEN_BUFFER_INFO info{}; - GetConsoleScreenBufferInfo(outputHandle, &info); - - WORD attributes[128]; - std::fill_n(&attributes[0], length, static_cast(attribute | FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED)); - - DWORD numberOfAttrsWritten; - WriteConsoleW(outputHandle, text, length, nullptr, nullptr); - WriteConsoleOutputAttribute(outputHandle, attributes, length, info.dwCursorPosition, &numberOfAttrsWritten); + assert(length >= 0); + WriteConsoleW(GetStdHandle(STD_OUTPUT_HANDLE), buffer.data(), length, nullptr, nullptr); } static void wait() { - printfUTF16(L"\r\nPress any key to continue..."); + printUTF16(L"\x1B[9999;1HPress any key to continue..."); _getch(); } static void clear() { - printfUTF16( + printUTF16( L"\x1B[H" // move cursor to 0,0 L"\x1B[2J" // clear screen ); @@ -109,67 +100,149 @@ int main() SetConsoleMode(outputHandle, consoleMode); }; - printfUTF16( + printUTF16( L"\x1b[?1049h" // enable alternative screen buffer - L"\x1B[H" // move cursor to 0,0 ); defer { - printfUTF16( + printUTF16( L"\x1b[?1049l" // disable alternative screen buffer ); }; { - struct Test + struct ConsoleAttributeTest { - WORD attribute = 0; const wchar_t* text = nullptr; + WORD attribute = 0; }; - static constexpr Test tests[]{ -#define MAKE_TEST_FOR_ATTRIBUTE(attr) Test{ attr, L## #attr } + static constexpr ConsoleAttributeTest consoleAttributeTests[]{ + { L"Console attributes:", 0 }, +#define 
MAKE_TEST_FOR_ATTRIBUTE(attr) { L## #attr, attr } MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_HORIZONTAL), MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_LVERTICAL), MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_GRID_RVERTICAL), MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_REVERSE_VIDEO), MAKE_TEST_FOR_ATTRIBUTE(COMMON_LVB_UNDERSCORE), #undef MAKE_TEST_FOR_ATTRIBUTE + { L"all gridlines", COMMON_LVB_GRID_HORIZONTAL | COMMON_LVB_GRID_LVERTICAL | COMMON_LVB_GRID_RVERTICAL | COMMON_LVB_UNDERSCORE }, + { L"all attributes", COMMON_LVB_GRID_HORIZONTAL | COMMON_LVB_GRID_LVERTICAL | COMMON_LVB_GRID_RVERTICAL | COMMON_LVB_REVERSE_VIDEO | COMMON_LVB_UNDERSCORE }, + }; + + SHORT row = 2; + for (const auto& t : consoleAttributeTests) + { + const auto length = static_cast(wcslen(t.text)); + printfUTF16(L"\x1B[%d;5H%s", row + 1, t.text); + + WORD attributes[32]; + std::fill_n(&attributes[0], length, static_cast(FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | t.attribute)); + + DWORD numberOfAttrsWritten; + WriteConsoleOutputAttribute(outputHandle, attributes, length, { 4, row }, &numberOfAttrsWritten); + + row += 2; + } + + struct VTAttributeTest + { + const wchar_t* text = nullptr; + int sgr = 0; + }; + static constexpr VTAttributeTest vtAttributeTests[]{ + { L"ANSI escape SGR:", 0 }, + { L"italic", 3 }, + { L"underline", 4 }, + { L"reverse", 7 }, + { L"strikethrough", 9 }, + { L"double underline", 21 }, + { L"overlined", 53 }, }; - for (const auto& t : tests) + row = 3; + for (const auto& t : vtAttributeTests) { - printfUTF16(L"\r\n "); - writeAsciiWithAttribute(t.attribute, t.text); - printfUTF16(L"\r\n "); + printfUTF16(L"\x1B[%d;45H\x1b[%dm%s\x1b[m", row, t.sgr, t.text); + row += 2; } + + printfUTF16(L"\x1B[%d;45H\x1b]8;;https://example.com\x1b\\hyperlink\x1b]8;;\x1b\\", row); + + wait(); + clear(); } - wait(); - clear(); + { + printUTF16( + L"\x1B[3;5HDECDWL Double Width \U0001FAE0 A\u0353\u0353 B\u036F\u036F" + L"\x1B[4;5H\x1b#6DECDWL Double Width \U0001FAE0 A\u0353\u0353 B\u036F\u036F" + 
L"\x1B[8;5HDECDHL Double Height \U0001F642\U0001F6C1 A\u0353\u0353 B\u036F\u036F X\u0353\u0353 Y\u036F\u036F" + L"\x1B[9;5H\x1b#3DECDHL Double Height Top \U0001F642 A\u0353\u0353 B\u036F\u036F" + L"\x1B[10;5H\x1b#4DECDHL Double Height Bottom \U0001F6C1 X\u0353\u0353 Y\u036F\u036F"); + + wait(); + clear(); + } { - struct Test + defer { - WORD sgr = 0; - const wchar_t* name = nullptr; - }; - static constexpr Test tests[]{ - { 3, L"italic" }, - { 4, L"underline" }, - { 7, L"reverse" }, - { 9, L"strikethrough" }, - { 21, L"double underline" }, - { 53, L"overlined" }, + // Setting an empty DRCS gets us back to the regular font. + printUTF16(L"\x1bP1;1;2{ @\x1b\\"); }; - for (const auto& t : tests) + const auto glyph = + " W W " + " W W " + " W W W " + " W W W " + " W W W " + " W W W TTTTTTT " + " W W T " + " T " + " T " + " T " + " T " + " T "; + + // Convert the above visual glyph to sixels + wchar_t rows[2][15]; + for (int r = 0; r < 2; ++r) { - printfUTF16(L"\r\n \x1b[%dm%s \\x1b[%dm\x1b[m\r\n ", t.sgr, t.name, t.sgr); + const auto glyphData = &glyph[r * 15 * 6]; + + for (int x = 0; x < 15; ++x) + { + unsigned int accumulator = 0; + for (int y = 5; y >= 0; --y) + { + const auto isSet = glyphData[y * 15 + x] != ' '; + accumulator <<= 1; + accumulator |= static_cast(isSet); + } + + rows[r][x] = static_cast(L'?' 
+ accumulator); + } } - printfUTF16(L"\r\n \x1b]8;;https://example.com\x1b\\hyperlink \\x1b]8;;https://example.com\\x1b\\\\hyperlink\\x1b]8;;\\x1b\\\\\x1b]8;;\x1b\\\r\n "); + printfUTF16( + // * Pfn | font number | 1 | + // * Pcn | starting character | 3 | = ASCII 0x23 "#" + // * Pe | erase control | 2 | erase all + // Pcmw | character matrix width | 0 | 15 pixels + // Pw | font width | 0 | 80 columns + // Pt | text or full cell | 0 | text + // Pcmh | character matrix height | 0 | 12 pixels + // Pcss | character set size | 0 | 94 + // * Dscs | character set name | " @" | unregistered soft set + L"\x1bP1;3;2{ @%.15s/%.15s\x1b\\", + rows[0], + rows[1]); + + printUTF16(L"\x1B[3;5HDECDLD glyph \"WT\": \x1b( @#\x1b(A"); + + wait(); } - wait(); return 0; } diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index 317371f7d52..6cd822766c2 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -124,11 +124,11 @@ - {{ top={top}, bottom={bottom} }} + {{ dirtyTop={dirtyTop}, dirtyBottom={dirtyBottom} }} - (empty) - {(void*)fontFace}, {glyphIndex} + (empty) + {(void*)key.fontFace}, {key.glyphIndex} From 4879a36a362219041a28d1ac38283a5777ac92aa Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 28 Mar 2023 00:50:04 +0200 Subject: [PATCH 13/37] Fix glyph measurements, Fix font axis support, Begin implementing soft fonts --- src/renderer/atlas/AtlasEngine.api.cpp | 42 ++++++++----- src/renderer/atlas/AtlasEngine.cpp | 75 +++++++++++++++-------- src/renderer/atlas/AtlasEngine.h | 14 +---- src/renderer/atlas/BackendD2D.cpp | 49 +++++++-------- src/renderer/atlas/BackendD2D.h | 4 +- src/renderer/atlas/BackendD3D.cpp | 83 +++++++++++--------------- src/renderer/atlas/BackendD3D.h | 5 +- src/renderer/atlas/common.h | 64 +++++++++++++------- 8 files changed, 182 insertions(+), 154 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 75b279d6c77..3ac4e1dd7ec 100644 --- 
a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -162,6 +162,10 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept [[nodiscard]] HRESULT AtlasEngine::UpdateSoftFont(const std::span bitPattern, const til::size cellSize, const size_t centeringHint) noexcept { + const auto softFont = _api.s.write()->font.write(); + softFont->softFontPattern = std::vector(bitPattern.begin(), bitPattern.end()); + softFont->softFontCellSize = cellSize; + softFont->softFontCenteringHint = centeringHint; return S_OK; } @@ -395,7 +399,7 @@ void AtlasEngine::SetRetroTerminalEffect(bool enable) noexcept void AtlasEngine::SetSelectionBackground(const COLORREF color, const float alpha) noexcept { - const u32 selectionColor = (color & 0xffffff) | gsl::narrow_cast(std::lroundf(alpha * 255.0f)) << 24; + const u32 selectionColor = (color & 0xffffff) | gsl::narrow_cast(lrintf(alpha * 255.0f)) << 24; if (_api.s->misc->selectionColor != selectionColor) { _api.s.write()->misc.write()->selectionColor = selectionColor; @@ -678,8 +682,8 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo // Our cells can't overlap each other so we additionally clamp the bottom line to be inside the cell boundaries. doubleUnderlinePosBottom = std::min(doubleUnderlinePosBottom, adjustedHeight - thinLineWidth); - const auto cellWidth = gsl::narrow(std::lroundf(adjustedWidth)); - const auto cellHeight = gsl::narrow(std::lroundf(adjustedHeight)); + const auto cellWidth = gsl::narrow(lrintf(adjustedWidth)); + const auto cellHeight = gsl::narrow(lrintf(adjustedHeight)); { til::size coordSize; @@ -691,7 +695,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo // The coordSizeUnscaled parameter to SetFromEngine is used for API functions like GetConsoleFontSize. 
// Since clients expect that settings the font height to Y yields back a font height of Y, // we're scaling the X relative/proportional to the actual cellWidth/cellHeight ratio. - requestedSize.width = gsl::narrow_cast(std::lroundf(fontSize / cellHeight * cellWidth)); + requestedSize.width = gsl::narrow_cast(lrintf(fontSize / cellHeight * cellWidth)); } fontInfo.SetFromEngine(requestedFaceName, requestedFamily, requestedWeight, false, coordSize, requestedSize); @@ -700,6 +704,16 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo if (fontMetrics) { std::wstring fontName{ requestedFaceName }; + const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); + const auto baselineU16 = static_cast(baseline + 0.5f); + const auto descenderU16 = gsl::narrow_cast(cellHeight - baselineU16); + const auto underlinePosU16 = static_cast(underlinePos + 0.5f); + const auto underlineWidthU16 = static_cast(underlineWidth + 0.5f); + const auto strikethroughPosU16 = static_cast(strikethroughPos + 0.5f); + const auto strikethroughWidthU16 = static_cast(strikethroughWidth + 0.5f); + const auto doubleUnderlinePosTopU16 = static_cast(doubleUnderlinePosTop + 0.5f); + const auto doubleUnderlinePosBottomU16 = static_cast(doubleUnderlinePosBottom + 0.5f); + const auto thinLineWidthU16 = static_cast(thinLineWidth + 0.5f); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause _api to be in an inconsistent state otherwise. 
@@ -711,15 +725,15 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize.x = cellWidth; fontMetrics->cellSize.y = cellHeight; - fontMetrics->fontWeight = gsl::narrow_cast(requestedWeight); - fontMetrics->baseline = static_cast(baseline); - fontMetrics->descender = static_cast(cellHeight - fontMetrics->baseline); - fontMetrics->underlinePos = static_cast(underlinePos); - fontMetrics->underlineWidth = static_cast(underlineWidth); - fontMetrics->strikethroughPos = static_cast(strikethroughPos); - fontMetrics->strikethroughWidth = static_cast(strikethroughWidth); - fontMetrics->doubleUnderlinePos.x = static_cast(doubleUnderlinePosTop); - fontMetrics->doubleUnderlinePos.y = static_cast(doubleUnderlinePosBottom); - fontMetrics->thinLineWidth = static_cast(thinLineWidth); + fontMetrics->fontWeight = fontWeightU16; + fontMetrics->baseline = baselineU16; + fontMetrics->descender = descenderU16; + fontMetrics->underlinePos = underlinePosU16; + fontMetrics->underlineWidth = underlineWidthU16; + fontMetrics->strikethroughPos = strikethroughPosU16; + fontMetrics->strikethroughWidth = strikethroughWidthU16; + fontMetrics->doubleUnderlinePos.x = doubleUnderlinePosTopU16; + fontMetrics->doubleUnderlinePos.y = doubleUnderlinePosBottomU16; + fontMetrics->thinLineWidth = thinLineWidthU16; } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index e58fdb8c4d5..dbfb9bf2eb7 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -37,7 +37,7 @@ AtlasEngine::AtlasEngine() _p.dwriteFactory4 = _p.dwriteFactory.try_query(); THROW_IF_FAILED(_p.dwriteFactory->GetSystemFontFallback(_p.systemFontFallback.addressof())); - _p.systemFontFallback = _p.systemFontFallback.try_query(); + _p.systemFontFallback1 = _p.systemFontFallback.try_query(); wil::com_ptr textAnalyzer; 
THROW_IF_FAILED(_p.dwriteFactory->CreateTextAnalyzer(textAnalyzer.addressof())); @@ -246,7 +246,7 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PrepareLineTransform(const LineRendition lineRendition, const til::CoordType targetRow, const til::CoordType viewportLeft) noexcept { const auto y = gsl::narrow_cast(clamp(targetRow, 0, _p.s->cellCount.y)); - _p.rows[y]->lineRendition = lineRendition; + _p.rows[y]->lineRendition = static_cast(lineRendition); _api.lineRendition = lineRendition; return S_OK; } @@ -301,7 +301,7 @@ try const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 1 : 0; const auto backgroundRow = _p.backgroundBitmap.begin() + static_cast(y) * _p.s->cellCount.x; auto it = backgroundRow + x; - const auto end = backgroundRow + (column << shift); + const auto end = backgroundRow + (static_cast(column) << shift); const auto bg = _api.currentColor.y; for (; it != end; ++it) @@ -418,10 +418,10 @@ try if (!isSettingDefaultBrushes) { const u32x2 newColors{ gsl::narrow_cast(fg), gsl::narrow_cast(bg) }; - const AtlasKeyAttributes attributes{ - .bold = textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold), - .italic = textAttributes.IsItalic() - }; + + auto attributes = FontRelevantAttributes::None; + WI_SetFlagIf(attributes, FontRelevantAttributes::Bold, textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold)); + WI_SetFlagIf(attributes, FontRelevantAttributes::Italic, textAttributes.IsItalic()); if (_api.attributes != attributes) { @@ -476,7 +476,18 @@ void AtlasEngine::_handleSettingsUpdate() void AtlasEngine::_recreateFontDependentResources() { - if (!_p.s->font->fontAxisValues.empty()) + _api.replacementCharacterFontFace.reset(); + _api.replacementCharacterGlyphIndex = 0; + _api.replacementCharacterLookedUp = false; + + if (_p.s->font->fontAxisValues.empty()) + { + for (auto& axes : _api.textFormatAxes) + { + axes = {}; + } + } + else { // See 
AtlasEngine::UpdateFont. // It hardcodes indices 0/1/2 in fontAxisValues to the weight/italic/slant axes. @@ -486,24 +497,19 @@ void AtlasEngine::_recreateFontDependentResources() const auto& standardAxes = _p.s->font->fontAxisValues; auto fontAxisValues = _p.s->font->fontAxisValues; - for (auto italic = 0; italic < 2; ++italic) + for (size_t i = 0; i < 4; ++i) { - for (auto bold = 0; bold < 2; ++bold) - { - // The wght axis defaults to the font weight. - fontAxisValues[0].value = bold ? DWRITE_FONT_WEIGHT_BOLD : (isnan(standardAxes[0].value) ? static_cast(_p.s->font->fontWeight) : standardAxes[0].value); - // The ital axis defaults to 1 if this is italic and 0 otherwise. - fontAxisValues[1].value = italic ? 1.0f : (isnan(standardAxes[1].value) ? 0.0f : standardAxes[1].value); - // The slnt axis defaults to -12 if this is italic and 0 otherwise. - fontAxisValues[2].value = italic ? -12.0f : (isnan(standardAxes[2].value) ? 0.0f : standardAxes[2].value); - _p.d.font.textFormatAxes[italic][bold] = { fontAxisValues.data(), fontAxisValues.size() }; - } + const auto bold = (i & static_cast(FontRelevantAttributes::Bold)) != 0; + const auto italic = (i & static_cast(FontRelevantAttributes::Italic)) != 0; + // The wght axis defaults to the font weight. + fontAxisValues[0].value = bold ? DWRITE_FONT_WEIGHT_BOLD : (isnan(standardAxes[0].value) ? static_cast(_p.s->font->fontWeight) : standardAxes[0].value); + // The ital axis defaults to 1 if this is italic and 0 otherwise. + fontAxisValues[1].value = italic ? 1.0f : (isnan(standardAxes[1].value) ? 0.0f : standardAxes[1].value); + // The slnt axis defaults to -12 if this is italic and 0 otherwise. + fontAxisValues[2].value = italic ? -12.0f : (isnan(standardAxes[2].value) ? 
0.0f : standardAxes[2].value); + _api.textFormatAxes[i] = { fontAxisValues.data(), fontAxisValues.size() }; } } - - _api.replacementCharacterFontFace.reset(); - _api.replacementCharacterGlyphIndex = 0; - _api.replacementCharacterLookedUp = false; } void AtlasEngine::_recreateCellCountDependentResources() @@ -622,7 +628,7 @@ void AtlasEngine::_flushBufferLine() void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace** mappedFontFace) const { TextAnalysisSource analysisSource{ text, textLength }; - const auto& textFormatAxis = _p.d.font.textFormatAxes[_api.attributes.italic][_api.attributes.bold]; + const auto& textFormatAxis = _api.textFormatAxes[static_cast(_api.attributes)]; if (textFormatAxis) { @@ -640,8 +646,8 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* } else { - const auto baseWeight = _api.attributes.bold ? DWRITE_FONT_WEIGHT_BOLD : static_cast(_p.s->font->fontWeight); - const auto baseStyle = _api.attributes.italic ? DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; + const auto baseWeight = WI_IsFlagSet(_api.attributes, FontRelevantAttributes::Bold) ? DWRITE_FONT_WEIGHT_BOLD : static_cast(_p.s->font->fontWeight); + const auto baseStyle = WI_IsFlagSet(_api.attributes, FontRelevantAttributes::Italic) ? 
DWRITE_FONT_STYLE_ITALIC : DWRITE_FONT_STYLE_NORMAL; wil::com_ptr font; THROW_IF_FAILED(_p.systemFontFallback->MapCharacters( @@ -812,6 +818,23 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) { + // TODO soft fonts +#if 0 + if (!_p.s->font->softFontPattern.empty()) + { + for (u32 i = from; i < to; ++i) + { + const auto ch = _api.bufferLine[from]; + if (ch >= 0xEF20 && ch < 0xEF80) + { + } + } + + const auto initialIndicesCount = row.glyphIndices.size(); + row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size()), FontRendition::SoftFont); + } +#endif + if (!_api.replacementCharacterLookedUp) { bool succeeded = false; diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 9066cd562d6..a9d37fed617 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -78,14 +78,6 @@ namespace Microsoft::Console::Render::Atlas void UpdateHyperlinkHoveredId(uint16_t hoveredId) noexcept override; private: - struct AtlasKeyAttributes - { - bool bold = false; - bool italic = false; - - ATLAS_POD_OPS(AtlasKeyAttributes) - }; - // AtlasEngine.cpp __declspec(noinline) void _handleSettingsUpdate(); void _recreateFontDependentResources(); @@ -130,6 +122,7 @@ namespace Microsoft::Console::Render::Atlas std::vector bufferLineColumn; Buffer colorsForeground; + std::array, 4> textFormatAxes; std::vector analysisResults; Buffer clusterMap; Buffer textProps; @@ -147,7 +140,7 @@ namespace Microsoft::Console::Render::Atlas // UpdateDrawingBrushes() u32 backgroundOpaqueMixin = 0xff000000; u32x2 currentColor; - AtlasKeyAttributes attributes{}; + FontRelevantAttributes attributes = FontRelevantAttributes::None; u16x2 lastPaintBufferLineCoord; // UpdateHyperlinkHoveredId() u16 hyperlinkHoveredId = 0; @@ -159,9 +152,6 @@ 
namespace Microsoft::Console::Render::Atlas u16x2 invalidatedRows = invalidatedRowsNone; // x is treated as "top" and y as "bottom" i16 scrollOffset = 0; } _api; - -#undef ATLAS_POD_OPS -#undef ATLAS_FLAG_OPS }; } diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 321dabb7488..888c3c1bed9 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -204,27 +204,24 @@ void BackendD2D::_drawText(RenderingPayload& p) .glyphOffsets = &row->glyphOffsets[m.glyphsFrom], }; - // _getGlyphRunBlackBox returns a rectangle based on the design metrics of the - // glyphs, which doesn't take antialiasing nor font hinting into consideration. - // Especially the latter can technically move the rasterized result around - // however it pleases. A padding of 5px should hopefully avoid most issues. - // - // Technically ID2D1DeviceContext::GetGlyphRunWorldBounds would be the proper approach - // here, because it returns the exact (pixel accurate) boundaries of the glyph run. - // But that function is way, *way* more expensive than anything else, to the point - // its useless. To put numbers on it, it's about >20x more costly to call than - // DrawGlyphRun below, even though the function puts like half a million glyphs per - // second on the screen, filling hundreds of millions of pixels in the process. - const auto blackBox = _getGlyphRunDesignBounds(glyphRun, 0.0f, baselineY); - // We exclude setting the top/bottom dirty height for DECDHL double height rows, - // because DECDHL intentionally cuts off their bottom/top half respectively. 
- if (row->lineRendition != LineRendition::DoubleHeightTop) - { - row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lround(blackBox.bottom) + 5)); - } - if (row->lineRendition != LineRendition::DoubleHeightBottom) + D2D1_RECT_F bounds{}; + THROW_IF_FAILED(_renderTarget->GetGlyphRunWorldBounds({ 0.0f, baselineY }, &glyphRun, DWRITE_MEASURING_MODE_NATURAL, &bounds)); + + if (bounds.top < bounds.bottom) { - row->dirtyTop = std::min(row->dirtyTop, static_cast(lround(blackBox.top) - 5)); + // If you print the top half of a double height row (DECDHL), the expectation is that only + // the top half is visible, which requires us to keep the clip rect at the bottom of the row. + // (Vice versa for the bottom half of a double height row.) + // + // Since we used SetUnitMode(D2D1_UNIT_MODE_PIXELS), bounds.top/bottom is in pixels already and requires no conversion nor rounding. + if (row->lineRendition != FontRendition::DoubleHeightBottom) + { + row->dirtyTop = std::min(row->dirtyTop, static_cast(lrintf(bounds.top))); + } + if (row->lineRendition != FontRendition::DoubleHeightTop) + { + row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lrintf(bounds.bottom))); + } } } @@ -240,7 +237,7 @@ void BackendD2D::_drawText(RenderingPayload& p) }; _renderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); - if (row->lineRendition != LineRendition::SingleWidth) + if (row->lineRendition != FontRendition::SingleWidth) { baselineY = _drawTextPrepareLineRendition(p, baselineY, row->lineRendition); } @@ -281,7 +278,7 @@ void BackendD2D::_drawText(RenderingPayload& p) } } - if (row->lineRendition != LineRendition::SingleWidth) + if (row->lineRendition != FontRendition::SingleWidth) { _drawTextResetLineRendition(); } @@ -298,7 +295,7 @@ void BackendD2D::_drawText(RenderingPayload& p) } } -f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const +f32 BackendD2D::_drawTextPrepareLineRendition(const 
RenderingPayload& p, f32 baselineY, FontRendition lineRendition) const noexcept { const auto descender = static_cast(p.s->font->cellSize.y - p.s->font->baseline); D2D1_MATRIX_3X2_F transform{ @@ -306,12 +303,12 @@ f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 bas .m22 = 1.0f, }; - if (lineRendition >= LineRendition::DoubleHeightTop) + if (lineRendition >= FontRendition::DoubleHeightTop) { transform.m22 = 2.0f; transform.dy = -1.0f * (baselineY + descender); - if (lineRendition == LineRendition::DoubleHeightTop) + if (lineRendition == FontRendition::DoubleHeightTop) { const auto delta = static_cast(p.s->font->cellSize.y); baselineY += delta; @@ -323,7 +320,7 @@ f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 bas return baselineY; } -void BackendD2D::_drawTextResetLineRendition() const +void BackendD2D::_drawTextResetLineRendition() const noexcept { static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; _renderTarget->SetTransform(&identity); diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index b7a53b41f7b..862b360aa4b 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -19,8 +19,8 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); - f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const; - void _drawTextResetLineRendition() const; + f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, FontRendition lineRendition) const noexcept; + void _drawTextResetLineRendition() const noexcept; __declspec(noinline) f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, 
const ShapedRow* row, u16 y); diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index ffec4223252..3be3c3b140c 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -407,17 +407,17 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) { _updateFontDependents(p); } - - if (cellCountChanged) - { - _recreateBackgroundColorBitmap(p.s->cellCount); - } - if (miscChanged) { _recreateCustomShader(p); } + if (cellCountChanged) + { + _recreateBackgroundColorBitmap(p.s->cellCount); + } + // Similar to _renderTargetView above, we might have to recreate the _customRenderTargetView whenever _swapChainManager + // resets it. We only do it after calling _recreateCustomShader however, since that sets the _customPixelShader. if (_customPixelShader && !_customRenderTargetView) { _recreateCustomRenderTargetView(p.s->targetSize); @@ -1040,12 +1040,14 @@ void BackendD3D::_drawText(RenderingPayload& p) { f32 baselineX = 0; const auto baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; - GlyphCacheKey key{ .lineRendition = static_cast(row->lineRendition) }; - const auto lineRenditionScale = static_cast(row->lineRendition != LineRendition::SingleWidth); + const auto lineRenditionScale = static_cast(row->lineRendition != FontRendition::SingleWidth); for (const auto& m : row->mappings) { - key.fontFace = m.fontFace.get(); + GlyphCacheKey key{ + .fontFace = m.fontFace.get(), + .fontRendition = row->lineRendition | m.fontRendition + }; for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) { @@ -1101,15 +1103,8 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f .glyphIndices = &entry.key.glyphIndex, }; - DWRITE_FONT_METRICS fontMetrics; - glyphRun.fontFace->GetMetrics(&fontMetrics); - - DWRITE_GLYPH_METRICS glyphMetrics; - glyphRun.fontFace->GetDesignGlyphMetrics(glyphRun.glyphIndices, glyphRun.glyphCount, &glyphMetrics, false); - - // This calculates the black box of the 
glyph, or in other words, it's extents/size relative to its baseline - // origin (at 0,0). The algorithm below is a reverse engineered variant of `IDWriteTextLayout::GetMetrics`. - // The coordinates will be in pixels and the positive direction will be bottom/right. + // This calculates the black box of the glyph, or in other words, + // it's extents/size relative to its baseline origin (at 0,0). // // box.top --------++-----######--+ // (-7) || ############ @@ -1125,16 +1120,11 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f // box.left box.right // (-1) (+14) // - const f32 fontScale = glyphRun.fontEmSize / fontMetrics.designUnitsPerEm; - f32r box{ - static_cast(glyphMetrics.leftSideBearing) * fontScale, - static_cast(glyphMetrics.topSideBearing - glyphMetrics.verticalOriginY) * fontScale, - static_cast(static_cast(glyphMetrics.advanceWidth) - glyphMetrics.rightSideBearing) * fontScale, - static_cast(static_cast(glyphMetrics.advanceHeight) - glyphMetrics.bottomSideBearing - glyphMetrics.verticalOriginY) * fontScale, - }; + D2D1_RECT_F box{}; + THROW_IF_FAILED(_d2dRenderTarget->GetGlyphRunWorldBounds({}, &glyphRun, DWRITE_MEASURING_MODE_NATURAL, &box)); // box may be empty if the glyph is whitespace. - if (box.empty()) + if (box.left >= box.right || box.top >= box.bottom) { // This will indicate to `BackendD3D::_drawText` that this glyph is whitespace. It's important to set this member, // because `GlyphCacheMap` does not zero out inserted entries and `shadingType` might still contain "garbage". 
@@ -1142,14 +1132,14 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f return; } - const auto isDoubleHeight = static_cast(entry.key.lineRendition) >= LineRendition::DoubleHeightTop; + const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; std::optional transform; - if (entry.key.lineRendition) + if (lineRendition != FontRendition::None) { auto& t = transform.emplace(); t.m11 = 2.0f; - t.m22 = isDoubleHeight ? 2.0f : 1.0f; + t.m22 = lineRendition >= FontRendition::DoubleHeightTop ? 2.0f : 1.0f; box.left *= t.m11; box.top *= t.m22; @@ -1157,14 +1147,10 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f box.bottom *= t.m22; } - // To take anti-aliasing of the borders into account, we'll add a 1px padding on all 4 sides. - // This doesn't work however if font hinting causes the pixels to be offset from the design outline. - // We need to use round (and not ceil/floor) to ensure we pixel-snap individual - // glyphs correctly and form a consistent baseline across an entire run of glyphs. 
- const auto bl = lround(box.left) - 1; - const auto bt = lround(box.top) - 1; - const auto br = lround(box.right) + 1; - const auto bb = lround(box.bottom) + 1; + const auto bl = lrintf(box.left); + const auto bt = lrintf(box.top); + const auto br = lrintf(box.right); + const auto bb = lrintf(box.bottom); stbrp_rect rect{ .w = br - bl, @@ -1177,11 +1163,11 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f _d2dBeginDrawing(); - D2D1_POINT_2F baseline{ + const D2D1_POINT_2F baseline{ static_cast(rect.x - bl), static_cast(rect.y - bt), }; - D2D1_RECT_F clipRect{ + const D2D1_RECT_F clipRect{ static_cast(rect.x), static_cast(rect.y), static_cast(rect.x + rect.w), @@ -1222,7 +1208,7 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; _d2dRenderTarget->SetTransform(&identity); - if (isDoubleHeight) + if (lineRendition >= FontRendition::DoubleHeightTop) { _splitDoubleHeightGlyph(p, entry); } @@ -1249,28 +1235,27 @@ void BackendD3D::_drawGlyphRetry(const RenderingPayload& p, stbrp_rect& rect) // and create a second entry in our glyph cache hashmap that contains the other half. void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) { - static constexpr auto lrTop = static_cast(LineRendition::DoubleHeightTop); - static constexpr auto lrBottom = static_cast(LineRendition::DoubleHeightBottom); - // Twice the line height, twice the descender gap. For both. entry.data.offset.y -= p.s->font->descender; - const auto isTop = entry.key.lineRendition == lrTop; - const auto altRendition = isTop ? 
lrBottom : lrTop; - const auto topSize = clamp(-entry.data.offset.y - p.s->font->baseline, 0, static_cast(entry.data.size.y)); + const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; + const auto fontRendition = entry.key.fontRendition & FontRendition::FontRenditionMask; + const auto isTop = lineRendition == FontRendition::DoubleHeightTop; + const auto altRendition = isTop ? FontRendition::DoubleHeightBottom : FontRendition::DoubleHeightTop; - bool inserted; auto key2 = entry.key; - key2.lineRendition = altRendition; + key2.fontRendition = altRendition | fontRendition; + + bool inserted = false; auto& entry2 = _glyphCache.FindOrInsert(key2, inserted); entry2.data = entry.data; auto& top = isTop ? entry : entry2; auto& bottom = isTop ? entry2 : entry; + const auto topSize = clamp(-entry.data.offset.y - p.s->font->baseline, 0, static_cast(entry.data.size.y)); top.data.offset.y += p.s->font->cellSize.y; top.data.size.y = topSize; - bottom.data.offset.y += topSize; bottom.data.size.y = std::max(0, bottom.data.size.y - topSize); bottom.data.texcoord.y += topSize; diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index dce80dd67fb..f88bff322df 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -87,14 +87,15 @@ namespace Microsoft::Console::Render::Atlas // holding a reference / the reference count doesn't drop to 0 (see ActiveFaceCache). // This allows us to hash the value of the pointer as if it was uniquely identifying the font face. 
IDWriteFontFace* fontFace = nullptr; - u16 glyphIndex; - u16 lineRendition; + FontRendition fontRendition = FontRendition::None; + u16 glyphIndex = 0; constexpr bool operator==(const GlyphCacheKey& rhs) const noexcept { return __builtin_memcmp(this, &rhs, GlyphCacheKeyDataSize) == 0; } }; + static_assert(sizeof(GlyphCacheKey) == 2 * sizeof(void*)); // Due to padding on 64-Bit systems, sizeof(GlyphCacheKey) will be 16, // but the actual contents of the struct still only be 12 bytes. diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index eee789072f5..f7f10553790 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -13,44 +13,44 @@ namespace Microsoft::Console::Render::Atlas { #define ATLAS_FLAG_OPS(type, underlying) \ - friend constexpr type operator~(type v) noexcept \ + constexpr type operator~(type v) noexcept \ { \ return static_cast(~static_cast(v)); \ } \ - friend constexpr type operator|(type lhs, type rhs) noexcept \ + constexpr type operator|(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) | static_cast(rhs)); \ } \ - friend constexpr type operator&(type lhs, type rhs) noexcept \ + constexpr type operator&(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) & static_cast(rhs)); \ } \ - friend constexpr type operator^(type lhs, type rhs) noexcept \ + constexpr type operator^(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) ^ static_cast(rhs)); \ } \ - friend constexpr void operator|=(type& lhs, type rhs) noexcept \ + constexpr void operator|=(type& lhs, type rhs) noexcept \ { \ lhs = lhs | rhs; \ } \ - friend constexpr void operator&=(type& lhs, type rhs) noexcept \ + constexpr void operator&=(type& lhs, type rhs) noexcept \ { \ lhs = lhs & rhs; \ } \ - friend constexpr void operator^=(type& lhs, type rhs) noexcept \ + constexpr void operator^=(type& lhs, type rhs) noexcept \ { \ lhs = lhs ^ rhs; \ } -#define ATLAS_POD_OPS(type) \ - constexpr bool 
operator==(const type& rhs) const noexcept \ - { \ - return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ - } \ - \ - constexpr bool operator!=(const type& rhs) const noexcept \ - { \ - return !(*this == rhs); \ +#define ATLAS_POD_OPS(type) \ + constexpr bool operator==(const type& rhs) const noexcept \ + { \ + return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0; \ + } \ + \ + constexpr bool operator!=(const type& rhs) const noexcept \ + { \ + return !(*this == rhs); \ } template @@ -311,6 +311,10 @@ namespace Microsoft::Console::Render::Atlas u16 thinLineWidth = 0; u16 dpi = 96; u8 antialiasingMode = DefaultAntialiasingMode; + + std::vector softFontPattern; + til::size softFontCellSize; + size_t softFontCenteringHint = 0; }; struct CursorSettings @@ -340,15 +344,29 @@ namespace Microsoft::Console::Render::Atlas u16x2 cellCount; }; - struct FontDependents + enum class FontRelevantAttributes : u8 { - Buffer textFormatAxes[2][2]; + None = 0, + Bold = 0b01, + Italic = 0b10, }; + ATLAS_FLAG_OPS(FontRelevantAttributes, u8) - struct Dependents + // This is u16 so that it fits right into BackendD3D's GlyphCacheKey alignment. 
+ enum class FontRendition : u16 { - FontDependents font; + None = 0, + + LineRenditionMask = 0x00ff, + SingleWidth = LineRendition::SingleWidth, + DoubleWidth = LineRendition::DoubleWidth, + DoubleHeightTop = LineRendition::DoubleHeightTop, + DoubleHeightBottom = LineRendition::DoubleHeightBottom, + + FontRenditionMask = 0xff00, + SoftFont = 0x8000, }; + ATLAS_FLAG_OPS(FontRendition, u16) struct FontMapping { @@ -356,6 +374,7 @@ namespace Microsoft::Console::Render::Atlas f32 fontEmSize = 0; u32 glyphsFrom = 0; u32 glyphsTo = 0; + FontRendition fontRendition = FontRendition::None; }; struct GridLineRange @@ -376,7 +395,7 @@ namespace Microsoft::Console::Render::Atlas glyphOffsets.clear(); colors.clear(); gridLineRanges.clear(); - lineRendition = LineRendition::SingleWidth; + lineRendition = FontRendition::None; selectionFrom = 0; selectionTo = 0; dirtyTop = y * cellHeight; @@ -389,7 +408,7 @@ namespace Microsoft::Console::Render::Atlas std::vector glyphOffsets; // same size as glyphIndices std::vector colors; // same size as glyphIndices std::vector gridLineRanges; - LineRendition lineRendition = LineRendition::SingleWidth; + FontRendition lineRendition = FontRendition::None; u16 selectionFrom = 0; u16 selectionTo = 0; til::CoordType dirtyTop = 0; @@ -414,7 +433,6 @@ namespace Microsoft::Console::Render::Atlas //// Parameters which change seldom. til::generational s; - Dependents d; //// Parameters which change every frame. // This is the backing buffer for `rows`. 
From 2c06f8b1bb565b459b3a2eee202be30327ce9c85 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 28 Mar 2023 15:11:58 +0200 Subject: [PATCH 14/37] Fix glyph retry crash, Hyperlink hovering, Swap chain startup crash, DECDHL positioning --- src/cascadia/TerminalControl/ControlCore.cpp | 2 +- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/Backend.h | 4 - src/renderer/atlas/BackendD3D.cpp | 129 +++++++++---------- src/renderer/atlas/BackendD3D.h | 5 +- src/renderer/atlas/common.h | 15 ++- src/renderer/base/renderer.cpp | 36 ++++-- src/renderer/base/renderer.hpp | 4 + src/renderer/dx/DxRenderer.cpp | 8 +- src/renderer/dx/DxRenderer.hpp | 1 - 10 files changed, 112 insertions(+), 94 deletions(-) diff --git a/src/cascadia/TerminalControl/ControlCore.cpp b/src/cascadia/TerminalControl/ControlCore.cpp index 2f3d4e44a86..6b11d9cc4ba 100644 --- a/src/cascadia/TerminalControl/ControlCore.cpp +++ b/src/cascadia/TerminalControl/ControlCore.cpp @@ -686,7 +686,7 @@ namespace winrt::Microsoft::Terminal::Control::implementation _lastHoveredId = newId; _lastHoveredInterval = newInterval; - _renderEngine->UpdateHyperlinkHoveredId(newId); + _renderer->UpdateHyperlinkHoveredId(newId); _renderer->UpdateLastHoveredInterval(newInterval); _renderer->TriggerRedrawAll(); } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index a9d37fed617..ff6b21f7ff7 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -107,7 +107,7 @@ namespace Microsoft::Console::Render::Atlas struct ApiState { - til::generational s; + GenerationalSettings s = DirtyGenerationalSettings(); // This structure is loosely sorted in chunks from "very often accessed together" // to seldom accessed and/or usually not together. 
diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index fdd1fdc2761..21498fa522b 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -32,10 +32,6 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DUMP_RENDER_TARGET 0 #define ATLAS_DEBUG_DUMP_RENDER_TARGET_PATH LR"(%USERPROFILE%\Downloads\AtlasEngine)" - // Draws a background behind each glyph placed into the BackendD3D glyph atlas. - // This can be helpful when debugging bugs in the algorithm that measures the size of glyph. -#define ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS 0 - struct SwapChainManager { void UpdateSwapChainSettings(const RenderingPayload& p, IUnknown* device, auto&& prepareRecreate, auto&& prepareResize) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 3be3c3b140c..b30de58723a 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -53,6 +53,11 @@ BackendD3D::GlyphCacheMap& BackendD3D::GlyphCacheMap::operator=(GlyphCacheMap&& return *this; } +size_t BackendD3D::GlyphCacheMap::Size() const noexcept +{ + return _size; +} + void BackendD3D::GlyphCacheMap::Clear() noexcept { for (auto& entry : _map) @@ -1052,27 +1057,46 @@ void BackendD3D::_drawText(RenderingPayload& p) for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) { key.glyphIndex = row->glyphIndices[x]; - bool inserted = false; - auto& entry = _glyphCache.FindOrInsert(key, inserted); - if (inserted) - { - _drawGlyph(p, entry, m.fontEmSize); - } - if (entry.data.shadingType != ShadingType::Default) + // This loop merely exists to allow us to retry rendering a glyph if the glyph atlas was full. + // We need to retry here, because a retry will cause the atlas texture as well as the + // _glyphCache hashmap to be cleared, and so we'll have to call FindOrInsert() again. 
+ for (;;) { - auto l = static_cast(baselineX + row->glyphOffsets[x].advanceOffset + 0.5f) + entry.data.offset.x; - const auto t = static_cast(baselineY - row->glyphOffsets[x].ascenderOffset + 0.5f) + entry.data.offset.y; + bool inserted = false; + auto& entry = _glyphCache.FindOrInsert(key, inserted); - l <<= lineRenditionScale; + if (inserted) + { + if (!_drawGlyph(p, entry, m.fontEmSize)) + { + continue; + } + } + + if (entry.data.shadingType != ShadingType::Default) + { + auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); + auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); - row->dirtyTop = std::min(row->dirtyTop, t); - row->dirtyBottom = std::max(row->dirtyBottom, t + entry.data.size.y); + l <<= lineRenditionScale; - _appendQuad({ static_cast(l), static_cast(t) }, entry.data.size, entry.data.texcoord, row->colors[x], entry.data.shadingType); - } + l += entry.data.offset.x; + t += entry.data.offset.y; + + row->dirtyTop = std::min(row->dirtyTop, t); + row->dirtyBottom = std::max(row->dirtyBottom, t + entry.data.size.y); + + const i16x2 position{ + static_cast(l), + static_cast(t), + }; + _appendQuad(position, entry.data.size, entry.data.texcoord, row->colors[x], entry.data.shadingType); + } - baselineX += row->glyphAdvances[x]; + baselineX += row->glyphAdvances[x]; + break; + } } } @@ -1094,7 +1118,7 @@ void BackendD3D::_drawText(RenderingPayload& p) _d2dEndDrawing(); } -void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) +bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) { const DWRITE_GLYPH_RUN glyphRun{ .fontFace = entry.key.fontFace, @@ -1103,6 +1127,22 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f .glyphIndices = &entry.key.glyphIndex, }; + const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; + + std::optional transform; + if (lineRendition != 
FontRendition::None) + { + auto& t = transform.emplace(); + t.m11 = 2.0f; + t.m22 = lineRendition >= FontRendition::DoubleHeightTop ? 2.0f : 1.0f; + _d2dRenderTarget->SetTransform(&t); + } + + const auto restoreTransform = wil::scope_exit([&]() { + static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; + _d2dRenderTarget->SetTransform(&identity); + }); + // This calculates the black box of the glyph, or in other words, // it's extents/size relative to its baseline origin (at 0,0). // @@ -1129,22 +1169,7 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f // This will indicate to `BackendD3D::_drawText` that this glyph is whitespace. It's important to set this member, // because `GlyphCacheMap` does not zero out inserted entries and `shadingType` might still contain "garbage". entry.data.shadingType = ShadingType::Default; - return; - } - - const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; - - std::optional transform; - if (lineRendition != FontRendition::None) - { - auto& t = transform.emplace(); - t.m11 = 2.0f; - t.m22 = lineRendition >= FontRendition::DoubleHeightTop ? 
2.0f : 1.0f; - - box.left *= t.m11; - box.top *= t.m22; - box.right *= t.m11; - box.bottom *= t.m22; + return true; } const auto bl = lrintf(box.left); @@ -1158,7 +1183,8 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f }; if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) { - _drawGlyphRetry(p, rect); + _drawGlyphPrepareRetry(p); + return false; } _d2dBeginDrawing(); @@ -1167,23 +1193,6 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f static_cast(rect.x - bl), static_cast(rect.y - bt), }; - const D2D1_RECT_F clipRect{ - static_cast(rect.x), - static_cast(rect.y), - static_cast(rect.x + rect.w), - static_cast(rect.y + rect.h), - }; - _d2dRenderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); - -#if ATLAS_DEBUG_COLORIZE_GLYPH_ATLAS - { - const auto d2dColor = colorFromU32(colorbrewer::pastel1[_colorizeGlyphAtlasCounter] | 0x3f000000); - _colorizeGlyphAtlasCounter = (_colorizeGlyphAtlasCounter + 1) % std::size(colorbrewer::pastel1); - wil::com_ptr brush; - THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&d2dColor, nullptr, brush.addressof())); - _d2dRenderTarget->FillRectangle(&clipRect, brush.get()); - } -#endif if (transform) { @@ -1203,30 +1212,20 @@ void BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f entry.data.texcoord.y = rect.y; entry.data.shadingType = colorGlyph ? 
ShadingType::Passthrough : _textShadingType; - if (transform) + if (lineRendition >= FontRendition::DoubleHeightTop) { - static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; - _d2dRenderTarget->SetTransform(&identity); - - if (lineRendition >= FontRendition::DoubleHeightTop) - { - _splitDoubleHeightGlyph(p, entry); - } + _splitDoubleHeightGlyph(p, entry); } - _d2dRenderTarget->PopAxisAlignedClip(); + return true; } -void BackendD3D::_drawGlyphRetry(const RenderingPayload& p, stbrp_rect& rect) +void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) { + THROW_HR_IF_MSG(E_UNEXPECTED, _glyphCache.Size() == 0, "BackendD3D::_drawGlyph deadlock"); _d2dEndDrawing(); _flushQuads(p); _resetGlyphAtlasAndBeginDraw(p); - - if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) - { - THROW_HR_MSG(E_UNEXPECTED, "BackendD3D::_drawGlyph deadlock"); - } } // If this is a double-height glyph (DECDHL), we need to split it into 2 glyph entries: diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index f88bff322df..265d4e69df3 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -126,6 +126,7 @@ namespace Microsoft::Console::Render::Atlas const GlyphCacheMap& operator=(const GlyphCacheMap&) = delete; GlyphCacheMap& operator=(GlyphCacheMap&& other) noexcept; + size_t Size() const noexcept; void Clear() noexcept; GlyphCacheEntry& FindOrInsert(const GlyphCacheKey& key, bool& inserted); @@ -163,8 +164,8 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); - __declspec(noinline) void _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); - __declspec(noinline) void _drawGlyphRetry(const RenderingPayload& p, stbrp_rect& rect); + __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 
fontEmSize); + void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index f7f10553790..dfdcdb7a580 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -344,6 +344,19 @@ namespace Microsoft::Console::Render::Atlas u16x2 cellCount; }; + using GenerationalSettings = til::generational; + + inline GenerationalSettings DirtyGenerationalSettings() noexcept + { + return GenerationalSettings{ + til::generation_t{ 1 }, + til::generational{ til::generation_t{ 1 } }, + til::generational{ til::generation_t{ 1 } }, + til::generational{ til::generation_t{ 1 } }, + til::generational{ til::generation_t{ 1 } }, + }; + } + enum class FontRelevantAttributes : u8 { None = 0, @@ -432,7 +445,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr dxgiFactory; //// Parameters which change seldom. - til::generational s; + GenerationalSettings s; //// Parameters which change every frame. // This is the backing buffer for `rows`. diff --git a/src/renderer/base/renderer.cpp b/src/renderer/base/renderer.cpp index ad8a3b3581e..8d59e677735 100644 --- a/src/renderer/base/renderer.cpp +++ b/src/renderer/base/renderer.cpp @@ -985,19 +985,10 @@ void Renderer::_PaintBufferOutputGridLineHelper(_In_ IRenderEngine* const pEngin auto lines = Renderer::s_GetGridlines(textAttribute); // For now, we dash underline patterns and switch to regular underline on hover - // Since we're only rendering pattern links on *hover*, there's no point in checking - // the pattern range if we aren't currently hovering. 
- if (_hoveredInterval.has_value()) + if (_isHoveredHyperlink(textAttribute) || _isInHoveredInterval(coordTarget)) { - const til::point coordTargetTil{ coordTarget }; - if (_hoveredInterval->start <= coordTargetTil && - coordTargetTil <= _hoveredInterval->stop) - { - if (_pData->GetPatternId(coordTarget).size() > 0) - { - lines.set(GridLines::Underline); - } - } + lines.reset(GridLines::HyperlinkUnderline); + lines.set(GridLines::Underline); } // Return early if there are no lines to paint. @@ -1010,6 +1001,18 @@ void Renderer::_PaintBufferOutputGridLineHelper(_In_ IRenderEngine* const pEngin } } +bool Renderer::_isHoveredHyperlink(const TextAttribute& textAttribute) const noexcept +{ + return _hyperlinkHoveredId && _hyperlinkHoveredId == textAttribute.GetHyperlinkId(); +} + +bool Renderer::_isInHoveredInterval(const til::point coordTarget) const noexcept +{ + return _hoveredInterval && + _hoveredInterval->start <= coordTarget && coordTarget <= _hoveredInterval->stop && + _pData->GetPatternId(coordTarget).size() > 0; +} + // Routine Description: // - Retrieve information about the cursor, and pack it into a CursorOptions // which the render engine can use for painting the cursor. 
@@ -1348,6 +1351,15 @@ void Renderer::ResetErrorStateAndResume() EnablePainting(); } +void Renderer::UpdateHyperlinkHoveredId(uint16_t id) noexcept +{ + _hyperlinkHoveredId = id; + FOREACH_ENGINE(pEngine) + { + pEngine->UpdateHyperlinkHoveredId(id); + } +} + void Renderer::UpdateLastHoveredInterval(const std::optional& newInterval) { _hoveredInterval = newInterval; diff --git a/src/renderer/base/renderer.hpp b/src/renderer/base/renderer.hpp index e555bfa0e8b..70d3050ec8a 100644 --- a/src/renderer/base/renderer.hpp +++ b/src/renderer/base/renderer.hpp @@ -88,6 +88,7 @@ namespace Microsoft::Console::Render void SetRendererEnteredErrorStateCallback(std::function pfn); void ResetErrorStateAndResume(); + void UpdateHyperlinkHoveredId(uint16_t id) noexcept; void UpdateLastHoveredInterval(const std::optional::interval>& newInterval); private: @@ -100,6 +101,7 @@ namespace Microsoft::Console::Render void _PaintBufferOutput(_In_ IRenderEngine* const pEngine); void _PaintBufferOutputHelper(_In_ IRenderEngine* const pEngine, TextBufferCellIterator it, const til::point target, const bool lineWrapped); void _PaintBufferOutputGridLineHelper(_In_ IRenderEngine* const pEngine, const TextAttribute textAttribute, const size_t cchLine, const til::point coordTarget); + bool _isHoveredHyperlink(const TextAttribute& textAttribute) const noexcept; void _PaintSelection(_In_ IRenderEngine* const pEngine); void _PaintCursor(_In_ IRenderEngine* const pEngine); void _PaintOverlays(_In_ IRenderEngine* const pEngine); @@ -109,6 +111,7 @@ namespace Microsoft::Console::Render std::vector _GetSelectionRects() const; void _ScrollPreviousSelection(const til::point delta); [[nodiscard]] HRESULT _PaintTitle(IRenderEngine* const pEngine); + bool _isInHoveredInterval(til::point coordTarget) const noexcept; [[nodiscard]] std::optional _GetCursorInfo(); [[nodiscard]] HRESULT _PrepareRenderInfo(_In_ IRenderEngine* const pEngine); @@ -118,6 +121,7 @@ namespace Microsoft::Console::Render std::unique_ptr 
_pThread; static constexpr size_t _firstSoftFontChar = 0xEF20; size_t _lastSoftFontChar = 0; + uint16_t _hyperlinkHoveredId = 0; std::optional::interval> _hoveredInterval; Microsoft::Console::Types::Viewport _viewport; std::vector _clusterBuffer; diff --git a/src/renderer/dx/DxRenderer.cpp b/src/renderer/dx/DxRenderer.cpp index 70e6e01806e..f74bf528dc1 100644 --- a/src/renderer/dx/DxRenderer.cpp +++ b/src/renderer/dx/DxRenderer.cpp @@ -821,7 +821,6 @@ static constexpr D2D1_ALPHA_MODE _dxgiAlphaToD2d1Alpha(DXGI_ALPHA_MODE mode) noe // 1234123412341234 static constexpr std::array hyperlinkDashes{ 1.f, 3.f }; RETURN_IF_FAILED(_d2dFactory->CreateStrokeStyle(&_dashStrokeStyleProperties, hyperlinkDashes.data(), gsl::narrow_cast(hyperlinkDashes.size()), &_dashStrokeStyle)); - _hyperlinkStrokeStyle = _dashStrokeStyle; // If in composition mode, apply scaling factor matrix if (_chainMode == SwapChainMode::ForComposition) @@ -1723,7 +1722,7 @@ try }; const auto DrawHyperlinkLine = [=](const auto x0, const auto y0, const auto x1, const auto y1, const auto strokeWidth) noexcept { - _d2dDeviceContext->DrawLine({ x0, y0 }, { x1, y1 }, _d2dBrushForeground.Get(), strokeWidth, _hyperlinkStrokeStyle.Get()); + _d2dDeviceContext->DrawLine({ x0, y0 }, { x1, y1 }, _d2dBrushForeground.Get(), strokeWidth, _dashStrokeStyle.Get()); }; // NOTE: Line coordinates are centered within the line, so they need to be @@ -1980,11 +1979,6 @@ try _drawingContext->useItalicFont = textAttributes.IsItalic(); } - if (textAttributes.IsHyperlink()) - { - _hyperlinkStrokeStyle = (textAttributes.GetHyperlinkId() == _hyperlinkHoveredId) ? 
_strokeStyle : _dashStrokeStyle; - } - // Update pixel shader settings as background color might have changed _ComputePixelShaderSettings(); diff --git a/src/renderer/dx/DxRenderer.hpp b/src/renderer/dx/DxRenderer.hpp index 9dc9fdff741..bfb11205a05 100644 --- a/src/renderer/dx/DxRenderer.hpp +++ b/src/renderer/dx/DxRenderer.hpp @@ -203,7 +203,6 @@ namespace Microsoft::Console::Render ::Microsoft::WRL::ComPtr _customRenderer; ::Microsoft::WRL::ComPtr _strokeStyle; ::Microsoft::WRL::ComPtr _dashStrokeStyle; - ::Microsoft::WRL::ComPtr _hyperlinkStrokeStyle; std::unique_ptr _fontRenderData; DxSoftFont _softFont; From c32bfec5bce4e14e49dfc83f120b061f511f1e6a Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 28 Mar 2023 15:17:12 +0200 Subject: [PATCH 15/37] Silence spell check --- .github/actions/spelling/excludes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/actions/spelling/excludes.txt b/.github/actions/spelling/excludes.txt index bc509a5669e..3f6e31b1072 100644 --- a/.github/actions/spelling/excludes.txt +++ b/.github/actions/spelling/excludes.txt @@ -109,6 +109,7 @@ ^src/tools/integrity/packageuwp/ConsoleUWP\.appxSources$ ^src/tools/lnkd/lnkd\.bat$ ^src/tools/pixels/pixels\.bat$ +^src/tools/RenderingTests/main.cpp$ ^src/tools/texttests/fira\.txt$ ^src/tools/U8U16Test/(?:fr|ru|zh)\.txt$ ^src/types/ut_types/UtilsTests.cpp$ From f068688ca38ea8f75e83888bd231aa5b528de222 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 28 Mar 2023 23:43:58 +0200 Subject: [PATCH 16/37] Simplify dxgi adapter invalidation, Fix dirty rect on backend recreation --- src/renderer/atlas/AtlasEngine.cpp | 5 +- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/AtlasEngine.r.cpp | 24 +++++----- src/renderer/atlas/Backend.cpp | 71 ++++++++++++++++------------ src/renderer/atlas/Backend.h | 11 +++-- src/renderer/atlas/BackendD3D.cpp | 2 +- src/renderer/atlas/common.h | 3 +- 7 files changed, 68 insertions(+), 50 deletions(-) diff --git 
a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index dbfb9bf2eb7..cb636225e39 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -464,6 +464,9 @@ void AtlasEngine::_handleSettingsUpdate() _recreateCellCountDependentResources(); } + // !!! NOTE !!! + // This will indirectly mark the entire viewport as dirty, but AtlasEngine::_recreateBackend() + // may do the same, so make sure that that function stays in sync with what this code does. _api.invalidatedRows = invalidatedRowsAll; u16 y = 0; @@ -829,7 +832,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) { } } - + const auto initialIndicesCount = row.glyphIndices.size(); row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size()), FontRendition::SoftFont); } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index ff6b21f7ff7..5a53fcc0723 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -23,7 +23,6 @@ namespace Microsoft::Console::Render::Atlas [[nodiscard]] HRESULT EndPaint() noexcept override; [[nodiscard]] bool RequiresContinuousRedraw() noexcept override; void WaitUntilCanRender() noexcept override; - void _recreateBackend(); [[nodiscard]] HRESULT Present() noexcept override; [[nodiscard]] HRESULT PrepareForTeardown(_Out_ bool* pForcePaint) noexcept override; [[nodiscard]] HRESULT ScrollFrame() noexcept override; @@ -93,6 +92,7 @@ namespace Microsoft::Console::Render::Atlas void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontSettings* fontMetrics = nullptr) const; // AtlasEngine.r.cpp + void _recreateBackend(); static constexpr u16 u16min = 0x0000; static constexpr u16 u16max = 0xffff; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index 
ecfc0a459c7..a17680e9ebf 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -32,23 +32,16 @@ using namespace Microsoft::Console::Render::Atlas; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { - if (!_p.dirtyRectInPx) - { - return S_OK; - } - - if (_p.dxgiFactory && !_p.dxgiFactory->IsCurrent()) + if (!_b) { - _p.dxgiFactory.reset(); - _b.reset(); + _recreateBackend(); } - if (!_b) + if (_p.dirtyRectInPx) { - _recreateBackend(); + _b->Render(_p); } - _b->Render(_p); return S_OK; } catch (const wil::ResultException& exception) @@ -228,4 +221,13 @@ void AtlasEngine::_recreateBackend() { _b = std::make_unique(std::move(device), std::move(deviceContext)); } + + // !!! NOTE !!! + // Normally the viewport is indirectly marked as dirty by `AtlasEngine::_handleSettingsUpdate()` whenever + // the settings change, but the `!_p.dxgiFactory->IsCurrent()` check is not part of the settings change + // flow and so we have to manually recreate how AtlasEngine.cpp marks viewports as dirty here. + // This ensures that the backends redraw their entire viewports whenever a new swap chain is created. + _p.dirtyRectInPx = { 0, 0, _p.s->targetSize.x, _p.s->targetSize.y }; + _p.invalidatedRows = { 0, _p.s->cellCount.y }; + _p.scrollOffset = 0; } diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 24a511e23a8..ea93b7c4ea8 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -87,35 +87,36 @@ void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* dev _swapChain.reset(); _frameLatencyWaitableObject.reset(); - DXGI_SWAP_CHAIN_DESC1 desc{}; - desc.Width = p.s->targetSize.x; - desc.Height = p.s->targetSize.y; - desc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; - desc.SampleDesc.Count = 1; - desc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; - // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window. 
- // 3 buffers seems to guarantee a stable framerate at display frequency at all times. - desc.BufferCount = 3; - desc.Scaling = DXGI_SCALING_NONE; - // DXGI_SWAP_EFFECT_FLIP_DISCARD is a mode that was created at a time were display drivers - // lacked support for Multiplane Overlays (MPO) and were copying buffers was expensive. - // This allowed DWM to quickly draw overlays (like gamebars) on top of rendered content. - // With faster GPU memory in general and with support for MPO in particular this isn't - // really an advantage anymore. Instead DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL allows for a - // more "intelligent" composition and display updates to occur like Panel Self Refresh - // (PSR) which requires dirty rectangles (Present1 API) to work correctly. - desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; - // If our background is opaque we can enable "independent" flips by setting DXGI_ALPHA_MODE_IGNORE. - // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. - desc.AlphaMode = p.s->target->enableTransparentBackground ? DXGI_ALPHA_MODE_PREMULTIPLIED : DXGI_ALPHA_MODE_IGNORE; - desc.Flags = flags; - - wil::com_ptr swapChain0; + DXGI_SWAP_CHAIN_DESC1 desc{ + .Width = p.s->targetSize.x, + .Height = p.s->targetSize.y, + .Format = DXGI_FORMAT_B8G8R8A8_UNORM, + .SampleDesc = { .Count = 1 }, + .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT, + // Sometimes up to 2 buffers are locked, for instance during screen capture or when moving the window. + // 3 buffers seems to guarantee a stable framerate at display frequency at all times. + .BufferCount = 3, + .Scaling = DXGI_SCALING_NONE, + // DXGI_SWAP_EFFECT_FLIP_DISCARD is a mode that was created at a time were display drivers + // lacked support for Multiplane Overlays (MPO) and were copying buffers was expensive. + // This allowed DWM to quickly draw overlays (like gamebars) on top of rendered content. 
+ // With faster GPU memory in general and with support for MPO in particular this isn't + // really an advantage anymore. Instead DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL allows for a + // more "intelligent" composition and display updates to occur like Panel Self Refresh + // (PSR) which requires dirty rectangles (Present1 API) to work correctly. + .SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL, + // If our background is opaque we can enable "independent" flips by setting DXGI_ALPHA_MODE_IGNORE. + // As our swap chain won't have to compose with DWM anymore it reduces the display latency dramatically. + .AlphaMode = p.s->target->enableTransparentBackground ? DXGI_ALPHA_MODE_PREMULTIPLIED : DXGI_ALPHA_MODE_IGNORE, + .Flags = flags, + }; + + wil::com_ptr swapChain1; if (p.s->target->hwnd) { desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - THROW_IF_FAILED(p.dxgiFactory->CreateSwapChainForHwnd(device, p.s->target->hwnd, &desc, nullptr, nullptr, swapChain0.addressof())); + THROW_IF_FAILED(p.dxgiFactory->CreateSwapChainForHwnd(device, p.s->target->hwnd, &desc, nullptr, nullptr, swapChain1.addressof())); } else { @@ -126,12 +127,13 @@ void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* dev // As per: https://docs.microsoft.com/en-us/windows/win32/api/dcomp/nf-dcomp-dcompositioncreatesurfacehandle static constexpr DWORD COMPOSITIONSURFACE_ALL_ACCESS = 0x0003L; THROW_IF_FAILED(DCompositionCreateSurfaceHandle(COMPOSITIONSURFACE_ALL_ACCESS, nullptr, _swapChainHandle.addressof())); - THROW_IF_FAILED(p.dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(device, _swapChainHandle.get(), &desc, nullptr, swapChain0.addressof())); + THROW_IF_FAILED(p.dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(device, _swapChainHandle.get(), &desc, nullptr, swapChain1.addressof())); } - _swapChain = swapChain0.query(); + _swapChain = swapChain1.query(); _frameLatencyWaitableObject.reset(_swapChain->GetFrameLatencyWaitableObject()); _targetGeneration = 
p.s->target.generation(); + _fontGeneration = {}; _targetSize = p.s->targetSize; _waitForPresentation = true; @@ -147,18 +149,25 @@ void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* dev } } -void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) const +void SwapChainManager::_resizeBuffers(const RenderingPayload& p) { - // XAML's SwapChainPanel combines the worst of both worlds and always applies a transform to - // the swap chain to make it match the display scale. This if condition undoes the damage. - if (_fontGeneration != p.s->font.generation() && !p.s->target->hwnd) + THROW_IF_FAILED(_swapChain->ResizeBuffers(0, p.s->targetSize.x, p.s->targetSize.y, DXGI_FORMAT_UNKNOWN, flags)); + _targetSize = p.s->targetSize; +} + +void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) +{ + if (!p.s->target->hwnd) { + // XAML's SwapChainPanel combines the worst of both worlds and always applies a transform + // to the swap chain to make it match the display scale. This undoes the damage. const DXGI_MATRIX_3X2_F matrix{ ._11 = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(p.s->font->dpi), ._22 = static_cast(USER_DEFAULT_SCREEN_DPI) / static_cast(p.s->font->dpi), }; THROW_IF_FAILED(_swapChain->SetMatrixTransform(&matrix)); } + _fontGeneration = p.s->font.generation(); } // Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. 
diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 21498fa522b..e5a0cd3d2c1 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -47,11 +47,13 @@ namespace Microsoft::Console::Render::Atlas else if (_targetSize != p.s->targetSize) { prepareResize(); - THROW_IF_FAILED(_swapChain->ResizeBuffers(0, p.s->targetSize.x, p.s->targetSize.y, DXGI_FORMAT_UNKNOWN, flags)); - _targetSize = p.s->targetSize; + _resizeBuffers(p); } - _updateMatrixTransform(p); + if (_fontGeneration != p.s->font.generation()) + { + _updateMatrixTransform(p); + } } wil::com_ptr GetBuffer() const; @@ -60,7 +62,8 @@ namespace Microsoft::Console::Render::Atlas private: void _createSwapChain(const RenderingPayload& p, IUnknown* device); - void _updateMatrixTransform(const RenderingPayload& p) const; + void _resizeBuffers(const RenderingPayload& p); + void _updateMatrixTransform(const RenderingPayload& p); static constexpr DXGI_SWAP_CHAIN_FLAG flags = ATLAS_DEBUG_DISABLE_FRAME_LATENCY_WAITABLE_OBJECT ? 
DXGI_SWAP_CHAIN_FLAG{} : DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index b30de58723a..755bef29f5d 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1138,7 +1138,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f _d2dRenderTarget->SetTransform(&t); } - const auto restoreTransform = wil::scope_exit([&]() { + const auto restoreTransform = wil::scope_exit([&]() noexcept { static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; _d2dRenderTarget->SetTransform(&identity); }); diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index dfdcdb7a580..33365042969 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -457,9 +457,10 @@ namespace Microsoft::Console::Render::Atlas // 1 ensures that the backends redraw the background, even if the background is // entirely black, just like `backgroundBitmap` is all back after it gets created. 
til::generation_t backgroundBitmapGeneration{ 1 }; + u16r cursorRect; + til::rect dirtyRectInPx; u16x2 invalidatedRows; - u16r cursorRect; i16 scrollOffset = 0; }; From f95d435fb7aad0b178c8d3fec0ce25271315272e Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 30 Mar 2023 19:25:21 +0200 Subject: [PATCH 17/37] DWM folks said to test for IsCurrent(), Added basic soft font support --- src/renderer/atlas/AtlasEngine.cpp | 106 ++++++++++++------ src/renderer/atlas/AtlasEngine.h | 1 + src/renderer/atlas/AtlasEngine.r.cpp | 94 +++++++++------- src/renderer/atlas/Backend.cpp | 4 +- src/renderer/atlas/BackendD2D.cpp | 26 +++-- src/renderer/atlas/BackendD2D.h | 2 +- src/renderer/atlas/BackendD3D.cpp | 157 +++++++++++++++++++++------ src/renderer/atlas/BackendD3D.h | 27 +++-- src/renderer/atlas/common.h | 144 +++++++++++++++++++++--- 9 files changed, 411 insertions(+), 150 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index cb636225e39..6f7e9c7e818 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -132,11 +132,10 @@ try // Scrolling the background bitmap is a lot easier because we can rely on memmove which works // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. 
{ - const auto width = _p.s->cellCount.x; const auto beg = _p.backgroundBitmap.begin(); const auto end = _p.backgroundBitmap.end(); - const auto src = beg - std::min(0, offset) * width; - const auto dst = beg + std::max(0, offset) * width; + const auto src = beg - std::min(0, offset) * _p.backgroundBitmapStride; + const auto dst = beg + std::max(0, offset) * _p.backgroundBitmapStride; const auto count = end - std::max(src, dst); assert(dst >= beg && dst + count <= end); assert(src >= beg && src + count <= end); @@ -205,6 +204,10 @@ try } } +#if ATLAS_DEBUG_CONTINUOUS_REDRAW + _p.MarkAllAsDirt(); +#endif + return S_OK; } CATCH_RETURN() @@ -246,7 +249,7 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PrepareLineTransform(const LineRendition lineRendition, const til::CoordType targetRow, const til::CoordType viewportLeft) noexcept { const auto y = gsl::narrow_cast(clamp(targetRow, 0, _p.s->cellCount.y)); - _p.rows[y]->lineRendition = static_cast(lineRendition); + _p.rows[y]->lineRendition = lineRendition; _api.lineRendition = lineRendition; return S_OK; } @@ -299,7 +302,7 @@ try { const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 1 : 0; - const auto backgroundRow = _p.backgroundBitmap.begin() + static_cast(y) * _p.s->cellCount.x; + const auto backgroundRow = _p.backgroundBitmap.begin() + _p.backgroundBitmapStride * y; auto it = backgroundRow + x; const auto end = backgroundRow + (static_cast(column) << shift); const auto bg = _api.currentColor.y; @@ -453,7 +456,9 @@ void AtlasEngine::_handleSettingsUpdate() if (targetChanged) { - _b.reset(); + // target->useSoftwareRendering affects the selection of our IDXGIAdapter which requires us to reset _p.dxgi. + // This will indirectly also recreate the backend, when AtlasEngine::_recreateAdapter() detects this change. 
+ _p.dxgi = {}; } if (fontChanged) { @@ -539,7 +544,14 @@ void AtlasEngine::_recreateCellCountDependentResources() _p.unorderedRows = Buffer(_p.s->cellCount.y); _p.rowsScratch = Buffer(_p.s->cellCount.y); _p.rows = Buffer(_p.s->cellCount.y); - _p.backgroundBitmap = Buffer(static_cast(_p.s->cellCount.x) * _p.s->cellCount.y); + + // Our render loop heavily relies on memcpy() which is up to between 1.5x (Intel) + // and 40x (AMD) faster for allocations with an alignment of 32 or greater. + // backgroundBitmapStride is a "count" of u32 and not in bytes, + // so we round up to multiple of 8 because 8 * sizeof(u32) == 32. + _p.backgroundBitmapStride = (static_cast(_p.s->cellCount.x) + 7) & ~7; + _p.backgroundBitmap = Buffer(_p.backgroundBitmapStride * _p.s->cellCount.y); + memset(_p.backgroundBitmap.data(), 0, _p.backgroundBitmap.size() * sizeof(u32)); auto it = _p.unorderedRows.data(); for (auto& r : _p.rows) @@ -821,23 +833,6 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) { - // TODO soft fonts -#if 0 - if (!_p.s->font->softFontPattern.empty()) - { - for (u32 i = from; i < to; ++i) - { - const auto ch = _api.bufferLine[from]; - if (ch >= 0xEF20 && ch < 0xEF80) - { - } - } - - const auto initialIndicesCount = row.glyphIndices.size(); - row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size()), FontRendition::SoftFont); - } -#endif - if (!_api.replacementCharacterLookedUp) { bool succeeded = false; @@ -861,16 +856,59 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) _api.replacementCharacterLookedUp = true; } - if (_api.replacementCharacterFontFace) + static constexpr auto isSoftFontChar = [](wchar_t ch) noexcept { + return ch >= 0xEF20 && ch < 0xEF80; + }; + + auto pos1 = from; + auto pos2 = pos1; + auto col1 = 
_api.bufferLineColumn[from]; + auto col2 = col1; + auto initialIndicesCount = row.glyphIndices.size(); + const auto softFontAvailable = !_p.s->font->softFontPattern.empty(); + auto currentlyMappingSoftFont = isSoftFontChar(_api.bufferLine[pos1]); + + while (pos2 < to) { - const auto initialIndicesCount = row.glyphIndices.size(); - const auto col0 = _api.bufferLineColumn[from]; - const auto col1 = _api.bufferLineColumn[to]; - const auto cols = gsl::narrow_cast(col1 - col0); - row.glyphIndices.insert(row.glyphIndices.end(), cols, _api.replacementCharacterGlyphIndex); - row.glyphAdvances.insert(row.glyphAdvances.end(), cols, _p.s->font->cellSize.x); - row.glyphOffsets.insert(row.glyphOffsets.end(), cols, DWRITE_GLYPH_OFFSET{}); - row.colors.insert(row.colors.end(), _api.colorsForeground.begin() + col0, _api.colorsForeground.begin() + col1); - row.mappings.emplace_back(_api.replacementCharacterFontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(row.glyphIndices.size())); + col2 = _api.bufferLineColumn[++pos2]; + if (col1 == col2) + { + continue; + } + + const auto cols = col2 - col1; + const auto ch = static_cast(_api.bufferLine[pos1]); + const auto nowMappingSoftFont = isSoftFontChar(ch); + + row.glyphIndices.emplace_back(nowMappingSoftFont ? ch : _api.replacementCharacterGlyphIndex); + row.glyphAdvances.emplace_back(static_cast(cols * _p.s->font->cellSize.x)); + row.glyphOffsets.emplace_back(DWRITE_GLYPH_OFFSET{}); + row.colors.emplace_back(_api.colorsForeground[col1]); + + if (currentlyMappingSoftFont != nowMappingSoftFont) + { + const auto indicesCount = row.glyphIndices.size(); + const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? 
IDWriteFontFace_SoftFont : _api.replacementCharacterFontFace.get(); + + if (indicesCount > initialIndicesCount && fontFace) + { + row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + initialIndicesCount = indicesCount; + } + } + + pos1 = pos2; + col1 = col2; + currentlyMappingSoftFont = nowMappingSoftFont; + } + + { + const auto indicesCount = row.glyphIndices.size(); + const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? IDWriteFontFace_SoftFont : _api.replacementCharacterFontFace.get(); + + if (indicesCount > initialIndicesCount && fontFace) + { + row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + } } } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 5a53fcc0723..68c1870393c 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -92,6 +92,7 @@ namespace Microsoft::Console::Render::Atlas void _resolveFontMetrics(const wchar_t* faceName, const FontInfoDesired& fontInfoDesired, FontInfo& fontInfo, FontSettings* fontMetrics = nullptr) const; // AtlasEngine.r.cpp + void _recreateAdapter(); void _recreateBackend(); static constexpr u16 u16min = 0x0000; diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp index a17680e9ebf..bfa6889cd6d 100644 --- a/src/renderer/atlas/AtlasEngine.r.cpp +++ b/src/renderer/atlas/AtlasEngine.r.cpp @@ -32,6 +32,11 @@ using namespace Microsoft::Console::Render::Atlas; [[nodiscard]] HRESULT AtlasEngine::Present() noexcept try { + if (!_p.dxgi.adapter || !_p.dxgi.factory->IsCurrent()) + { + _recreateAdapter(); + } + if (!_b) { _recreateBackend(); @@ -47,9 +52,14 @@ try catch (const wil::ResultException& exception) { const auto hr = exception.GetErrorCode(); - const auto isExpected = hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET || hr == 
D2DERR_RECREATE_TARGET; - if (!isExpected && _p.warningCallback) + if (hr == DXGI_ERROR_DEVICE_REMOVED || hr == DXGI_ERROR_DEVICE_RESET) + { + _p.dxgi = {}; + return E_PENDING; + } + + if (_p.warningCallback) { try { @@ -58,9 +68,8 @@ catch (const wil::ResultException& exception) CATCH_LOG() } - _p.dxgiFactory.reset(); _b.reset(); - return isExpected ? E_PENDING : hr; + return hr; } CATCH_RETURN() @@ -83,7 +92,7 @@ void AtlasEngine::WaitUntilCanRender() noexcept #pragma endregion -void AtlasEngine::_recreateBackend() +void AtlasEngine::_recreateAdapter() { #ifndef NDEBUG if (IsDebuggerPresent()) @@ -101,16 +110,6 @@ void AtlasEngine::_recreateBackend() } #endif - // Tell the OS that we're resilient to graphics device removal. Docs say: - // > This function should be called once per process and before any device creation. - if (const auto module = GetModuleHandleW(L"dxgi.dll")) - { - if (const auto func = GetProcAddressByFunctionDeclaration(module, DXGIDeclareAdapterRemovalSupport)) - { - LOG_IF_FAILED(func()); - } - } - #ifndef NDEBUG static constexpr UINT flags = DXGI_CREATE_FACTORY_DEBUG; #else @@ -118,31 +117,19 @@ void AtlasEngine::_recreateBackend() #endif // IID_PPV_ARGS doesn't work here for some reason. - THROW_IF_FAILED(CreateDXGIFactory2(flags, __uuidof(_p.dxgiFactory), _p.dxgiFactory.put_void())); + THROW_IF_FAILED(CreateDXGIFactory2(flags, __uuidof(_p.dxgi.factory), _p.dxgi.factory.put_void())); - auto d2dMode = ATLAS_DEBUG_FORCE_D2D_MODE; - auto deviceFlags = D3D11_CREATE_DEVICE_SINGLETHREADED -#ifndef NDEBUG - | D3D11_CREATE_DEVICE_DEBUG -#endif - // This flag prevents the driver from creating a large thread pool for things like shader computations - // that would be advantageous for games. For us this has only a minimal performance benefit, - // but comes with a large memory usage overhead. At the time of writing the Nvidia - // driver launches $cpu_thread_count more worker threads without this flag. 
- | D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS - // Direct2D support. - | D3D11_CREATE_DEVICE_BGRA_SUPPORT; + wil::com_ptr adapter; + DXGI_ADAPTER_DESC1 desc{}; - wil::com_ptr dxgiAdapter; { const auto useSoftwareRendering = _p.s->target->useSoftwareRendering; - DXGI_ADAPTER_DESC1 desc{}; UINT index = 0; do { - THROW_IF_FAILED(_p.dxgiFactory->EnumAdapters1(index++, dxgiAdapter.put())); - THROW_IF_FAILED(dxgiAdapter->GetDesc1(&desc)); + THROW_IF_FAILED(_p.dxgi.factory->EnumAdapters1(index++, adapter.put())); + THROW_IF_FAILED(adapter->GetDesc1(&desc)); // If useSoftwareRendering is false we exit during the first iteration. Using the default adapter (index 0) // is the right thing to do under most circumstances, unless you _really_ want to get your hands dirty. @@ -151,12 +138,37 @@ void AtlasEngine::_recreateBackend() // // If useSoftwareRendering is true we search until we find the first WARP adapter (usually the last adapter). } while (useSoftwareRendering && WI_IsFlagClear(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)); + } - if (WI_IsFlagSet(desc.Flags, DXGI_ADAPTER_FLAG_SOFTWARE)) - { - WI_ClearFlag(deviceFlags, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS); - d2dMode = true; - } + if (memcmp(&_p.dxgi.adapterLuid, &desc.AdapterLuid, sizeof(LUID)) != 0) + { + _p.dxgi.adapter = std::move(adapter); + _p.dxgi.adapterLuid = desc.AdapterLuid; + _p.dxgi.adapterFlags = desc.Flags; + _b.reset(); + } +} + +void AtlasEngine::_recreateBackend() +{ + auto d2dMode = ATLAS_DEBUG_FORCE_D2D_MODE; + auto deviceFlags = + D3D11_CREATE_DEVICE_SINGLETHREADED +#ifndef NDEBUG + | D3D11_CREATE_DEVICE_DEBUG +#endif + // This flag prevents the driver from creating a large thread pool for things like shader computations + // that would be advantageous for games. For us this has only a minimal performance benefit, + // but comes with a large memory usage overhead. 
At the time of writing the Nvidia + // driver launches $cpu_thread_count more worker threads without this flag. + | D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS + // Direct2D support. + | D3D11_CREATE_DEVICE_BGRA_SUPPORT; + + if (WI_IsFlagSet(_p.dxgi.adapterFlags, DXGI_ADAPTER_FLAG_SOFTWARE)) + { + WI_ClearFlag(deviceFlags, D3D11_CREATE_DEVICE_PREVENT_INTERNAL_THREADING_OPTIMIZATIONS); + d2dMode = true; } wil::com_ptr device0; @@ -174,7 +186,7 @@ void AtlasEngine::_recreateBackend() }; THROW_IF_FAILED(D3D11CreateDevice( - /* pAdapter */ dxgiAdapter.get(), + /* pAdapter */ _p.dxgi.adapter.get(), /* DriverType */ D3D_DRIVER_TYPE_UNKNOWN, /* Software */ nullptr, /* Flags */ deviceFlags, @@ -224,10 +236,8 @@ void AtlasEngine::_recreateBackend() // !!! NOTE !!! // Normally the viewport is indirectly marked as dirty by `AtlasEngine::_handleSettingsUpdate()` whenever - // the settings change, but the `!_p.dxgiFactory->IsCurrent()` check is not part of the settings change + // the settings change, but the `!_p.dxgi.factory->IsCurrent()` check is not part of the settings change // flow and so we have to manually recreate how AtlasEngine.cpp marks viewports as dirty here. // This ensures that the backends redraw their entire viewports whenever a new swap chain is created. 
- _p.dirtyRectInPx = { 0, 0, _p.s->targetSize.x, _p.s->targetSize.y }; - _p.invalidatedRows = { 0, _p.s->cellCount.y }; - _p.scrollOffset = 0; + _p.MarkAllAsDirty(); } diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index ea93b7c4ea8..8966b9dbdd2 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -116,7 +116,7 @@ void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* dev if (p.s->target->hwnd) { desc.AlphaMode = DXGI_ALPHA_MODE_IGNORE; - THROW_IF_FAILED(p.dxgiFactory->CreateSwapChainForHwnd(device, p.s->target->hwnd, &desc, nullptr, nullptr, swapChain1.addressof())); + THROW_IF_FAILED(p.dxgi.factory->CreateSwapChainForHwnd(device, p.s->target->hwnd, &desc, nullptr, nullptr, swapChain1.addressof())); } else { @@ -127,7 +127,7 @@ void SwapChainManager::_createSwapChain(const RenderingPayload& p, IUnknown* dev // As per: https://docs.microsoft.com/en-us/windows/win32/api/dcomp/nf-dcomp-dcompositioncreatesurfacehandle static constexpr DWORD COMPOSITIONSURFACE_ALL_ACCESS = 0x0003L; THROW_IF_FAILED(DCompositionCreateSurfaceHandle(COMPOSITIONSURFACE_ALL_ACCESS, nullptr, _swapChainHandle.addressof())); - THROW_IF_FAILED(p.dxgiFactory.query()->CreateSwapChainForCompositionSurfaceHandle(device, _swapChainHandle.get(), &desc, nullptr, swapChain1.addressof())); + THROW_IF_FAILED(p.dxgi.factory.query()->CreateSwapChainForCompositionSurfaceHandle(device, _swapChainHandle.get(), &desc, nullptr, swapChain1.addressof())); } _swapChain = swapChain1.query(); diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 888c3c1bed9..052486c66e7 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -161,7 +161,7 @@ void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept { if (_backgroundBitmapGeneration != p.backgroundBitmapGeneration) { - _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), p.s->cellCount.x * 
4); + _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), gsl::narrow_cast(p.backgroundBitmapStride * sizeof(u32))); _backgroundBitmapGeneration = p.backgroundBitmapGeneration; } @@ -195,6 +195,11 @@ void BackendD2D::_drawText(RenderingPayload& p) { for (const auto& m : row->mappings) { + if (!m.fontFace.is_proper_font()) + { + continue; + } + const DWRITE_GLYPH_RUN glyphRun{ .fontFace = m.fontFace.get(), .fontEmSize = m.fontEmSize, @@ -214,11 +219,11 @@ void BackendD2D::_drawText(RenderingPayload& p) // (Vice versa for the bottom half of a double height row.) // // Since we used SetUnitMode(D2D1_UNIT_MODE_PIXELS), bounds.top/bottom is in pixels already and requires no conversion nor rounding. - if (row->lineRendition != FontRendition::DoubleHeightBottom) + if (row->lineRendition != LineRendition::DoubleHeightBottom) { row->dirtyTop = std::min(row->dirtyTop, static_cast(lrintf(bounds.top))); } - if (row->lineRendition != FontRendition::DoubleHeightTop) + if (row->lineRendition != LineRendition::DoubleHeightTop) { row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lrintf(bounds.bottom))); } @@ -237,7 +242,7 @@ void BackendD2D::_drawText(RenderingPayload& p) }; _renderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); - if (row->lineRendition != FontRendition::SingleWidth) + if (row->lineRendition != LineRendition::SingleWidth) { baselineY = _drawTextPrepareLineRendition(p, baselineY, row->lineRendition); } @@ -269,7 +274,10 @@ void BackendD2D::_drawText(RenderingPayload& p) .glyphOffsets = &row->glyphOffsets[off], }; - DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); + if (m.fontFace.is_proper_font()) + { + DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); + } for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) { @@ -278,7 +286,7 @@ void BackendD2D::_drawText(RenderingPayload& 
p) } } - if (row->lineRendition != FontRendition::SingleWidth) + if (row->lineRendition != LineRendition::SingleWidth) { _drawTextResetLineRendition(); } @@ -295,7 +303,7 @@ void BackendD2D::_drawText(RenderingPayload& p) } } -f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, FontRendition lineRendition) const noexcept +f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const noexcept { const auto descender = static_cast(p.s->font->cellSize.y - p.s->font->baseline); D2D1_MATRIX_3X2_F transform{ @@ -303,12 +311,12 @@ f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 bas .m22 = 1.0f, }; - if (lineRendition >= FontRendition::DoubleHeightTop) + if (lineRendition >= LineRendition::DoubleHeightTop) { transform.m22 = 2.0f; transform.dy = -1.0f * (baselineY + descender); - if (lineRendition == FontRendition::DoubleHeightTop) + if (lineRendition == LineRendition::DoubleHeightTop) { const auto delta = static_cast(p.s->font->cellSize.y); baselineY += delta; diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 862b360aa4b..cb230f081fa 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -19,7 +19,7 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); - f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, FontRendition lineRendition) const noexcept; + f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const noexcept; void _drawTextResetLineRendition() const noexcept; __declspec(noinline) f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); diff --git 
a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 755bef29f5d..cd40f2419e4 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -60,22 +60,27 @@ size_t BackendD3D::GlyphCacheMap::Size() const noexcept void BackendD3D::GlyphCacheMap::Clear() noexcept { - for (auto& entry : _map) + for (const auto& entry : _map) { - if (entry.key.fontFace) + if (entry.key.fontFace > IDWriteFontFace_SoftFont) { // I'm pretty sure Release() doesn't throw exceptions. #pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Release()' which may throw exceptions (f.6). entry.key.fontFace->Release(); - entry.key.fontFace = nullptr; } } + // memset() is used instead of std::fill() and its variants, because MSVC fails to understand that + // GlyphCacheEntry can be initialized with all zeroes and so it uses much slower approaches. + memset(_map.data(), 0, _map.size() * sizeof(*_map.data())); _size = 0; } BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const GlyphCacheKey& key, bool& inserted) { + // The fontRendition member must be non-zero to mark the hashmap slot as occupied + //assert(key.fontRendition != LineRendition::None); + // Putting this into the Find() path is a little pessimistic, but it // allows us to default-construct this hashmap with a size of 0. 
if (_size >= _capacity) @@ -87,7 +92,7 @@ BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const Glyph for (auto i = hash;; ++i) { auto& entry = _map[i & _mask]; - if (entry.key == key) + if (_equals(entry.key, key)) { inserted = false; return entry; @@ -96,7 +101,10 @@ BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const Glyph { ++_size; entry.key = key; - entry.key.fontFace->AddRef(); + if (entry.key.fontFace > IDWriteFontFace_SoftFont) + { + entry.key.fontFace->AddRef(); + } inserted = true; return entry; } @@ -105,15 +113,14 @@ BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const Glyph size_t BackendD3D::GlyphCacheMap::_hash(const GlyphCacheKey& key) noexcept { - //auto h = UINT64_C(0xcafef00dd15ea5e5); - //h = (h ^ key.hashField1) * UINT64_C(6364136223846793005) + UINT64_C(1442695040888963407); - //h = (h ^ key.hashField2) * UINT64_C(6364136223846793005) + UINT64_C(1442695040888963407); - //const int r = h & 63; - //const auto x = static_cast(h >> 32) ^ static_cast(h); - //return _rotl(x, r); return til::hash(&key, GlyphCacheKeyDataSize); } +bool BackendD3D::GlyphCacheMap::_equals(const GlyphCacheKey& lhs, const GlyphCacheKey& rhs) noexcept +{ + return memcmp(&lhs, &rhs, GlyphCacheKeyDataSize) == 0; +} + void BackendD3D::GlyphCacheMap::_bumpSize() { // The following block of code may be used to assess the quality of the hash function. 
@@ -449,6 +456,8 @@ void BackendD3D::_updateFontDependents(const RenderingPayload& p) { _d2dRenderTargetUpdateFontSettings(*p.s->font); } + + _softFontBitmap.reset(); } void BackendD3D::_recreateCustomShader(const RenderingPayload& p) @@ -910,7 +919,9 @@ void BackendD3D::_bumpInstancesSize() const auto newSize = std::max(256, _instances.size() * 2); Expects(newSize > _instances.size()); - auto newInstances = Buffer{ newSize }; + // Our render loop heavily relies on memcpy() which is up to between 1.5x (Intel) + // and 40x (AMD) faster for allocations with an alignment of 32 or greater. + auto newInstances = Buffer{ newSize }; std::copy_n(_instances.data(), _instances.size(), newInstances.data()); _instances = std::move(newInstances); @@ -1016,11 +1027,16 @@ void BackendD3D::_drawBackground(const RenderingPayload& p) D3D11_MAPPED_SUBRESOURCE mapped{}; THROW_IF_FAILED(_deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - auto data = static_cast(mapped.pData); + const auto srcStride = p.backgroundBitmapStride * sizeof(u32); +#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). 
+ auto src = reinterpret_cast(p.backgroundBitmap.data()); + auto dst = static_cast(mapped.pData); + for (size_t i = 0; i < p.s->cellCount.y; ++i) { - memcpy(data, p.backgroundBitmap.data() + i * p.s->cellCount.x, p.s->cellCount.x * sizeof(u32)); - data += mapped.RowPitch; + memcpy(dst, src, srcStride); + src += srcStride; + dst += mapped.RowPitch; } _deviceContext->Unmap(_backgroundBitmap.get(), 0); @@ -1045,33 +1061,31 @@ void BackendD3D::_drawText(RenderingPayload& p) { f32 baselineX = 0; const auto baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; - const auto lineRenditionScale = static_cast(row->lineRendition != FontRendition::SingleWidth); + const auto lineRenditionScale = static_cast(row->lineRendition != LineRendition::SingleWidth); for (const auto& m : row->mappings) { GlyphCacheKey key{ .fontFace = m.fontFace.get(), - .fontRendition = row->lineRendition | m.fontRendition + .lineRendition = static_cast(row->lineRendition), }; for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) { key.glyphIndex = row->glyphIndices[x]; - // This loop merely exists to allow us to retry rendering a glyph if the glyph atlas was full. - // We need to retry here, because a retry will cause the atlas texture as well as the + // This loop exists to allow us to retry rendering a glyph if the glyph atlas was full. + // We need to loop here, because a retry will cause the atlas texture as well as the // _glyphCache hashmap to be cleared, and so we'll have to call FindOrInsert() again. for (;;) { bool inserted = false; auto& entry = _glyphCache.FindOrInsert(key, inserted); - if (inserted) + if (inserted && !_drawGlyph(p, entry, m.fontEmSize)) { - if (!_drawGlyph(p, entry, m.fontEmSize)) - { - continue; - } + // A deadlock in this retry loop is detected in _drawGlyphPrepareRetry. 
+ continue; } if (entry.data.shadingType != ShadingType::Default) @@ -1120,6 +1134,11 @@ void BackendD3D::_drawText(RenderingPayload& p) bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) { + if (entry.key.fontFace == IDWriteFontFace_SoftFont) + { + return _drawSoftFontGlyph(p, entry); + } + const DWRITE_GLYPH_RUN glyphRun{ .fontFace = entry.key.fontFace, .fontEmSize = fontEmSize, @@ -1127,14 +1146,14 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f .glyphIndices = &entry.key.glyphIndex, }; - const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; + const auto lineRendition = static_cast(entry.key.lineRendition); std::optional transform; - if (lineRendition != FontRendition::None) + if (lineRendition != LineRendition::SingleWidth) { auto& t = transform.emplace(); t.m11 = 2.0f; - t.m22 = lineRendition >= FontRendition::DoubleHeightTop ? 2.0f : 1.0f; + t.m22 = lineRendition >= LineRendition::DoubleHeightTop ? 2.0f : 1.0f; _d2dRenderTarget->SetTransform(&t); } @@ -1166,9 +1185,6 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f // box may be empty if the glyph is whitespace. if (box.left >= box.right || box.top >= box.bottom) { - // This will indicate to `BackendD3D::_drawText` that this glyph is whitespace. It's important to set this member, - // because `GlyphCacheMap` does not zero out inserted entries and `shadingType` might still contain "garbage". - entry.data.shadingType = ShadingType::Default; return true; } @@ -1212,7 +1228,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f entry.data.texcoord.y = rect.y; entry.data.shadingType = colorGlyph ? 
ShadingType::Passthrough : _textShadingType; - if (lineRendition >= FontRendition::DoubleHeightTop) + if (lineRendition >= LineRendition::DoubleHeightTop) { _splitDoubleHeightGlyph(p, entry); } @@ -1220,6 +1236,75 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f return true; } +bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::GlyphCacheEntry& entry) +{ + // TODO: Add support for line renditions - call _splitDoubleHeightGlyph for this. + stbrp_rect rect{ + .w = p.s->font->cellSize.x, + .h = p.s->font->cellSize.y, + }; + if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) + { + _drawGlyphPrepareRetry(p); + return false; + } + + if (!_softFontBitmap) + { + const D2D1_SIZE_U size{ + static_cast(p.s->font->softFontCellSize.width), + static_cast(p.s->font->softFontCellSize.height), + }; + static constexpr D2D1_BITMAP_PROPERTIES1 bitmapProperties{ + .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, + }; + THROW_IF_FAILED(_d2dRenderTarget->CreateBitmap(size, nullptr, 0, &bitmapProperties, _softFontBitmap.addressof())); + } + + { + const auto width = static_cast(p.s->font->softFontCellSize.width); + const auto height = static_cast(p.s->font->softFontCellSize.height); + + auto bitmapData = Buffer{ width * height }; + auto src = p.s->font->softFontPattern.begin() + height * (entry.key.glyphIndex - 0xEF20u); + auto dst = bitmapData.begin(); + + for (size_t y = 0; y < height; y++) + { + auto srcBits = *src++; + for (size_t x = 0; x < width; x++) + { + const auto srcBitIsSet = (srcBits & 0x8000) != 0; + *dst++ = srcBitIsSet ? 
0xffffffff : 0x00000000; + srcBits <<= 1; + } + } + + const auto pitch = static_cast(width * sizeof(u32)); + THROW_IF_FAILED(_softFontBitmap->CopyFromMemory(nullptr, bitmapData.data(), pitch)); + } + + const D2D1_RECT_F dest{ + static_cast(rect.x), + static_cast(rect.y), + static_cast(rect.x + rect.w), + static_cast(rect.y + rect.h), + }; + + _d2dBeginDrawing(); + _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC, nullptr, nullptr); + + // TODO: What is the p.s->font->softFontCenteringHint? + entry.data.offset.x = 0; + entry.data.offset.y = -p.s->font->baseline; + entry.data.size.x = rect.w; + entry.data.size.y = rect.h; + entry.data.texcoord.x = rect.x; + entry.data.texcoord.y = rect.y; + entry.data.shadingType = ShadingType::TextGrayscale; + return true; +} + void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) { THROW_HR_IF_MSG(E_UNEXPECTED, _glyphCache.Size() == 0, "BackendD3D::_drawGlyph deadlock"); @@ -1234,16 +1319,16 @@ void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) // and create a second entry in our glyph cache hashmap that contains the other half. void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) { + static constexpr auto lrTop = static_cast(LineRendition::DoubleHeightTop); + static constexpr auto lrBottom = static_cast(LineRendition::DoubleHeightBottom); + // Twice the line height, twice the descender gap. For both. entry.data.offset.y -= p.s->font->descender; - const auto lineRendition = entry.key.fontRendition & FontRendition::LineRenditionMask; - const auto fontRendition = entry.key.fontRendition & FontRendition::FontRenditionMask; - const auto isTop = lineRendition == FontRendition::DoubleHeightTop; - const auto altRendition = isTop ? 
FontRendition::DoubleHeightBottom : FontRendition::DoubleHeightTop; + const auto isTop = entry.key.lineRendition == lrTop; auto key2 = entry.key; - key2.fontRendition = altRendition | fontRendition; + key2.lineRendition = isTop ? lrBottom : lrTop; bool inserted = false; auto& entry2 = _glyphCache.FindOrInsert(key2, inserted); diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 265d4e69df3..3eac1b0dab7 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -86,20 +86,24 @@ namespace Microsoft::Console::Render::Atlas // which at the time of writing returns the same IDWriteFontFace as long as someone is // holding a reference / the reference count doesn't drop to 0 (see ActiveFaceCache). // This allows us to hash the value of the pointer as if it was uniquely identifying the font face. + // + // This isn't using a raw pointer instead of a managed struct, because this allows + // us to construct a GlyphCacheKey for lookup without AddRef()ing the fontFace. IDWriteFontFace* fontFace = nullptr; - FontRendition fontRendition = FontRendition::None; + u16 lineRendition = 0; u16 glyphIndex = 0; - - constexpr bool operator==(const GlyphCacheKey& rhs) const noexcept - { - return __builtin_memcmp(this, &rhs, GlyphCacheKeyDataSize) == 0; - } +#ifdef _WIN64 + u32 _padding = 0; +#endif }; - static_assert(sizeof(GlyphCacheKey) == 2 * sizeof(void*)); + static_assert(std::has_unique_object_representations_v); // Due to padding on 64-Bit systems, sizeof(GlyphCacheKey) will be 16, // but the actual contents of the struct still only be 12 bytes. 
- static constexpr size_t GlyphCacheKeyDataSize = sizeof(GlyphCacheKey::fontFace) + 2 * sizeof(u16); + static constexpr size_t GlyphCacheKeyDataSize = + sizeof(GlyphCacheKey::fontFace) + + sizeof(GlyphCacheKey::lineRendition) + + sizeof(GlyphCacheKey::glyphIndex); struct GlyphCacheData { @@ -108,12 +112,14 @@ namespace Microsoft::Console::Render::Atlas u16x2 texcoord; ShadingType shadingType = ShadingType::Default; }; + static_assert(std::has_unique_object_representations_v); struct GlyphCacheEntry { GlyphCacheKey key; GlyphCacheData data; }; + static_assert(std::has_unique_object_representations_v); struct GlyphCacheMap { @@ -132,6 +138,7 @@ namespace Microsoft::Console::Render::Atlas private: static size_t _hash(const GlyphCacheKey& key) noexcept; + static bool _equals(const GlyphCacheKey& lhs, const GlyphCacheKey& rhs) noexcept; void _bumpSize(); Buffer _map; @@ -165,6 +172,7 @@ namespace Microsoft::Console::Render::Atlas void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); + bool _drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); void _drawGridlines(const RenderingPayload& p); @@ -191,7 +199,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _indexBuffer; wil::com_ptr _instanceBuffer; size_t _instanceBufferCapacity = 0; - Buffer _instances; + Buffer _instances; size_t _instancesCount = 0; // This allows us to batch inverted cursors into the same @@ -227,6 +235,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _d2dRenderTarget; wil::com_ptr _d2dRenderTarget4; // Optional. Supported since Windows 10 14393. 
wil::com_ptr _brush; + wil::com_ptr _softFontBitmap; bool _d2dBeganDrawing = false; bool _fontChangedResetGlyphAtlas = false; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 33365042969..ef4dde530e3 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -365,29 +365,123 @@ namespace Microsoft::Console::Render::Atlas }; ATLAS_FLAG_OPS(FontRelevantAttributes, u8) - // This is u16 so that it fits right into BackendD3D's GlyphCacheKey alignment. - enum class FontRendition : u16 + // This fake IDWriteFontFace* is a place holder that is used when we draw DECDLD/DRCS soft fonts. It's wildly + // invalid C++, but I wrote the alternative, proper code with bitfields/flags and such and it turned into a + // bigger mess than this violation against the C++ consortium's conscience. It also didn't help BackendD3D, + // which hashes FontFace and an additional flag field would double the hashmap key size due to padding. + // It's a macro, because constexpr doesn't work here in C++20 and regular "const" doesn't inline. +#define IDWriteFontFace_SoftFont (static_cast(nullptr) + 1) + + // The existence of IDWriteFontFace_SoftFont unfortunately requires us to reimplement wil::com_ptr. + // + // Unfortunately this code seems to confuse MSVC's linter? The 3 smart pointer warnings are somewhat funny. + // It doesn't understand that this class is a smart pointer itself. The other 2 are valid, but don't apply here. +#pragma warning(push) +#pragma warning(disable : 26415) // Smart pointer parameter 'other' is used only to access contained pointer. Use T* or T& instead (r.30). +#pragma warning(disable : 26416) // Shared pointer parameter 'other' is passed by rvalue reference. Pass by value instead (r.34). +#pragma warning(disable : 26418) // Shared pointer parameter 'other' is not copied or moved. Use T* or T& instead (r.36). +#pragma warning(disable : 26447) // The function is declared 'noexcept' but calls function '...' 
which may throw exceptions (f.6).) +#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). + struct FontFace { - None = 0, + FontFace() = default; + + ~FontFace() noexcept + { + _release(); + } + + FontFace(const FontFace& other) noexcept : + FontFace{ other.get() } + { + } + + FontFace(FontFace&& other) noexcept : + _ptr{ other.detach() } + { + } + + FontFace& operator=(const FontFace& other) noexcept + { + _release(); + _ptr = other.get(); + _addRef(); + return *this; + } + + FontFace& operator=(FontFace&& other) noexcept + { + _release(); + _ptr = other.detach(); + return *this; + } + + FontFace(IDWriteFontFace* ptr) noexcept : + _ptr{ ptr } + { + _addRef(); + } + + FontFace(const wil::com_ptr& other) noexcept : + FontFace{ other.get() } + { + } + + FontFace(wil::com_ptr&& other) noexcept : + _ptr{ other.detach() } + { + } - LineRenditionMask = 0x00ff, - SingleWidth = LineRendition::SingleWidth, - DoubleWidth = LineRendition::DoubleWidth, - DoubleHeightTop = LineRendition::DoubleHeightTop, - DoubleHeightBottom = LineRendition::DoubleHeightBottom, + void attach(IDWriteFontFace* other) noexcept + { + _release(); + _ptr = other; + } + + [[nodiscard]] IDWriteFontFace* detach() noexcept + { + const auto tmp = _ptr; + _ptr = nullptr; + return tmp; + } + + IDWriteFontFace* get() const noexcept + { + return _ptr; + } + + bool is_proper_font() const noexcept + { + return _ptr > IDWriteFontFace_SoftFont; + } + + private: + void _addRef() const noexcept + { + if (is_proper_font()) + { + _ptr->AddRef(); + } + } + + void _release() const noexcept + { + if (is_proper_font()) + { + _ptr->Release(); + } + } - FontRenditionMask = 0xff00, - SoftFont = 0x8000, + IDWriteFontFace* _ptr = nullptr; }; - ATLAS_FLAG_OPS(FontRendition, u16) +#pragma warning(pop) struct FontMapping { - wil::com_ptr fontFace; + FontFace fontFace; f32 fontEmSize = 0; u32 glyphsFrom = 0; u32 glyphsTo = 0; - FontRendition fontRendition = FontRendition::None; }; struct 
GridLineRange @@ -408,7 +502,7 @@ namespace Microsoft::Console::Render::Atlas glyphOffsets.clear(); colors.clear(); gridLineRanges.clear(); - lineRendition = FontRendition::None; + lineRendition = LineRendition::SingleWidth; selectionFrom = 0; selectionTo = 0; dirtyTop = y * cellHeight; @@ -421,7 +515,7 @@ namespace Microsoft::Console::Render::Atlas std::vector glyphOffsets; // same size as glyphIndices std::vector colors; // same size as glyphIndices std::vector gridLineRanges; - FontRendition lineRendition = FontRendition::None; + LineRendition lineRendition = LineRendition::SingleWidth; u16 selectionFrom = 0; u16 selectionTo = 0; til::CoordType dirtyTop = 0; @@ -442,7 +536,13 @@ namespace Microsoft::Console::Render::Atlas std::function swapChainChangedCallback; //// Parameters which are constant for the existence of the backend. - wil::com_ptr dxgiFactory; + struct + { + wil::com_ptr factory; + wil::com_ptr adapter; + LUID adapterLuid{}; + UINT adapterFlags = 0; + } dxgi; //// Parameters which change seldom. GenerationalSettings s; @@ -453,15 +553,25 @@ namespace Microsoft::Console::Render::Atlas // This is used as a scratch buffer during scrolling. Buffer rowsScratch; Buffer rows; - Buffer backgroundBitmap; + // This stride (width) of the backgroundBitmap is a "count" of u32 and not in bytes. + size_t backgroundBitmapStride = 0; + Buffer backgroundBitmap; // 1 ensures that the backends redraw the background, even if the background is // entirely black, just like `backgroundBitmap` is all back after it gets created. 
til::generation_t backgroundBitmapGeneration{ 1 }; + u16r cursorRect; til::rect dirtyRectInPx; u16x2 invalidatedRows; i16 scrollOffset = 0; + + void MarkAllAsDirty() noexcept + { + dirtyRectInPx = { 0, 0, s->targetSize.x, s->targetSize.y }; + invalidatedRows = { 0, s->cellCount.y }; + scrollOffset = 0; + } }; struct IBackend From 0f3b1d32f72ebf305fee91bccff2b786c5a678b8 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 30 Mar 2023 23:59:39 +0200 Subject: [PATCH 18/37] Implement line renditions for soft fonts --- src/renderer/atlas/AtlasEngine.cpp | 2 +- src/renderer/atlas/BackendD3D.cpp | 27 +++++++++++++--- src/tools/RenderingTests/main.cpp | 51 +++++++++++++++++++----------- 3 files changed, 55 insertions(+), 25 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 6f7e9c7e818..34072863253 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -205,7 +205,7 @@ try } #if ATLAS_DEBUG_CONTINUOUS_REDRAW - _p.MarkAllAsDirt(); + _p.MarkAllAsDirty(); #endif return S_OK; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index cd40f2419e4..8788a47d1dc 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -904,6 +904,7 @@ void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType _appendQuad(position, size, {}, color, shadingType); } +// NOTE: Up to 5M calls per second -> no std::vector, no std::unordered_map. 
void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType) { if (_instancesCount >= _instances.size()) @@ -1147,8 +1148,8 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f }; const auto lineRendition = static_cast(entry.key.lineRendition); - std::optional transform; + if (lineRendition != LineRendition::SingleWidth) { auto& t = transform.emplace(); @@ -1236,13 +1237,20 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f return true; } -bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::GlyphCacheEntry& entry) +bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) { - // TODO: Add support for line renditions - call _splitDoubleHeightGlyph for this. stbrp_rect rect{ .w = p.s->font->cellSize.x, .h = p.s->font->cellSize.y, }; + + const auto lineRendition = static_cast(entry.key.lineRendition); + if (lineRendition != LineRendition::SingleWidth) + { + rect.w <<= 1; + rect.h <<= static_cast(lineRendition >= LineRendition::DoubleHeightTop); + } + if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) { _drawGlyphPrepareRetry(p); @@ -1266,7 +1274,8 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::Glyph const auto height = static_cast(p.s->font->softFontCellSize.height); auto bitmapData = Buffer{ width * height }; - auto src = p.s->font->softFontPattern.begin() + height * (entry.key.glyphIndex - 0xEF20u); + const auto glyphIndex = entry.key.glyphIndex - 0xEF20u; + auto src = p.s->font->softFontPattern.begin() + height * glyphIndex; auto dst = bitmapData.begin(); for (size_t y = 0; y < height; y++) @@ -1284,6 +1293,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::Glyph THROW_IF_FAILED(_softFontBitmap->CopyFromMemory(nullptr, bitmapData.data(), pitch)); } + const auto interpolation = p.s->font->antialiasingMode == D2D1_ANTIALIAS_MODE_ALIASED ? 
D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC; const D2D1_RECT_F dest{ static_cast(rect.x), static_cast(rect.y), @@ -1292,7 +1302,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::Glyph }; _d2dBeginDrawing(); - _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC, nullptr, nullptr); + _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, interpolation, nullptr, nullptr); // TODO: What is the p.s->font->softFontCenteringHint? entry.data.offset.x = 0; @@ -1302,6 +1312,13 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, BackendD3D::Glyph entry.data.texcoord.x = rect.x; entry.data.texcoord.y = rect.y; entry.data.shadingType = ShadingType::TextGrayscale; + + if (lineRendition >= LineRendition::DoubleHeightTop) + { + entry.data.offset.y -= p.s->font->cellSize.y; + _splitDoubleHeightGlyph(p, entry); + } + return true; } diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp index e64fd7511f0..bb94129f9b1 100644 --- a/src/tools/RenderingTests/main.cpp +++ b/src/tools/RenderingTests/main.cpp @@ -191,32 +191,33 @@ int main() printUTF16(L"\x1bP1;1;2{ @\x1b\\"); }; + constexpr auto width = 13; const auto glyph = - " W W " - " W W " - " W W W " - " W W W " - " W W W " - " W W W TTTTTTT " - " W W T " - " T " - " T " - " T " - " T " - " T "; + "W W " + "W W " + "W W W " + "W W W " + "W W W " + "W W W TTTTTTT" + " W W T " + " T " + " T " + " T " + " T " + " T "; // Convert the above visual glyph to sixels - wchar_t rows[2][15]; + wchar_t rows[2][width]; for (int r = 0; r < 2; ++r) { - const auto glyphData = &glyph[r * 15 * 6]; + const auto glyphData = &glyph[r * width * 6]; - for (int x = 0; x < 15; ++x) + for (int x = 0; x < width; ++x) { unsigned int accumulator = 0; for (int y = 5; y >= 0; --y) { - const auto isSet = glyphData[y * 15 + x] != ' '; + const auto isSet = glyphData[y * width + x] != ' '; 
accumulator <<= 1; accumulator |= static_cast(isSet); } @@ -225,21 +226,33 @@ int main() } } + // DECDLD - Dynamically Redefinable Character Sets printfUTF16( // * Pfn | font number | 1 | // * Pcn | starting character | 3 | = ASCII 0x23 "#" // * Pe | erase control | 2 | erase all - // Pcmw | character matrix width | 0 | 15 pixels + // Pcmw | character matrix width | 13 | 13 pixels // Pw | font width | 0 | 80 columns // Pt | text or full cell | 0 | text // Pcmh | character matrix height | 0 | 12 pixels // Pcss | character set size | 0 | 94 // * Dscs | character set name | " @" | unregistered soft set - L"\x1bP1;3;2{ @%.15s/%.15s\x1b\\", + L"\x1bP1;3;2;%d{ @%.15s/%.15s\x1b\\", + width, rows[0], rows[1]); - printUTF16(L"\x1B[3;5HDECDLD glyph \"WT\": \x1b( @#\x1b(A"); +#define DRCS_SEQUENCE L"\x1b( @#\x1b(A" + printUTF16( + L"\x1B[3;5HDECDLD and DRCS test - it should show \"WT\" in a single cell" + L"\x1B[5;5HRegular: " DRCS_SEQUENCE L"" + L"\x1B[7;3H\x1b#6DECDWL: " DRCS_SEQUENCE L"" + L"\x1B[9;3H\x1b#3DECDHL: " DRCS_SEQUENCE L"" + L"\x1B[10;3H\x1b#4DECDHL: " DRCS_SEQUENCE L"" + // We map soft fonts into the private use area starting at U+EF20. This test ensures + // that we correctly map actual fallback glyphs mixed into the DRCS glyphs. 
+ L"\x1B[12;5HUnicode Fallback: \uE000\uE001" DRCS_SEQUENCE L"\uE003\uE004"); +#undef DRCS_SEQUENCE wait(); } From 20cb489b56537ad32a3196ee6adea90572dc2526 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 31 Mar 2023 00:22:29 +0200 Subject: [PATCH 19/37] Fix AuditMode failures --- src/renderer/atlas/common.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index ef4dde530e3..78a0f162d2f 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -165,13 +165,15 @@ namespace Microsoft::Console::Render::Atlas destroy(); } + Buffer(const Buffer& other) = delete; + Buffer& operator=(const Buffer& other) = delete; + Buffer(Buffer&& other) noexcept : _data{ std::exchange(other._data, nullptr) }, _size{ std::exchange(other._size, 0) } { } -#pragma warning(suppress : 26432) // If you define or delete any default operation in the type '...', define or delete them all (c.21). Buffer& operator=(Buffer&& other) noexcept { destroy(); From 4caf341e5a46b5d326d2b799efcc6f897afef954 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 31 Mar 2023 16:02:37 +0200 Subject: [PATCH 20/37] Fix background opacity in BackendD2D --- src/renderer/atlas/AtlasEngine.api.cpp | 1 - src/renderer/atlas/AtlasEngine.cpp | 2 +- src/renderer/atlas/Backend.h | 17 +++++++++------- src/renderer/atlas/BackendD3D.cpp | 9 ++++++--- src/renderer/atlas/common.h | 1 - src/renderer/atlas/shader_ps.hlsl | 1 - src/tools/RenderingTests/main.cpp | 28 +++++++++++++------------- 7 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 3ac4e1dd7ec..12a46decbc5 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -165,7 +165,6 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept const auto softFont = _api.s.write()->font.write(); softFont->softFontPattern = 
std::vector(bitPattern.begin(), bitPattern.end()); softFont->softFontCellSize = cellSize; - softFont->softFontCenteringHint = centeringHint; return S_OK; } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 34072863253..14033974491 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -305,7 +305,7 @@ try const auto backgroundRow = _p.backgroundBitmap.begin() + _p.backgroundBitmapStride * y; auto it = backgroundRow + x; const auto end = backgroundRow + (static_cast(column) << shift); - const auto bg = _api.currentColor.y; + const auto bg = u32ColorPremultiply(_api.currentColor.y); for (; it != end; ++it) { diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index e5a0cd3d2c1..1be9ef7ef4b 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -86,14 +86,17 @@ namespace Microsoft::Console::Render::Atlas return { r, g, b, a }; } - template - constexpr T colorFromU32Premultiply(u32 rgba) + constexpr u32 u32ColorPremultiply(u32 rgba) { - const auto r = static_cast((rgba >> 0) & 0xff) / 255.0f; - const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; - const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; - const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; - return { r * a, g * a, b * a, a }; + auto rb = rgba & 0x00ff00ff; + auto g = rgba & 0x0000ff00; + const auto a = rgba & 0xff000000; + + const auto m = rgba >> 24; + rb = (rb * m / 0xff) & 0x00ff00ff; + g = (g * m / 0xff) & 0x0000ff00; + + return rb | g | a; } // MSVC STL (version 22000) implements std::clamp(T, T, T) in terms of the generic diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 8788a47d1dc..f33a1455c5a 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1259,12 +1259,16 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& if (!_softFontBitmap) { + // Allocating 
such a tiny texture is very wasteful (min. texture size on GPUs + // right now is 64kB), but this is a seldomly used feature so it's fine... const D2D1_SIZE_U size{ static_cast(p.s->font->softFontCellSize.width), static_cast(p.s->font->softFontCellSize.height), }; - static constexpr D2D1_BITMAP_PROPERTIES1 bitmapProperties{ + const D2D1_BITMAP_PROPERTIES1 bitmapProperties{ .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, + .dpiX = static_cast(p.s->font->dpi), + .dpiY = static_cast(p.s->font->dpi), }; THROW_IF_FAILED(_d2dRenderTarget->CreateBitmap(size, nullptr, 0, &bitmapProperties, _softFontBitmap.addressof())); } @@ -1293,7 +1297,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& THROW_IF_FAILED(_softFontBitmap->CopyFromMemory(nullptr, bitmapData.data(), pitch)); } - const auto interpolation = p.s->font->antialiasingMode == D2D1_ANTIALIAS_MODE_ALIASED ? D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC; + const auto interpolation = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_ALIASED ? D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC; const D2D1_RECT_F dest{ static_cast(rect.x), static_cast(rect.y), @@ -1304,7 +1308,6 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& _d2dBeginDrawing(); _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, interpolation, nullptr, nullptr); - // TODO: What is the p.s->font->softFontCenteringHint? 
entry.data.offset.x = 0; entry.data.offset.y = -p.s->font->baseline; entry.data.size.x = rect.w; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 78a0f162d2f..69d995fca68 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -316,7 +316,6 @@ namespace Microsoft::Console::Render::Atlas std::vector softFontPattern; til::size softFontCellSize; - size_t softFontCenteringHint = 0; }; struct CursorSettings diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 2b86534ded4..169d47a085c 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -35,7 +35,6 @@ Output main(PSData data) : SV_Target case SHADING_TYPE_TEXT_BACKGROUND: float2 pos = data.texcoord / cellSize; color = all(pos < cellCount) ? background[pos] : backgroundColor; - color.rgb *= color.a; weights = float4(1, 1, 1, 1); break; case SHADING_TYPE_TEXT_GRAYSCALE: diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp index bb94129f9b1..b933d9301da 100644 --- a/src/tools/RenderingTests/main.cpp +++ b/src/tools/RenderingTests/main.cpp @@ -191,20 +191,20 @@ int main() printUTF16(L"\x1bP1;1;2{ @\x1b\\"); }; - constexpr auto width = 13; + constexpr auto width = 14; const auto glyph = - "W W " - "W W " - "W W W " - "W W W " - "W W W " - "W W W TTTTTTT" - " W W T " - " T " - " T " - " T " - " T " - " T "; + "W W " + "W W " + "W W W " + "W W W " + "W W W " + "W W W TTTTTTT" + " W W T " + " T " + " T " + " T " + " T " + " T "; // Convert the above visual glyph to sixels wchar_t rows[2][width]; @@ -231,7 +231,7 @@ int main() // * Pfn | font number | 1 | // * Pcn | starting character | 3 | = ASCII 0x23 "#" // * Pe | erase control | 2 | erase all - // Pcmw | character matrix width | 13 | 13 pixels + // Pcmw | character matrix width | %d | `width` pixels // Pw | font width | 0 | 80 columns // Pt | text or full cell | 0 | text // Pcmh | character matrix height | 0 | 12 pixels From 
d0fcc5be911bced6ee6b8a99ac6ba7ae752d1858 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 31 Mar 2023 23:00:07 +0200 Subject: [PATCH 21/37] Slightly reduce memory usage, Clean up AntialiasingMode, Document IDWriteGlyphRunAnalysis usage --- src/renderer/atlas/AtlasEngine.api.cpp | 4 +- src/renderer/atlas/AtlasEngine.cpp | 22 +++-- src/renderer/atlas/AtlasEngine.h | 6 +- src/renderer/atlas/Backend.cpp | 43 ++++++---- src/renderer/atlas/Backend.h | 4 +- src/renderer/atlas/BackendD2D.cpp | 6 +- src/renderer/atlas/BackendD3D.cpp | 111 +++++++++++++++++++++---- src/renderer/atlas/BackendD3D.h | 5 +- src/renderer/atlas/common.h | 29 ++++--- 9 files changed, 166 insertions(+), 64 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 12a46decbc5..9d678f89c25 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -341,7 +341,7 @@ HRESULT AtlasEngine::Enable() noexcept void AtlasEngine::SetAntialiasingMode(const D2D1_TEXT_ANTIALIAS_MODE antialiasingMode) noexcept { - const auto mode = gsl::narrow_cast(antialiasingMode); + const auto mode = static_cast(antialiasingMode); if (_api.antialiasingMode != mode) { _api.antialiasingMode = mode; @@ -475,7 +475,7 @@ void AtlasEngine::_resolveTransparencySettings() noexcept // If the user asks for ClearType, but also for a transparent background // (which our ClearType shader doesn't simultaneously support) // then we need to sneakily force the renderer to grayscale AA. - const u8 antialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE : _api.antialiasingMode; + const auto antialiasingMode = _api.enableTransparentBackground && _api.antialiasingMode == AntialiasingMode::ClearType ? 
AntialiasingMode::Grayscale : _api.antialiasingMode; const bool enableTransparentBackground = _api.enableTransparentBackground || !_api.s->misc->customPixelShaderPath.empty() || _api.s->misc->useRetroTerminalEffect; if (antialiasingMode != _api.s->font->antialiasingMode || enableTransparentBackground != _api.s->target->enableTransparentBackground) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 14033974491..bdfdfe99777 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -577,7 +577,7 @@ void AtlasEngine::_flushBufferLine() auto& row = *_p.rows[_api.lastPaintBufferLineCoord.y]; - wil::com_ptr mappedFontFace; + wil::com_ptr mappedFontFace; #pragma warning(suppress : 26494) // Variable 'mappedEnd' is uninitialized. Always initialize an object (type.5). for (u32 idx = 0, mappedEnd; idx < _api.bufferLine.size(); idx = mappedEnd) @@ -608,10 +608,9 @@ void AtlasEngine::_flushBufferLine() // We can reuse idx here, as it'll be reset to "idx = mappedEnd" in the outer loop anyways. 
for (u32 complexityLength = 0; idx < mappedEnd; idx += complexityLength) { - BOOL isTextSimple; + BOOL isTextSimple = FALSE; THROW_IF_FAILED(_p.textAnalyzer->GetTextComplexity(_api.bufferLine.data() + idx, mappedEnd - idx, mappedFontFace.get(), &isTextSimple, &complexityLength, _api.glyphIndices.data())); -#pragma warning(suppress : 4127) if (isTextSimple) { for (size_t i = 0; i < complexityLength; ++i) @@ -640,7 +639,7 @@ void AtlasEngine::_flushBufferLine() } } -void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace** mappedFontFace) const +void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace2** mappedFontFace) const { TextAnalysisSource analysisSource{ text, textLength }; const auto& textFormatAxis = _api.textFormatAxes[static_cast(_api.attributes)]; @@ -680,7 +679,7 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* if (font) { - THROW_IF_FAILED(font->CreateFontFace(mappedFontFace)); + THROW_IF_FAILED(font->CreateFontFace(reinterpret_cast(mappedFontFace))); } } } @@ -856,6 +855,11 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) _api.replacementCharacterLookedUp = true; } + if (!_api.replacementCharacterFontFace) + { + return; + } + static constexpr auto isSoftFontChar = [](wchar_t ch) noexcept { return ch >= 0xEF20 && ch < 0xEF80; }; @@ -888,9 +892,9 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) if (currentlyMappingSoftFont != nowMappingSoftFont) { const auto indicesCount = row.glyphIndices.size(); - const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? IDWriteFontFace_SoftFont : _api.replacementCharacterFontFace.get(); + const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? 
nullptr : _api.replacementCharacterFontFace.get(); - if (indicesCount > initialIndicesCount && fontFace) + if (indicesCount > initialIndicesCount) { row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); initialIndicesCount = indicesCount; @@ -904,9 +908,9 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) { const auto indicesCount = row.glyphIndices.size(); - const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? IDWriteFontFace_SoftFont : _api.replacementCharacterFontFace.get(); + const auto fontFace = currentlyMappingSoftFont && softFontAvailable ? nullptr : _api.replacementCharacterFontFace.get(); - if (indicesCount > initialIndicesCount && fontFace) + if (indicesCount > initialIndicesCount) { row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 68c1870393c..21eaaa43dee 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -82,7 +82,7 @@ namespace Microsoft::Console::Render::Atlas void _recreateFontDependentResources(); void _recreateCellCountDependentResources(); void _flushBufferLine(); - void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace** mappedFontFace) const; + void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace2** mappedFontFace) const; void _mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row); __declspec(noinline) void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); @@ -117,7 +117,7 @@ namespace Microsoft::Console::Render::Atlas // These two are redundant with TargetSettings/MiscellaneousSettings, but that's because _resolveTransparencySettings() // turns the given settings into potentially 
different actual settings (which are then written into the Settings). bool enableTransparentBackground = false; - u8 antialiasingMode = DefaultAntialiasingMode; + AntialiasingMode antialiasingMode = DefaultAntialiasingMode; std::vector bufferLine; std::vector bufferLineColumn; @@ -132,7 +132,7 @@ namespace Microsoft::Console::Render::Atlas Buffer glyphAdvances; Buffer glyphOffsets; - wil::com_ptr replacementCharacterFontFace; + wil::com_ptr replacementCharacterFontFace; u16 replacementCharacterGlyphIndex = 0; bool replacementCharacterLookedUp = false; diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 8966b9dbdd2..f909bf10392 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -170,12 +170,8 @@ void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) _fontGeneration = p.s->font.generation(); } -// Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. -// `d2dRenderTarget4` and `dwriteFactory4` are optional and used to draw colored glyphs. -// Returns true if the `DWRITE_GLYPH_RUN` contained a color glyph. -bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +wil::com_ptr Microsoft::Console::Render::Atlas::TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) { - static constexpr auto measuringMode = DWRITE_MEASURING_MODE_NATURAL; static constexpr auto formats = DWRITE_GLYPH_IMAGE_FORMATS_TRUETYPE | DWRITE_GLYPH_IMAGE_FORMATS_CFF | @@ -188,24 +184,39 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRend wil::com_ptr enumerator; - // If ID2D1DeviceContext4 isn't supported, we'll exit early below. 
- auto hr = DWRITE_E_NOCOLOR; - - if (d2dRenderTarget4) + if (dwriteFactory4) { - // Support for ID2D1DeviceContext4 implies support for IDWriteFactory4. - // ID2D1DeviceContext4 is required for drawing below. - hr = dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, measuringMode, nullptr, 0, &enumerator); + THROW_IF_FAILED(dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, DWRITE_MEASURING_MODE_NATURAL, nullptr, 0, enumerator.addressof())); } - if (hr == DWRITE_E_NOCOLOR) + return enumerator; +} + +// Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. +// `d2dRenderTarget4` and `dwriteFactory4` are optional and used to draw colored glyphs. +// Returns true if the `DWRITE_GLYPH_RUN` contained a color glyph. +bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +{ + // Support for ID2D1DeviceContext4 implies support for IDWriteFactory4 and vice versa. 
+ if (const auto enumerator = TranslateColorGlyphRun(dwriteFactory4, baselineOrigin, glyphRun)) + { + DrawColorGlyphRun(d2dRenderTarget4, enumerator.get(), foregroundBrush); + return true; + } + else { - d2dRenderTarget->DrawGlyphRun(baselineOrigin, glyphRun, foregroundBrush, measuringMode); + DrawBasicGlyphRun(d2dRenderTarget, baselineOrigin, glyphRun, foregroundBrush); return false; } +} - THROW_IF_FAILED(hr); +void Microsoft::Console::Render::Atlas::DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +{ + d2dRenderTarget->DrawGlyphRun(baselineOrigin, glyphRun, foregroundBrush, DWRITE_MEASURING_MODE_NATURAL); +} +void Microsoft::Console::Render::Atlas::DrawColorGlyphRun(ID2D1DeviceContext4* d2dRenderTarget4, IDWriteColorGlyphRunEnumerator1* enumerator, ID2D1Brush* foregroundBrush) +{ const auto previousAntialiasingMode = d2dRenderTarget4->GetTextAntialiasMode(); d2dRenderTarget4->SetTextAntialiasMode(D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE); const auto cleanup = wil::scope_exit([&]() { @@ -267,8 +278,6 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRend break; } } - - return true; } TIL_FAST_MATH_END diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 1be9ef7ef4b..672e656c8f5 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -108,6 +108,8 @@ namespace Microsoft::Console::Render::Atlas return val < min ? min : (max < val ? 
max : val); } - f32r GetGlyphRunBlackBox(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); + wil::com_ptr TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun); bool DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); + void DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); + void DrawColorGlyphRun(ID2D1DeviceContext4* d2dRenderTarget4, IDWriteColorGlyphRunEnumerator1* enumerator, ID2D1Brush* foregroundBrush); } diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 052486c66e7..fc165931572 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -273,10 +273,14 @@ void BackendD2D::_drawText(RenderingPayload& p) .glyphAdvances = &row->glyphAdvances[off], .glyphOffsets = &row->glyphOffsets[off], }; + const D2D1_POINT_2F baselineOrigin{ + baselineX, + baselineY, + }; if (m.fontFace.is_proper_font()) { - DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), { baselineX, baselineY }, &glyphRun, brush); + DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, brush); } for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index f33a1455c5a..e084504be1d 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -450,7 +450,7 @@ void BackendD3D::_updateFontDependents(const RenderingPayload& p) DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); // Clearing the atlas requires BeginDraw(), which is 
expensive. Defer this until we need Direct2D anyways. _fontChangedResetGlyphAtlas = true; - _textShadingType = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? ShadingType::TextClearType : ShadingType::TextGrayscale; + _textShadingType = p.s->font->antialiasingMode == AntialiasingMode::ClearType ? ShadingType::TextClearType : ShadingType::TextGrayscale; if (_d2dRenderTarget) { @@ -658,7 +658,7 @@ void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; DWrite_GetGammaRatios(_gamma, data.gammaRatios); - data.enhancedContrast = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE ? _cleartypeEnhancedContrast : _grayscaleEnhancedContrast; + data.enhancedContrast = p.s->font->antialiasingMode == AntialiasingMode::ClearType ? _cleartypeEnhancedContrast : _grayscaleEnhancedContrast; data.dashedLineLength = p.s->font->underlineWidth * 3.0f; _deviceContext->UpdateSubresource(_psConstantBuffer.get(), 0, nullptr, &data, 0, 0); } @@ -801,13 +801,7 @@ void BackendD3D::_d2dEndDrawing() } } -void BackendD3D::_handleFontChangedResetGlyphAtlas(const RenderingPayload& p) -{ - _fontChangedResetGlyphAtlas = false; - _resetGlyphAtlasAndBeginDraw(p); -} - -void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) +void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) { // The index returned by _BitScanReverse is undefined when the input is 0. We can simultaneously guard // against that and avoid unreasonably small textures, by clamping the min. texture size to `minArea`. @@ -868,6 +862,19 @@ void BackendD3D::_resetGlyphAtlasAndBeginDraw(const RenderingPayload& p) _d2dRenderTargetUpdateFontSettings(*p.s->font); } + // We have our own glyph cache so Direct2D's cache doesn't help much. 
+ // This saves us 1MB of RAM, which is not much, but also not nothing. + { + wil::com_ptr device; + _d2dRenderTarget4->GetDevice(device.addressof()); + + device->SetMaximumTextureMemory(0); + if (const auto device4 = device.try_query()) + { + device4->SetMaximumColorGlyphCacheMemory(0); + } + } + { static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); @@ -1051,7 +1058,8 @@ void BackendD3D::_drawText(RenderingPayload& p) { if (_fontChangedResetGlyphAtlas) { - _handleFontChangedResetGlyphAtlas(p); + _fontChangedResetGlyphAtlas = false; + _resetGlyphAtlas(p); } til::CoordType dirtyTop = til::CoordTypeMax; @@ -1147,6 +1155,74 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f .glyphIndices = &entry.key.glyphIndex, }; + // It took me a while to figure out how to rasterize glyphs manually with DirectWrite without depending on Direct2D. + // The benefits are a reduction in memory usage, an increase in performance under certain circumstances and most + // importantly, the ability to debug the renderer more easily, because many graphics debuggers don't support Direct2D. + // Direct2D has one big advantage however: It features a clever glyph uploader with a pool of D3D11_USAGE_STAGING textures, + // which I was too short on time to implement myself. This makes rasterization with Direct2D roughly 2x faster. + // + // This code remains, because it features some parts that are slightly more and some parts that are outright difficult to figure out. +#if 0 + const auto wantsClearType = p.s->font->antialiasingMode == AntialiasingMode::ClearType; + const auto wantsAliased = p.s->font->antialiasingMode == AntialiasingMode::Aliased; + const auto antialiasMode = wantsClearType ? DWRITE_TEXT_ANTIALIAS_MODE_CLEARTYPE : DWRITE_TEXT_ANTIALIAS_MODE_GRAYSCALE; + const auto outlineThreshold = wantsAliased ? 
DWRITE_OUTLINE_THRESHOLD_ALIASED : DWRITE_OUTLINE_THRESHOLD_ANTIALIASED; + + DWRITE_RENDERING_MODE renderingMode{}; + DWRITE_GRID_FIT_MODE gridFitMode{}; + THROW_IF_FAILED(entry.key.fontFace->GetRecommendedRenderingMode( + /* fontEmSize */ fontEmSize, + /* dpiX */ 1, // fontEmSize is already in pixel + /* dpiY */ 1, // fontEmSize is already in pixel + /* transform */ nullptr, + /* isSideways */ FALSE, + /* outlineThreshold */ outlineThreshold, + /* measuringMode */ DWRITE_MEASURING_MODE_NATURAL, + /* renderingParams */ _textRenderingParams.get(), + /* renderingMode */ &renderingMode, + /* gridFitMode */ &gridFitMode)); + + wil::com_ptr glyphRunAnalysis; + THROW_IF_FAILED(p.dwriteFactory->CreateGlyphRunAnalysis( + /* glyphRun */ &glyphRun, + /* transform */ nullptr, + /* renderingMode */ renderingMode, + /* measuringMode */ DWRITE_MEASURING_MODE_NATURAL, + /* gridFitMode */ gridFitMode, + /* antialiasMode */ antialiasMode, + /* baselineOriginX */ 0, + /* baselineOriginY */ 0, + /* glyphRunAnalysis */ glyphRunAnalysis.put())); + + RECT textureBounds{}; + + if (wantsClearType) + { + THROW_IF_FAILED(glyphRunAnalysis->GetAlphaTextureBounds(DWRITE_TEXTURE_CLEARTYPE_3x1, &textureBounds)); + + // Some glyphs cannot be drawn with ClearType, such as bitmap fonts. In that case + // GetAlphaTextureBounds() supposedly returns an empty RECT, but I haven't tested that yet. + if (!IsRectEmpty(&textureBounds)) + { + // Allocate a buffer of `3 * width * height` bytes. + THROW_IF_FAILED(glyphRunAnalysis->CreateAlphaTexture(DWRITE_TEXTURE_CLEARTYPE_3x1, &textureBounds, buffer.data(), buffer.size())); + // The buffer contains RGB ClearType weights which can now be packed into RGBA and uploaded to the glyph atlas. + return; + } + + // --> Retry with grayscale AA. + } + + // Even though it says "ALIASED" and even though the docs for the flag still say: + // > [...] that is, each pixel is either fully opaque or fully transparent [...] + // don't be confused: It's grayscale antialiased. 
lol + THROW_IF_FAILED(glyphRunAnalysis->GetAlphaTextureBounds(DWRITE_TEXTURE_ALIASED_1x1, &textureBounds)); + + // Allocate a buffer of `width * height` bytes. + THROW_IF_FAILED(glyphRunAnalysis->CreateAlphaTexture(DWRITE_TEXTURE_ALIASED_1x1, &textureBounds, buffer.data(), buffer.size())); + // The buffer now contains a grayscale alpha mask. +#endif + const auto lineRendition = static_cast(entry.key.lineRendition); std::optional transform; @@ -1204,9 +1280,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f return false; } - _d2dBeginDrawing(); - - const D2D1_POINT_2F baseline{ + const D2D1_POINT_2F baselineOrigin{ static_cast(rect.x - bl), static_cast(rect.y - bt), }; @@ -1214,12 +1288,13 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f if (transform) { auto& t = *transform; - t.dx = (1.0f - t.m11) * baseline.x; - t.dy = (1.0f - t.m22) * baseline.y; + t.dx = (1.0f - t.m11) * baselineOrigin.x; + t.dy = (1.0f - t.m22) * baselineOrigin.y; _d2dRenderTarget->SetTransform(&t); } - const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baseline, &glyphRun, _brush.get()); + _d2dBeginDrawing(); + const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); entry.data.offset.x = bl; entry.data.offset.y = bt; @@ -1297,7 +1372,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& THROW_IF_FAILED(_softFontBitmap->CopyFromMemory(nullptr, bitmapData.data(), pitch)); } - const auto interpolation = p.s->font->antialiasingMode == D2D1_TEXT_ANTIALIAS_MODE_ALIASED ? D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC; + const auto interpolation = p.s->font->antialiasingMode == AntialiasingMode::Aliased ? 
D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR : D2D1_INTERPOLATION_MODE_HIGH_QUALITY_CUBIC; const D2D1_RECT_F dest{ static_cast(rect.x), static_cast(rect.y), @@ -1330,7 +1405,7 @@ void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) THROW_HR_IF_MSG(E_UNEXPECTED, _glyphCache.Size() == 0, "BackendD3D::_drawGlyph deadlock"); _d2dEndDrawing(); _flushQuads(p); - _resetGlyphAtlasAndBeginDraw(p); + _resetGlyphAtlas(p); } // If this is a double-height glyph (DECDHL), we need to split it into 2 glyph entries: diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 3eac1b0dab7..9756af4f2ea 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -89,7 +89,7 @@ namespace Microsoft::Console::Render::Atlas // // This isn't using a raw pointer instead of a managed struct, because this allows // us to construct a GlyphCacheKey for lookup without AddRef()ing the fontFace. - IDWriteFontFace* fontFace = nullptr; + IDWriteFontFace2* fontFace = nullptr; u16 lineRendition = 0; u16 glyphIndex = 0; #ifdef _WIN64 @@ -160,8 +160,7 @@ namespace Microsoft::Console::Render::Atlas void _debugDumpRenderTarget(const RenderingPayload& p); void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); - void _handleFontChangedResetGlyphAtlas(const RenderingPayload& p); - void _resetGlyphAtlasAndBeginDraw(const RenderingPayload& p); + void _resetGlyphAtlas(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; void _appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType); diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 69d995fca68..190e52c98cb 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -290,7 +290,14 @@ namespace Microsoft::Console::Render::Atlas bool useSoftwareRendering = false; }; - inline constexpr auto DefaultAntialiasingMode = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE; + enum class 
AntialiasingMode : u8 + { + ClearType = D2D1_TEXT_ANTIALIAS_MODE_CLEARTYPE, + Grayscale = D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE, + Aliased = D2D1_TEXT_ANTIALIAS_MODE_ALIASED, + }; + + inline constexpr auto DefaultAntialiasingMode = AntialiasingMode::ClearType; struct FontSettings { @@ -312,7 +319,7 @@ namespace Microsoft::Console::Render::Atlas u16x2 doubleUnderlinePos; u16 thinLineWidth = 0; u16 dpi = 96; - u8 antialiasingMode = DefaultAntialiasingMode; + AntialiasingMode antialiasingMode = DefaultAntialiasingMode; std::vector softFontPattern; til::size softFontCellSize; @@ -371,7 +378,7 @@ namespace Microsoft::Console::Render::Atlas // bigger mess than this violation against the C++ consortium's conscience. It also didn't help BackendD3D, // which hashes FontFace and an additional flag field would double the hashmap key size due to padding. // It's a macro, because constexpr doesn't work here in C++20 and regular "const" doesn't inline. -#define IDWriteFontFace_SoftFont (static_cast(nullptr) + 1) +#define IDWriteFontFace_SoftFont (static_cast(nullptr) + 1) // The existence of IDWriteFontFace_SoftFont unfortunately requires us to reimplement wil::com_ptr. // @@ -385,6 +392,8 @@ namespace Microsoft::Console::Render::Atlas #pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). 
struct FontFace { + using InterfaceType = IDWriteFontFace2; + FontFace() = default; ~FontFace() noexcept @@ -417,36 +426,36 @@ namespace Microsoft::Console::Render::Atlas return *this; } - FontFace(IDWriteFontFace* ptr) noexcept : + FontFace(InterfaceType* ptr) noexcept : _ptr{ ptr } { _addRef(); } - FontFace(const wil::com_ptr& other) noexcept : + FontFace(const wil::com_ptr& other) noexcept : FontFace{ other.get() } { } - FontFace(wil::com_ptr&& other) noexcept : + FontFace(wil::com_ptr&& other) noexcept : _ptr{ other.detach() } { } - void attach(IDWriteFontFace* other) noexcept + void attach(InterfaceType* other) noexcept { _release(); _ptr = other; } - [[nodiscard]] IDWriteFontFace* detach() noexcept + [[nodiscard]] InterfaceType* detach() noexcept { const auto tmp = _ptr; _ptr = nullptr; return tmp; } - IDWriteFontFace* get() const noexcept + InterfaceType* get() const noexcept { return _ptr; } @@ -473,7 +482,7 @@ namespace Microsoft::Console::Render::Atlas } } - IDWriteFontFace* _ptr = nullptr; + InterfaceType* _ptr = nullptr; }; #pragma warning(pop) From 480361753a377b2cbebff197dd309a8cdbec4ee7 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Sun, 2 Apr 2023 20:30:50 +0200 Subject: [PATCH 22/37] Fix line endings, Remove weird IDWriteFontFace_SoftFont, Add flat_set & 15% faster --- src/inc/til/flat_set.h | 194 +++++++++ src/renderer/atlas/AtlasEngine.cpp | 22 +- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/Backend.cpp | 6 +- src/renderer/atlas/Backend.h | 4 +- src/renderer/atlas/BackendD2D.cpp | 8 +- src/renderer/atlas/BackendD3D.cpp | 406 ++++++++---------- src/renderer/atlas/BackendD3D.h | 124 +++--- src/renderer/atlas/colorbrewer.h | 72 ++-- src/renderer/atlas/common.h | 116 +---- src/renderer/atlas/shader_common.hlsl | 92 ++-- src/renderer/atlas/wic.h | 118 ++--- src/til/ut_til/til.unit.tests.vcxproj | 6 +- src/til/ut_til/til.unit.tests.vcxproj.filters | 14 +- tools/ConsoleTypes.natvis | 24 +- 15 files changed, 644 insertions(+), 564 
deletions(-) create mode 100644 src/inc/til/flat_set.h diff --git a/src/inc/til/flat_set.h b/src/inc/til/flat_set.h new file mode 100644 index 00000000000..e344964ce64 --- /dev/null +++ b/src/inc/til/flat_set.h @@ -0,0 +1,194 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#pragma warning(push) +#pragma warning(suppress : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). + +namespace til +{ + // A simple hash function for simple hash maps. + // As demonstrated in https://doi.org/10.14778/2850583.2850585, a simple "multiply and shift" hash performs + // very well with linear probing hash maps and I found this to be true as well in my own testing. This hash + // function doesn't do the "shift" part, because linear_flat_set already does it by an appropriate amount. + constexpr size_t flat_set_hash_integer(size_t v) noexcept + { + // These two multipliers are the same as used by the PCG family of random number generators. + // The 32-Bit version is described in https://doi.org/10.1090/S0025-5718-99-00996-5, Table 5. + // The 64-Bit version is the multiplier as used by Donald Knuth for MMIX and found by C. E. Haynes. +#ifdef _WIN64 + return v * UINT64_C(6364136223846793005); +#else + return v * UINT32_C(747796405); +#endif + } + + template + struct flat_set_trait; + + // This is an example implementation for a linear_flat_set that can store any size_t != -1. + // Apart from this trait, the only other thing the type T has to implement is a copy or move assignment operator. + template<> + struct flat_set_trait + { + using T = size_t; + + static size_t hash(T v) noexcept + { + return flat_set_hash_integer(v); + } + + // Return true if the key and existing slot in the hashmap match. + static bool equals(T slot, T key) + { + return slot == key; + } + + // Return true if this slot can be filled with a new item. 
+ static bool empty(T slot) + { + return slot == -1; + } + + // Called when a new item is inserted into the hashmap. + // T::operator=(T&&) is called when the map is resized and existing items must be moved over. + static void fill(T& slot, T key) + { + slot = key; + } + + // Called when a new backing buffer is allocated. You need to then initialize the raw memory. + static std::unique_ptr allocate(size_t capacity) + { + return std::unique_ptr{ new T[capacity]{ size_t(-1) } }; + } + + static void clear(T* data, size_t capacity) noexcept + { + for (auto& slot : std::span{ data, capacity }) + { + slot = size_t(-1); + } + } + }; + + // A basic hashmap with linear probing. A `LoadFactor` of 2 equals + // a max. load of roughly 50% and a `LoadFactor` of 4 roughly 25%. + // + // It performs best with: + // * small and cheap T + // * >= 50% successful lookups + // * <= 50% load factor (LoadFactor >= 2, which is the minimum anyways) + template + struct linear_flat_set + { + using Trait = typename flat_set_trait; + + static_assert(LoadFactor >= 2); + + bool empty() const noexcept + { + return _load == 0; + } + + size_t load() const noexcept + { + return _load; + } + + size_t size() const noexcept + { + return _load / LoadFactor; + } + + template + std::pair insert(U&& key) + { + // Putting this into the lookup path is a little pessimistic, but it + // allows us to default-construct this hashmap with a size of 0. + if (_load >= _capacity) [[unlikely]] + { + _bumpSize(); + } + + // The most common, basic and performant hash function is to multiply the value + // by some prime number and divide by the number of slots. It's been shown + // many times in literature that such a scheme performs the best on average. + // As such, we perform the divide here to get the topmost bits down. 
+ const auto hash = Trait::hash(key) >> _shift; + + for (auto i = hash;; ++i) + { + auto& slot = _map[i & _mask]; + if (Trait::empty(slot)) + { + Trait::fill(slot, std::forward(key)); + _load += LoadFactor; + return { slot, true }; + } + if (Trait::equals(slot, key)) [[likely]] + { + return { slot, false }; + } + } + } + + void clear() noexcept + { + Trait::clear(_map.get(), _capacity); + _load = 0; + } + + private: + __declspec(noinline) void _bumpSize() + { + if (!_shift) + { + throw std::bad_array_new_length{}; + } + + const auto newShift = _shift - 1; + const auto newCapacity = size_t{ 1 } << (digits - newShift); + const auto newMask = newCapacity - 1; + auto newMap = Trait::allocate(newCapacity); + + // This mirrors the insert() function, but without the lookup part. + for (auto& oldSlot : std::span{ _map.get(), _capacity }) + { + if (Trait::empty(oldSlot)) + { + continue; + } + + const auto hash = Trait::hash(oldSlot) >> newShift; + + for (auto i = hash;; ++i) + { + auto& slot = newMap[i & newMask]; + if (Trait::empty(slot)) + { + slot = std::move_if_noexcept(oldSlot); + break; + } + } + } + + _map = std::move(newMap); + _capacity = newCapacity; + _shift = newShift; + _mask = newMask; + } + + static constexpr auto digits = std::numeric_limits::digits; + + std::unique_ptr _map; + size_t _capacity = 0; + size_t _load = 0; + // This results in an initial capacity of 8 items, independent of the LoadFactor. + size_t _shift = digits - LoadFactor - 1; + size_t _mask = 0; + }; +} + +#pragma warning(pop) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index bdfdfe99777..758f95c7975 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -582,9 +582,8 @@ void AtlasEngine::_flushBufferLine() #pragma warning(suppress : 26494) // Variable 'mappedEnd' is uninitialized. Always initialize an object (type.5). 
for (u32 idx = 0, mappedEnd; idx < _api.bufferLine.size(); idx = mappedEnd) { - f32 scale = 1; u32 mappedLength = 0; - _mapCharacters(_api.bufferLine.data() + idx, gsl::narrow_cast(_api.bufferLine.size()) - idx, &mappedLength, &scale, mappedFontFace.put()); + _mapCharacters(_api.bufferLine.data() + idx, gsl::narrow_cast(_api.bufferLine.size()) - idx, &mappedLength, mappedFontFace.put()); mappedEnd = idx + mappedLength; if (!mappedFontFace) @@ -634,16 +633,20 @@ void AtlasEngine::_flushBufferLine() const auto indicesCount = row.glyphIndices.size(); if (indicesCount > initialIndicesCount) { - row.mappings.emplace_back(std::move(mappedFontFace), _p.s->font->fontSize * scale, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + row.mappings.emplace_back(std::move(mappedFontFace), gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); } } } -void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace2** mappedFontFace) const +void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* mappedLength, IDWriteFontFace2** mappedFontFace) const { TextAnalysisSource analysisSource{ text, textLength }; const auto& textFormatAxis = _api.textFormatAxes[static_cast(_api.attributes)]; + // We don't read from scale anyways. +#pragma warning(suppress : 26494) // Variable 'scale' is uninitialized. Always initialize an object (type.5). 
+ f32 scale; + if (textFormatAxis) { THROW_IF_FAILED(_p.systemFontFallback1->MapCharacters( @@ -655,7 +658,7 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* /* fontAxisValues */ textFormatAxis.data(), /* fontAxisValueCount */ gsl::narrow_cast(textFormatAxis.size()), /* mappedLength */ mappedLength, - /* scale */ scale, + /* scale */ &scale, /* mappedFontFace */ reinterpret_cast(mappedFontFace))); } else @@ -675,7 +678,7 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* /* baseStretch */ DWRITE_FONT_STRETCH_NORMAL, /* mappedLength */ mappedLength, /* mappedFont */ font.addressof(), - /* scale */ scale)); + /* scale */ &scale)); if (font) { @@ -837,8 +840,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) bool succeeded = false; u32 mappedLength = 0; - f32 scale = 1.0f; - _mapCharacters(L"\uFFFD", 1, &mappedLength, &scale, _api.replacementCharacterFontFace.put()); + _mapCharacters(L"\uFFFD", 1, &mappedLength, _api.replacementCharacterFontFace.put()); if (mappedLength == 1) { @@ -896,7 +898,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) if (indicesCount > initialIndicesCount) { - row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + row.mappings.emplace_back(fontFace, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); initialIndicesCount = indicesCount; } } @@ -912,7 +914,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) if (indicesCount > initialIndicesCount) { - row.mappings.emplace_back(fontFace, _p.s->font->fontSize, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + row.mappings.emplace_back(fontFace, gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); } } } diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 
21eaaa43dee..16ac18c39d9 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -82,7 +82,7 @@ namespace Microsoft::Console::Render::Atlas void _recreateFontDependentResources(); void _recreateCellCountDependentResources(); void _flushBufferLine(); - void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, float* scale, IDWriteFontFace2** mappedFontFace) const; + void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, IDWriteFontFace2** mappedFontFace) const; void _mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row); __declspec(noinline) void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index f909bf10392..56f6edd92b2 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -170,7 +170,7 @@ void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) _fontGeneration = p.s->font.generation(); } -wil::com_ptr Microsoft::Console::Render::Atlas::TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) +wil::com_ptr Microsoft::Console::Render::Atlas::TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) noexcept { static constexpr auto formats = DWRITE_GLYPH_IMAGE_FORMATS_TRUETYPE | @@ -186,7 +186,7 @@ wil::com_ptr Microsoft::Console::Render::Atlas: if (dwriteFactory4) { - THROW_IF_FAILED(dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, DWRITE_MEASURING_MODE_NATURAL, nullptr, 0, enumerator.addressof())); + std::ignore = dwriteFactory4->TranslateColorGlyphRun(baselineOrigin, glyphRun, nullptr, formats, DWRITE_MEASURING_MODE_NATURAL, nullptr, 0, enumerator.addressof()); } return enumerator; @@ -210,7 +210,7 @@ bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* 
d2dRend } } -void Microsoft::Console::Render::Atlas::DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +void Microsoft::Console::Render::Atlas::DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) noexcept { d2dRenderTarget->DrawGlyphRun(baselineOrigin, glyphRun, foregroundBrush, DWRITE_MEASURING_MODE_NATURAL); } diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 672e656c8f5..933463a81e4 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -108,8 +108,8 @@ namespace Microsoft::Console::Render::Atlas return val < min ? min : (max < val ? max : val); } - wil::com_ptr TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun); + wil::com_ptr TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) noexcept; bool DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); - void DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); + void DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) noexcept; void DrawColorGlyphRun(ID2D1DeviceContext4* d2dRenderTarget4, IDWriteColorGlyphRunEnumerator1* enumerator, ID2D1Brush* foregroundBrush); } diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index fc165931572..0d01c8ef262 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -195,14 +195,14 @@ void 
BackendD2D::_drawText(RenderingPayload& p) { for (const auto& m : row->mappings) { - if (!m.fontFace.is_proper_font()) + if (!m.fontFace) { continue; } const DWRITE_GLYPH_RUN glyphRun{ .fontFace = m.fontFace.get(), - .fontEmSize = m.fontEmSize, + .fontEmSize = p.s->font->fontSize, .glyphCount = gsl::narrow_cast(m.glyphsTo - m.glyphsFrom), .glyphIndices = &row->glyphIndices[m.glyphsFrom], .glyphAdvances = &row->glyphAdvances[m.glyphsFrom], @@ -267,7 +267,7 @@ void BackendD2D::_drawText(RenderingPayload& p) const auto brush = _brushWithColor(fg); const DWRITE_GLYPH_RUN glyphRun{ .fontFace = m.fontFace.get(), - .fontEmSize = m.fontEmSize, + .fontEmSize = p.s->font->fontSize, .glyphCount = gsl::narrow_cast(count), .glyphIndices = &row->glyphIndices[off], .glyphAdvances = &row->glyphAdvances[off], @@ -278,7 +278,7 @@ void BackendD2D::_drawText(RenderingPayload& p) baselineY, }; - if (m.fontFace.is_proper_font()) + if (glyphRun.fontFace) { DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, brush); } diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index e084504be1d..9d767397a53 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -4,8 +4,6 @@ #include "pch.h" #include "BackendD3D.h" -#include - #include #include #include @@ -39,157 +37,102 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; -BackendD3D::GlyphCacheMap::~GlyphCacheMap() -{ - Clear(); -} - -BackendD3D::GlyphCacheMap& BackendD3D::GlyphCacheMap::operator=(GlyphCacheMap&& other) noexcept +namespace til { - _map = std::exchange(other._map, {}); - _mask = std::exchange(other._mask, 0); - _capacity = std::exchange(other._capacity, 0); - _size = std::exchange(other._size, 0); - return *this; -} - -size_t BackendD3D::GlyphCacheMap::Size() const noexcept -{ - return _size; -} - -void BackendD3D::GlyphCacheMap::Clear() noexcept -{ - for (const auto& entry : _map) + 
template<> + struct flat_set_trait { - if (entry.key.fontFace > IDWriteFontFace_SoftFont) + using T = BackendD3D::AtlasGlyphEntry; + + static size_t hash(u16 key) noexcept { - // I'm pretty sure Release() doesn't throw exceptions. -#pragma warning(suppress : 26447) // The function is declared 'noexcept' but calls function 'Release()' which may throw exceptions (f.6). - entry.key.fontFace->Release(); + return flat_set_hash_integer(key); } - } - - // memset() is used instead of std::fill() and its variants, because MSVC fails to understand that - // GlyphCacheEntry can be initialized with all zeroes and so it uses much slower approaches. - memset(_map.data(), 0, _map.size() * sizeof(*_map.data())); - _size = 0; -} -BackendD3D::GlyphCacheEntry& BackendD3D::GlyphCacheMap::FindOrInsert(const GlyphCacheKey& key, bool& inserted) -{ - // The fontRendition member must be non-zero to mark the hashmap slot as occupied - //assert(key.fontRendition != LineRendition::None); + static size_t hash(const T& slot) noexcept + { + return flat_set_hash_integer(slot.glyphIndex); + } - // Putting this into the Find() path is a little pessimistic, but it - // allows us to default-construct this hashmap with a size of 0. 
- if (_size >= _capacity) - { - _bumpSize(); - } + static bool equals(const T& slot, u16 key) + { + return slot.glyphIndex == key; + } - const auto hash = _hash(key); - for (auto i = hash;; ++i) - { - auto& entry = _map[i & _mask]; - if (_equals(entry.key, key)) + static bool empty(const T& slot) { - inserted = false; - return entry; + return !slot._occupied; } - if (!entry.key.fontFace) + + static void fill(T& slot, u16 key) { - ++_size; - entry.key = key; - if (entry.key.fontFace > IDWriteFontFace_SoftFont) - { - entry.key.fontFace->AddRef(); - } - inserted = true; - return entry; + slot.glyphIndex = key; + slot._occupied = 1; } - } -} -size_t BackendD3D::GlyphCacheMap::_hash(const GlyphCacheKey& key) noexcept -{ - return til::hash(&key, GlyphCacheKeyDataSize); -} + static std::unique_ptr allocate(size_t capacity) + { + return std::make_unique(capacity); + } -bool BackendD3D::GlyphCacheMap::_equals(const GlyphCacheKey& lhs, const GlyphCacheKey& rhs) noexcept -{ - return memcmp(&lhs, &rhs, GlyphCacheKeyDataSize) == 0; -} + static void clear(T* data, size_t capacity) noexcept + { + memset(data, 0, capacity * sizeof(T)); + } + }; -void BackendD3D::GlyphCacheMap::_bumpSize() -{ - // The following block of code may be used to assess the quality of the hash function. - // The displacement is the distance between the ideal slot the hash value points at to - // the slot the value actually ended up in. A low displacement is not everything however, - // and the size and performance of the hash function is just as important. 
-#if 0 - if (_size) + template<> + struct flat_set_trait { - size_t displacementMax = 0; - size_t displacementTotal = 0; - size_t actualSlot = 0; + using T = BackendD3D::AtlasFontFaceEntry; - for (const auto& entry : _map) + static size_t hash(const BackendD3D::AtlasFontFaceKey& key) noexcept { - if (entry.key.fontFace) - { - const auto idealSlot = _hash(entry.key) & _mask; - size_t displacement = actualSlot - idealSlot; - - // A hash near the end of the map may wrap around to the beginning. - // This if condition will fix the displacement in that case. - if (actualSlot < idealSlot) - { - displacement += _map.size(); - } + return flat_set_hash_integer(std::bit_cast(key.fontFace) ^ static_cast(key.lineRendition)); + } - if (displacement > displacementMax) - { - displacementMax = displacement; - displacementTotal += displacement; - } - } + static size_t hash(const T& slot) noexcept + { + const auto& inner = *slot.inner; + return flat_set_hash_integer(std::bit_cast(inner.fontFace.get()) ^ static_cast(inner.lineRendition)); + } - actualSlot++; + static bool equals(const T& slot, const BackendD3D::AtlasFontFaceKey& key) + { + const auto& inner = *slot.inner; + return inner.fontFace.get() == key.fontFace && inner.lineRendition == key.lineRendition; } - const auto displacementAvg = static_cast(displacementTotal) / static_cast(_size); - wchar_t buffer[128]; - swprintf_s(buffer, L"GlyphCacheMap resize at %zu, max. displacement: %zu, avg. 
displacement: %f%%\r\n", _map.size(), displacementMax, displacementAvg); - OutputDebugStringW(&buffer[0]); - } -#endif + static bool empty(const T& slot) + { + return !slot.inner; + } - const auto newSize = std::max(256, _map.size() * 2); - const auto newMask = newSize - 1; + static void fill(T& slot, const BackendD3D::AtlasFontFaceKey& key) + { + slot.inner = std::make_unique(); - static constexpr auto sizeLimit = std::numeric_limits::max() / 2; - THROW_HR_IF_MSG(E_OUTOFMEMORY, newSize >= sizeLimit, "GlyphCacheMap overflow"); + auto& inner = *slot.inner; + inner.fontFace = key.fontFace; + inner.lineRendition = key.lineRendition; + } - auto newMap = Buffer(newSize); + static std::unique_ptr allocate(size_t capacity) + { + return std::make_unique(capacity); + } - for (const auto& oldEntry : _map) - { - const auto hash = _hash(oldEntry.key); - for (auto i = hash;; ++i) + static void clear(T* data, size_t capacity) noexcept { - auto& newEntry = newMap[i & newMask]; - if (!newEntry.key.fontFace) + for (auto& slot : std::span{ data, capacity }) { - newEntry = oldEntry; - break; + if (!empty(slot)) + { + slot = {}; + } } } - } - - _map = std::move(newMap); - _mask = newMask; - _capacity = newSize / 2; + }; } BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptr deviceContext) : @@ -202,9 +145,9 @@ BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptr(p.s->font->cellSize.x) * p.s->font->cellSize.y * 64; + static constexpr u32 maxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + + const auto cellArea = static_cast(p.s->font->cellSize.x) * p.s->font->cellSize.y; + + const auto minAreaByFont = cellArea * 95; // Covers all printable ASCII characters const auto minAreaByGrowth = static_cast(_rectPacker.width) * _rectPacker.height * 2; - const auto maxArea = static_cast(p.s->targetSize.x) * static_cast(p.s->targetSize.y); - const auto area = std::min(maxArea, std::max(minArea, std::max(minAreaByFont, minAreaByGrowth))); + const auto min 
= std::max(minArea, std::max(minAreaByFont, minAreaByGrowth)); + + // It's hard to say what the max. size of the cache should be. Optimally I think we should use as much memory as + // is available, but the rendering code in this project is a big mess and so integrating memory pressure feedback + // (RegisterVideoMemoryBudgetChangeNotificationEvent) is rather difficult. As an alternative I'm using the size + // of 10k cells of the terminal as an estimate, which scales with the font size and thus the DPI of the display. + const auto maxAreaByFont = cellArea * 10000; + const auto area = std::min(maxArea, std::min(maxAreaByFont, min)); + // This block of code calculates the size of a power-of-2 texture that has an area larger than the given `area`. // For instance, for an area of 985x1946 = 1916810 it would result in a u/v of 2048x1024 (area = 2097152). // This has 2 benefits: GPUs like power-of-2 textures and it ensures that we don't resize the texture @@ -884,7 +839,7 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) _deviceContext->PSSetShaderResources(0, 2, &resources[0]); } - _glyphCache.Clear(); + _glyphAtlasMap.clear(); _rectPackerData = Buffer{ u }; stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), _rectPackerData.size()); @@ -919,7 +874,16 @@ void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 col _bumpInstancesSize(); } - _instances[_instancesCount++] = QuadInstance{ position, size, texcoord, shadingType, color }; + _instances[_instancesCount++] = QuadInstance{ + position.x, + position.y, + size.x, + size.y, + texcoord.x, + texcoord.y, + shadingType, + color, + }; } void BackendD3D::_bumpInstancesSize() @@ -1074,52 +1038,57 @@ void BackendD3D::_drawText(RenderingPayload& p) for (const auto& m : row->mappings) { - GlyphCacheKey key{ + auto x = m.glyphsFrom; + const AtlasFontFaceKey fontFaceKey{ .fontFace = m.fontFace.get(), - .lineRendition = static_cast(row->lineRendition), + .lineRendition = 
row->lineRendition, }; - for (auto x = m.glyphsFrom; x < m.glyphsTo; ++x) + // This goto label exists to allow us to retry rendering a glyph if the glyph atlas was full. + // We need to goto here, because a retry will cause the atlas texture as well as the + // _glyphAtlasMap hashmap to be cleared, and so we'll have to call insert() again. + drawGlyphRetry: + auto& fontFaceEntry = *_glyphAtlasMap.insert(fontFaceKey).first.inner; + + while (x < m.glyphsTo) { - key.glyphIndex = row->glyphIndices[x]; + const auto [glyphEntry, inserted] = fontFaceEntry.glyphs.insert(row->glyphIndices[x]); - // This loop exists to allow us to retry rendering a glyph if the glyph atlas was full. - // We need to loop here, because a retry will cause the atlas texture as well as the - // _glyphCache hashmap to be cleared, and so we'll have to call FindOrInsert() again. - for (;;) + if (inserted && !_drawGlyph(p, fontFaceEntry, glyphEntry)) { - bool inserted = false; - auto& entry = _glyphCache.FindOrInsert(key, inserted); - - if (inserted && !_drawGlyph(p, entry, m.fontEmSize)) - { - // A deadlock in this retry loop is detected in _drawGlyphPrepareRetry. - continue; - } - - if (entry.data.shadingType != ShadingType::Default) - { - auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); - auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); - - l <<= lineRenditionScale; + // A deadlock in this retry loop is detected in _drawGlyphPrepareRetry. + // + // Yes, I agree, avoid goto. Sometimes. It's not my fault that C++ still doesn't + // have a `continue outerloop;` like other languages had it for decades. :( +#pragma warning(suppress : 26438) // Avoid 'goto' (es.76). 
+ goto drawGlyphRetry; + } - l += entry.data.offset.x; - t += entry.data.offset.y; + if (glyphEntry.data.shadingType != ShadingType::Default) + { + auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); + auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); + l <<= lineRenditionScale; + l += glyphEntry.data.positionX; + t += glyphEntry.data.positionY; - row->dirtyTop = std::min(row->dirtyTop, t); - row->dirtyBottom = std::max(row->dirtyBottom, t + entry.data.size.y); + row->dirtyTop = std::min(row->dirtyTop, t); + row->dirtyBottom = std::max(row->dirtyBottom, t + glyphEntry.data.sizeY); - const i16x2 position{ - static_cast(l), - static_cast(t), - }; - _appendQuad(position, entry.data.size, entry.data.texcoord, row->colors[x], entry.data.shadingType); + if (_instancesCount >= _instances.size()) + { + _bumpInstancesSize(); } - baselineX += row->glyphAdvances[x]; - break; + auto& instance = _instances[_instancesCount++]; + instance = glyphEntry.data; + instance.positionX = static_cast(l); + instance.positionY = static_cast(t); + instance.color = row->colors[x]; } + + baselineX += row->glyphAdvances[x]; + ++x; } } @@ -1141,18 +1110,18 @@ void BackendD3D::_drawText(RenderingPayload& p) _d2dEndDrawing(); } -bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize) +bool BackendD3D::_drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) { - if (entry.key.fontFace == IDWriteFontFace_SoftFont) + if (!fontFaceEntry.fontFace) { - return _drawSoftFontGlyph(p, entry); + return _drawSoftFontGlyph(p, fontFaceEntry, glyphEntry); } const DWRITE_GLYPH_RUN glyphRun{ - .fontFace = entry.key.fontFace, - .fontEmSize = fontEmSize, + .fontFace = fontFaceEntry.fontFace.get(), + .fontEmSize = p.s->font->fontSize, .glyphCount = 1, - .glyphIndices = &entry.key.glyphIndex, + .glyphIndices = &glyphEntry.glyphIndex, }; // It took me a while to figure 
out how to rasterize glyphs manually with DirectWrite without depending on Direct2D. @@ -1170,7 +1139,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f DWRITE_RENDERING_MODE renderingMode{}; DWRITE_GRID_FIT_MODE gridFitMode{}; - THROW_IF_FAILED(entry.key.fontFace->GetRecommendedRenderingMode( + THROW_IF_FAILED(fontFaceEntry.fontFace->GetRecommendedRenderingMode( /* fontEmSize */ fontEmSize, /* dpiX */ 1, // fontEmSize is already in pixel /* dpiY */ 1, // fontEmSize is already in pixel @@ -1223,7 +1192,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f // The buffer now contains a grayscale alpha mask. #endif - const auto lineRendition = static_cast(entry.key.lineRendition); + const auto lineRendition = static_cast(fontFaceEntry.lineRendition); std::optional transform; if (lineRendition != LineRendition::SingleWidth) @@ -1296,30 +1265,30 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f _d2dBeginDrawing(); const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); - entry.data.offset.x = bl; - entry.data.offset.y = bt; - entry.data.size.x = rect.w; - entry.data.size.y = rect.h; - entry.data.texcoord.x = rect.x; - entry.data.texcoord.y = rect.y; - entry.data.shadingType = colorGlyph ? ShadingType::Passthrough : _textShadingType; + glyphEntry.data.positionX = bl; + glyphEntry.data.positionY = bt; + glyphEntry.data.sizeX = rect.w; + glyphEntry.data.sizeY = rect.h; + glyphEntry.data.texcoordX = rect.x; + glyphEntry.data.texcoordY = rect.y; + glyphEntry.data.shadingType = colorGlyph ? 
ShadingType::Passthrough : _textShadingType; if (lineRendition >= LineRendition::DoubleHeightTop) { - _splitDoubleHeightGlyph(p, entry); + _splitDoubleHeightGlyph(p, fontFaceEntry, glyphEntry); } return true; } -bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) +bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) { stbrp_rect rect{ .w = p.s->font->cellSize.x, .h = p.s->font->cellSize.y, }; - const auto lineRendition = static_cast(entry.key.lineRendition); + const auto lineRendition = static_cast(fontFaceEntry.lineRendition); if (lineRendition != LineRendition::SingleWidth) { rect.w <<= 1; @@ -1353,7 +1322,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& const auto height = static_cast(p.s->font->softFontCellSize.height); auto bitmapData = Buffer{ width * height }; - const auto glyphIndex = entry.key.glyphIndex - 0xEF20u; + const auto glyphIndex = glyphEntry.glyphIndex - 0xEF20u; auto src = p.s->font->softFontPattern.begin() + height * glyphIndex; auto dst = bitmapData.begin(); @@ -1383,18 +1352,18 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& _d2dBeginDrawing(); _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, interpolation, nullptr, nullptr); - entry.data.offset.x = 0; - entry.data.offset.y = -p.s->font->baseline; - entry.data.size.x = rect.w; - entry.data.size.y = rect.h; - entry.data.texcoord.x = rect.x; - entry.data.texcoord.y = rect.y; - entry.data.shadingType = ShadingType::TextGrayscale; + glyphEntry.data.positionX = 0; + glyphEntry.data.positionY = -p.s->font->baseline; + glyphEntry.data.sizeX = rect.w; + glyphEntry.data.sizeY = rect.h; + glyphEntry.data.texcoordX = rect.x; + glyphEntry.data.texcoordY = rect.y; + glyphEntry.data.shadingType = ShadingType::TextGrayscale; if (lineRendition >= LineRendition::DoubleHeightTop) { - entry.data.offset.y -= 
p.s->font->cellSize.y; - _splitDoubleHeightGlyph(p, entry); + glyphEntry.data.positionY -= p.s->font->cellSize.y; + _splitDoubleHeightGlyph(p, fontFaceEntry, glyphEntry); } return true; @@ -1402,50 +1371,49 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) { - THROW_HR_IF_MSG(E_UNEXPECTED, _glyphCache.Size() == 0, "BackendD3D::_drawGlyph deadlock"); + THROW_HR_IF_MSG(E_UNEXPECTED, _glyphAtlasMap.load() == 0, "BackendD3D::_drawGlyph deadlock"); _d2dEndDrawing(); _flushQuads(p); _resetGlyphAtlas(p); } // If this is a double-height glyph (DECDHL), we need to split it into 2 glyph entries: -// One for the top half and one for the bottom half, because that's how DECDHL works. -// This will clip `entry` to only contain the top/bottom half (as specified by `entry.key.lineRendition`) +// One for the top half and one for the bottom half, because that's how DECDHL works. This will clip +// `glyphEntry` to only contain the top/bottom half (as specified by `fontFaceEntry.lineRendition`) // and create a second entry in our glyph cache hashmap that contains the other half. -void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry) +void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) { - static constexpr auto lrTop = static_cast(LineRendition::DoubleHeightTop); - static constexpr auto lrBottom = static_cast(LineRendition::DoubleHeightBottom); - // Twice the line height, twice the descender gap. For both. - entry.data.offset.y -= p.s->font->descender; + glyphEntry.data.positionY -= p.s->font->descender; - const auto isTop = entry.key.lineRendition == lrTop; + const auto isTop = fontFaceEntry.lineRendition == LineRendition::DoubleHeightTop; - auto key2 = entry.key; - key2.lineRendition = isTop ? 
lrBottom : lrTop; + const AtlasFontFaceKey key2{ + .fontFace = fontFaceEntry.fontFace.get(), + .lineRendition = isTop ? LineRendition::DoubleHeightBottom : LineRendition::DoubleHeightTop, + }; - bool inserted = false; - auto& entry2 = _glyphCache.FindOrInsert(key2, inserted); - entry2.data = entry.data; + auto& glyphCache = _glyphAtlasMap.insert(key2).first.inner->glyphs; + auto& entry2 = glyphCache.insert(glyphEntry.glyphIndex).first; + entry2.data = glyphEntry.data; - auto& top = isTop ? entry : entry2; - auto& bottom = isTop ? entry2 : entry; + auto& top = isTop ? glyphEntry : entry2; + auto& bottom = isTop ? entry2 : glyphEntry; - const auto topSize = clamp(-entry.data.offset.y - p.s->font->baseline, 0, static_cast(entry.data.size.y)); - top.data.offset.y += p.s->font->cellSize.y; - top.data.size.y = topSize; - bottom.data.offset.y += topSize; - bottom.data.size.y = std::max(0, bottom.data.size.y - topSize); - bottom.data.texcoord.y += topSize; + const auto topSize = clamp(-glyphEntry.data.positionY - p.s->font->baseline, 0, static_cast(glyphEntry.data.sizeY)); + top.data.positionY += p.s->font->cellSize.y; + top.data.sizeY = topSize; + bottom.data.positionY += topSize; + bottom.data.sizeY = std::max(0, bottom.data.sizeY - topSize); + bottom.data.texcoordY += topSize; // Things like diacritics might be so small that they only exist on either half of the // double-height row. This effectively turns the other (unneeded) side into whitespace. - if (!top.data.size.y) + if (!top.data.sizeY) { top.data.shadingType = ShadingType::Default; } - if (!bottom.data.size.y) + if (!bottom.data.sizeY) { bottom.data.shadingType = ShadingType::Default; } @@ -1697,7 +1665,7 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) // The way this is implemented isn't very smart, but we also don't have very many rows to iterate through. 
if (row->selectionFrom == lastFrom && row->selectionTo == lastTo) { - _getLastQuad().size.y += p.s->font->cellSize.y; + _getLastQuad().sizeY += p.s->font->cellSize.y; } else { diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 9756af4f2ea..a77141b49c8 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include #include "Backend.h" @@ -18,7 +19,6 @@ namespace Microsoft::Console::Render::Atlas bool RequiresContinuousRedraw() noexcept override; void WaitUntilCanRender() noexcept override; - private: // NOTE: D3D constant buffers sizes must be a multiple of 16 bytes. struct alignas(16) VSConstBuffer { @@ -66,87 +66,83 @@ namespace Microsoft::Console::Render::Atlas SolidFill = 5, }; - struct QuadInstance + // NOTE: Don't initialize any members in this struct. This ensures that no + // zero-initialization needs to occur when we allocate large buffers of this object. + struct alignas(u32) QuadInstance { // `position` might clip outside of the bounds of the viewport and so it needs to be a // signed coordinate. i16x2 is used as the size of the instance buffer made the largest // impact on performance and power draw. If (when?) displays with >32k resolution make their // appearance in the future, this should be changed to f32x2. But if you do so, please change // all other occurrences of i16x2 positions/offsets throughout the class to keep it consistent. 
- alignas(sizeof(i16x2)) i16x2 position; - alignas(sizeof(i16x2)) u16x2 size; - alignas(sizeof(i16x2)) u16x2 texcoord; - alignas(sizeof(u32)) ShadingType shadingType = ShadingType::Default; - alignas(sizeof(u32)) u32 color = 0; - }; + i16 positionX; + i16 positionY; - struct GlyphCacheKey - { - // BODGY: The IDWriteFontFace results from us calling IDWriteFontFallback::MapCharacters - // which at the time of writing returns the same IDWriteFontFace as long as someone is - // holding a reference / the reference count doesn't drop to 0 (see ActiveFaceCache). - // This allows us to hash the value of the pointer as if it was uniquely identifying the font face. - // - // This isn't using a raw pointer instead of a managed struct, because this allows - // us to construct a GlyphCacheKey for lookup without AddRef()ing the fontFace. - IDWriteFontFace2* fontFace = nullptr; - u16 lineRendition = 0; - u16 glyphIndex = 0; -#ifdef _WIN64 - u32 _padding = 0; -#endif - }; - static_assert(std::has_unique_object_representations_v); + u16 sizeX; + u16 sizeY; - // Due to padding on 64-Bit systems, sizeof(GlyphCacheKey) will be 16, - // but the actual contents of the struct still only be 12 bytes. - static constexpr size_t GlyphCacheKeyDataSize = - sizeof(GlyphCacheKey::fontFace) + - sizeof(GlyphCacheKey::lineRendition) + - sizeof(GlyphCacheKey::glyphIndex); + u16 texcoordX; + u16 texcoordY; - struct GlyphCacheData - { - i16x2 offset; - u16x2 size; - u16x2 texcoord; - ShadingType shadingType = ShadingType::Default; + ShadingType shadingType; + u32 color; }; - static_assert(std::has_unique_object_representations_v); - struct GlyphCacheEntry + // NOTE: Don't initialize any members in this struct. This ensures that no + // zero-initialization needs to occur when we allocate large buffers of this object. + struct AtlasGlyphEntry { - GlyphCacheKey key; - GlyphCacheData data; + u16 glyphIndex; + // All data in QuadInstance is u32-aligned anyways, so this simultaneously serves as padding. 
+ u16 _occupied; + QuadInstance data; }; - static_assert(std::has_unique_object_representations_v); - struct GlyphCacheMap + // This exists so that we can look up an AtlasFontFaceEntry without AddRef()/Release()ing fontFace first. + struct AtlasFontFaceKey { - GlyphCacheMap() = default; - ~GlyphCacheMap(); + IDWriteFontFace2* fontFace; + LineRendition lineRendition; + }; - GlyphCacheMap(const GlyphCacheMap&) = delete; - GlyphCacheMap(GlyphCacheMap&&) = delete; + // Just... uh... turn around and pretend you don't see this. + // This stuffs (or extracts, below) a pointer and the line rendition into a single pointer. This works because in C (and COM) + // the minimum heap allocation alignment is at least 8 (the size of a double) and so the lowest 3 bits are free real estate. + // + // I'm doing this because it shrinks the size of AtlasFontFaceEntry by a third and simplifies + // both the hashing and comparison code for the hashmap lookup from the POV of the CPU. + static constexpr uintptr_t combineAtlasFontFaceKey(IDWriteFontFace2* fontFace, LineRendition lineRendition) noexcept + { + const auto p = std::bit_cast(fontFace); + assert((p & 7) == 0); + return p | static_cast(lineRendition); + } - const GlyphCacheMap& operator=(const GlyphCacheMap&) = delete; - GlyphCacheMap& operator=(GlyphCacheMap&& other) noexcept; + static constexpr IDWriteFontFace2* extractAtlasFontFaceKey(uintptr_t c) noexcept + { + return std::bit_cast(c & ~7); + } - size_t Size() const noexcept; - void Clear() noexcept; - GlyphCacheEntry& FindOrInsert(const GlyphCacheKey& key, bool& inserted); + struct AtlasFontFaceEntryInner + { + // BODGY: At the time of writing IDWriteFontFallback::MapCharacters returns the same IDWriteFontFace instance + // for the same font face variant as long as someone is holding a reference to the instance (see ActiveFaceCache). + // This allows us to hash the value of the pointer as if it was uniquely identifying the font face variant. 
+ wil::com_ptr fontFace; + LineRendition lineRendition = LineRendition::SingleWidth; - private: - static size_t _hash(const GlyphCacheKey& key) noexcept; - static bool _equals(const GlyphCacheKey& lhs, const GlyphCacheKey& rhs) noexcept; - void _bumpSize(); + til::linear_flat_set glyphs; + }; - Buffer _map; - size_t _mask = 0; - size_t _capacity = 0; - size_t _size = 0; + struct AtlasFontFaceEntry + { + // This being heap allocated allows us to insert into `glyphs` in `_splitDoubleHeightGlyph` + // (which might resize the hashmap!), while the caller `_drawText` is holding onto `glyphs`. + // If it wasn't heap allocated, all pointers into `linear_flat_set` would be invalidated. + std::unique_ptr inner; }; + private: __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _updateFontDependents(const RenderingPayload& p); void _recreateCustomShader(const RenderingPayload& p); @@ -170,10 +166,10 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); - __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, GlyphCacheEntry& entry, f32 fontEmSize); - bool _drawSoftFontGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); + __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); + bool _drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGlyphPrepareRetry(const RenderingPayload& p); - void _splitDoubleHeightGlyph(const RenderingPayload& p, GlyphCacheEntry& entry); + void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, 
const ShapedRow* row, u16 y); void _drawCursorPart1(const RenderingPayload& p); @@ -227,7 +223,7 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _glyphAtlas; wil::com_ptr _glyphAtlasView; - GlyphCacheMap _glyphCache; + til::linear_flat_set _glyphAtlasMap; Buffer _rectPackerData; stbrp_context _rectPacker{}; diff --git a/src/renderer/atlas/colorbrewer.h b/src/renderer/atlas/colorbrewer.h index 5724d2d37da..be5cef6b8a6 100644 --- a/src/renderer/atlas/colorbrewer.h +++ b/src/renderer/atlas/colorbrewer.h @@ -1,36 +1,36 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -#pragma once - -namespace Microsoft::Console::Render::Atlas::colorbrewer -{ - // The following list of colors is only used as a debug aid and not part of the final product. - // They're licensed under: - // - // Apache-Style Software License for ColorBrewer software and ColorBrewer Color Schemes - // - // Copyright (c) 2002 Cynthia Brewer, Mark Harrower, and The Pennsylvania State University. - // - // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. - // You may obtain a copy of the License at - // - // http://www.apache.org/licenses/LICENSE-2.0 - // - // Unless required by applicable law or agreed to in writing, software distributed - // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR - // CONDITIONS OF ANY KIND, either express or implied. See the License for the - // specific language governing permissions and limitations under the License. - // - inline constexpr u32 pastel1[]{ - 0xfbb4ae, - 0xb3cde3, - 0xccebc5, - 0xdecbe4, - 0xfed9a6, - 0xffffcc, - 0xe5d8bd, - 0xfddaec, - 0xf2f2f2, - }; -} +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +namespace Microsoft::Console::Render::Atlas::colorbrewer +{ + // The following list of colors is only used as a debug aid and not part of the final product. 
+ // They're licensed under: + // + // Apache-Style Software License for ColorBrewer software and ColorBrewer Color Schemes + // + // Copyright (c) 2002 Cynthia Brewer, Mark Harrower, and The Pennsylvania State University. + // + // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. + // You may obtain a copy of the License at + // + // http://www.apache.org/licenses/LICENSE-2.0 + // + // Unless required by applicable law or agreed to in writing, software distributed + // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR + // CONDITIONS OF ANY KIND, either express or implied. See the License for the + // specific language governing permissions and limitations under the License. + // + inline constexpr u32 pastel1[]{ + 0xfbb4ae, + 0xb3cde3, + 0xccebc5, + 0xdecbe4, + 0xfed9a6, + 0xffffcc, + 0xe5d8bd, + 0xfddaec, + 0xf2f2f2, + }; +} diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 190e52c98cb..27ea6f515f4 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -373,123 +373,9 @@ namespace Microsoft::Console::Render::Atlas }; ATLAS_FLAG_OPS(FontRelevantAttributes, u8) - // This fake IDWriteFontFace* is a place holder that is used when we draw DECDLD/DRCS soft fonts. It's wildly - // invalid C++, but I wrote the alternative, proper code with bitfields/flags and such and it turned into a - // bigger mess than this violation against the C++ consortium's conscience. It also didn't help BackendD3D, - // which hashes FontFace and an additional flag field would double the hashmap key size due to padding. - // It's a macro, because constexpr doesn't work here in C++20 and regular "const" doesn't inline. -#define IDWriteFontFace_SoftFont (static_cast(nullptr) + 1) - - // The existence of IDWriteFontFace_SoftFont unfortunately requires us to reimplement wil::com_ptr. 
- // - // Unfortunately this code seems to confuse MSVC's linter? The 3 smart pointer warnings are somewhat funny. - // It doesn't understand that this class is a smart pointer itself. The other 2 are valid, but don't apply here. -#pragma warning(push) -#pragma warning(disable : 26415) // Smart pointer parameter 'other' is used only to access contained pointer. Use T* or T& instead (r.30). -#pragma warning(disable : 26416) // Shared pointer parameter 'other' is passed by rvalue reference. Pass by value instead (r.34). -#pragma warning(disable : 26418) // Shared pointer parameter 'other' is not copied or moved. Use T* or T& instead (r.36). -#pragma warning(disable : 26447) // The function is declared 'noexcept' but calls function '...' which may throw exceptions (f.6).) -#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1). - struct FontFace - { - using InterfaceType = IDWriteFontFace2; - - FontFace() = default; - - ~FontFace() noexcept - { - _release(); - } - - FontFace(const FontFace& other) noexcept : - FontFace{ other.get() } - { - } - - FontFace(FontFace&& other) noexcept : - _ptr{ other.detach() } - { - } - - FontFace& operator=(const FontFace& other) noexcept - { - _release(); - _ptr = other.get(); - _addRef(); - return *this; - } - - FontFace& operator=(FontFace&& other) noexcept - { - _release(); - _ptr = other.detach(); - return *this; - } - - FontFace(InterfaceType* ptr) noexcept : - _ptr{ ptr } - { - _addRef(); - } - - FontFace(const wil::com_ptr& other) noexcept : - FontFace{ other.get() } - { - } - - FontFace(wil::com_ptr&& other) noexcept : - _ptr{ other.detach() } - { - } - - void attach(InterfaceType* other) noexcept - { - _release(); - _ptr = other; - } - - [[nodiscard]] InterfaceType* detach() noexcept - { - const auto tmp = _ptr; - _ptr = nullptr; - return tmp; - } - - InterfaceType* get() const noexcept - { - return _ptr; - } - - bool is_proper_font() const noexcept - { - return _ptr > 
IDWriteFontFace_SoftFont; - } - - private: - void _addRef() const noexcept - { - if (is_proper_font()) - { - _ptr->AddRef(); - } - } - - void _release() const noexcept - { - if (is_proper_font()) - { - _ptr->Release(); - } - } - - InterfaceType* _ptr = nullptr; - }; -#pragma warning(pop) - struct FontMapping { - FontFace fontFace; - f32 fontEmSize = 0; + wil::com_ptr fontFace; u32 glyphsFrom = 0; u32 glyphsTo = 0; }; diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 15b603e1de3..3183d299585 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -1,46 +1,46 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. - -// clang-format off -#define SHADING_TYPE_TEXT_BACKGROUND 0 -#define SHADING_TYPE_TEXT_GRAYSCALE 1 -#define SHADING_TYPE_TEXT_CLEARTYPE 2 -#define SHADING_TYPE_PASSTHROUGH 3 -#define SHADING_TYPE_DASHED_LINE 4 -#define SHADING_TYPE_SOLID_FILL 5 -// clang-format on - -struct VSData -{ - float2 vertex : SV_Position; - int2 position : position; - uint2 size : size; - uint2 texcoord : texcoord; - uint shadingType : shadingType; - float4 color : color; -}; - -struct PSData -{ - float4 position : SV_Position; - float2 texcoord : texcoord; - nointerpolation uint shadingType : shadingType; - nointerpolation float4 color : color; -}; - -float4 premultiplyColor(float4 color) -{ - color.rgb *= color.a; - return color; -} - -float4 alphaBlendPremultiplied(float4 bottom, float4 top) -{ - bottom *= 1 - top.a; - return bottom + top; -} - -float4 decodeRGBA(uint i) -{ - return (i >> uint4(0, 8, 16, 24) & 0xff) / 255.0f; -} +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. 
+ +// clang-format off +#define SHADING_TYPE_TEXT_BACKGROUND 0 +#define SHADING_TYPE_TEXT_GRAYSCALE 1 +#define SHADING_TYPE_TEXT_CLEARTYPE 2 +#define SHADING_TYPE_PASSTHROUGH 3 +#define SHADING_TYPE_DASHED_LINE 4 +#define SHADING_TYPE_SOLID_FILL 5 +// clang-format on + +struct VSData +{ + float2 vertex : SV_Position; + int2 position : position; + uint2 size : size; + uint2 texcoord : texcoord; + uint shadingType : shadingType; + float4 color : color; +}; + +struct PSData +{ + float4 position : SV_Position; + float2 texcoord : texcoord; + nointerpolation uint shadingType : shadingType; + nointerpolation float4 color : color; +}; + +float4 premultiplyColor(float4 color) +{ + color.rgb *= color.a; + return color; +} + +float4 alphaBlendPremultiplied(float4 bottom, float4 top) +{ + bottom *= 1 - top.a; + return bottom + top; +} + +float4 decodeRGBA(uint i) +{ + return (i >> uint4(0, 8, 16, 24) & 0xff) / 255.0f; +} diff --git a/src/renderer/atlas/wic.h b/src/renderer/atlas/wic.h index 1f08eded553..dce0a02bf80 100644 --- a/src/renderer/atlas/wic.h +++ b/src/renderer/atlas/wic.h @@ -1,59 +1,59 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT license. 
- -#pragma once - -#include - -inline void SaveTextureToPNG(ID3D11DeviceContext* deviceContext, ID3D11Resource* source, double dpi, const wchar_t* fileName) -{ - __assume(deviceContext != nullptr); - __assume(source != nullptr); - - wil::com_ptr texture; - THROW_IF_FAILED(source->QueryInterface(IID_PPV_ARGS(texture.addressof()))); - - wil::com_ptr d3dDevice; - deviceContext->GetDevice(d3dDevice.addressof()); - - D3D11_TEXTURE2D_DESC desc{}; - texture->GetDesc(&desc); - desc.BindFlags = 0; - desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; - desc.Usage = D3D11_USAGE_STAGING; - - wil::com_ptr staging; - THROW_IF_FAILED(d3dDevice->CreateTexture2D(&desc, nullptr, staging.put())); - - deviceContext->CopyResource(staging.get(), source); - - static const auto wicFactory = []() { - THROW_IF_FAILED(::CoInitializeEx(nullptr, COINIT_MULTITHREADED)); - return wil::CoCreateInstance(CLSID_WICImagingFactory2); - }(); - - wil::com_ptr stream; - THROW_IF_FAILED(wicFactory->CreateStream(stream.addressof())); - THROW_IF_FAILED(stream->InitializeFromFilename(fileName, GENERIC_WRITE)); - - wil::com_ptr encoder; - THROW_IF_FAILED(wicFactory->CreateEncoder(GUID_ContainerFormatPng, nullptr, encoder.addressof())); - THROW_IF_FAILED(encoder->Initialize(stream.get(), WICBitmapEncoderNoCache)); - - wil::com_ptr frame; - wil::com_ptr props; - THROW_IF_FAILED(encoder->CreateNewFrame(frame.addressof(), props.addressof())); - THROW_IF_FAILED(frame->Initialize(props.get())); - THROW_IF_FAILED(frame->SetSize(desc.Width, desc.Height)); - THROW_IF_FAILED(frame->SetResolution(dpi, dpi)); - auto pixelFormat = GUID_WICPixelFormat32bppBGRA; - THROW_IF_FAILED(frame->SetPixelFormat(&pixelFormat)); - - D3D11_MAPPED_SUBRESOURCE mapped; - THROW_IF_FAILED(deviceContext->Map(staging.get(), 0, D3D11_MAP_READ, 0, &mapped)); - THROW_IF_FAILED(frame->WritePixels(desc.Height, mapped.RowPitch, mapped.RowPitch * desc.Height, static_cast(mapped.pData))); - deviceContext->Unmap(staging.get(), 0); - - 
THROW_IF_FAILED(frame->Commit()); - THROW_IF_FAILED(encoder->Commit()); -} +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT license. + +#pragma once + +#include + +inline void SaveTextureToPNG(ID3D11DeviceContext* deviceContext, ID3D11Resource* source, double dpi, const wchar_t* fileName) +{ + __assume(deviceContext != nullptr); + __assume(source != nullptr); + + wil::com_ptr texture; + THROW_IF_FAILED(source->QueryInterface(IID_PPV_ARGS(texture.addressof()))); + + wil::com_ptr d3dDevice; + deviceContext->GetDevice(d3dDevice.addressof()); + + D3D11_TEXTURE2D_DESC desc{}; + texture->GetDesc(&desc); + desc.BindFlags = 0; + desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc.Usage = D3D11_USAGE_STAGING; + + wil::com_ptr staging; + THROW_IF_FAILED(d3dDevice->CreateTexture2D(&desc, nullptr, staging.put())); + + deviceContext->CopyResource(staging.get(), source); + + static const auto wicFactory = []() { + THROW_IF_FAILED(::CoInitializeEx(nullptr, COINIT_MULTITHREADED)); + return wil::CoCreateInstance(CLSID_WICImagingFactory2); + }(); + + wil::com_ptr stream; + THROW_IF_FAILED(wicFactory->CreateStream(stream.addressof())); + THROW_IF_FAILED(stream->InitializeFromFilename(fileName, GENERIC_WRITE)); + + wil::com_ptr encoder; + THROW_IF_FAILED(wicFactory->CreateEncoder(GUID_ContainerFormatPng, nullptr, encoder.addressof())); + THROW_IF_FAILED(encoder->Initialize(stream.get(), WICBitmapEncoderNoCache)); + + wil::com_ptr frame; + wil::com_ptr props; + THROW_IF_FAILED(encoder->CreateNewFrame(frame.addressof(), props.addressof())); + THROW_IF_FAILED(frame->Initialize(props.get())); + THROW_IF_FAILED(frame->SetSize(desc.Width, desc.Height)); + THROW_IF_FAILED(frame->SetResolution(dpi, dpi)); + auto pixelFormat = GUID_WICPixelFormat32bppBGRA; + THROW_IF_FAILED(frame->SetPixelFormat(&pixelFormat)); + + D3D11_MAPPED_SUBRESOURCE mapped; + THROW_IF_FAILED(deviceContext->Map(staging.get(), 0, D3D11_MAP_READ, 0, &mapped)); + 
THROW_IF_FAILED(frame->WritePixels(desc.Height, mapped.RowPitch, mapped.RowPitch * desc.Height, static_cast(mapped.pData))); + deviceContext->Unmap(staging.get(), 0); + + THROW_IF_FAILED(frame->Commit()); + THROW_IF_FAILED(encoder->Commit()); +} diff --git a/src/til/ut_til/til.unit.tests.vcxproj b/src/til/ut_til/til.unit.tests.vcxproj index 5223074ec1d..7a4911435bd 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj +++ b/src/til/ut_til/til.unit.tests.vcxproj @@ -43,10 +43,13 @@ + + + @@ -66,6 +69,7 @@ + @@ -85,4 +89,4 @@ - + \ No newline at end of file diff --git a/src/til/ut_til/til.unit.tests.vcxproj.filters b/src/til/ut_til/til.unit.tests.vcxproj.filters index 88fccdbd38a..24dbe38e53f 100644 --- a/src/til/ut_til/til.unit.tests.vcxproj.filters +++ b/src/til/ut_til/til.unit.tests.vcxproj.filters @@ -118,10 +118,22 @@ inc + + inc + + + inc + + + inc + + + inc + {7cf29ba4-d33d-4c3b-82e3-ab73e5a79685} - + \ No newline at end of file diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index c39fe9df840..0ba29b94382 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -113,6 +113,16 @@ + + {{ size={_load / $T2} }} + + + _capacity + _map._Mypair._Myval2 + + + + {{ size={_size} }} @@ -128,8 +138,16 @@ {{ dirtyTop={dirtyTop}, dirtyBottom={dirtyBottom} }} - - (empty) - {(void*)key.fontFace}, {key.glyphIndex} + + (empty) + {glyphIndex} + + + + (empty) + {(void*)fontFace.m_ptr}, {lineRendition} + + glyphs + From 4ef2b3ffab44a74678af5e674809630d631db57d Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Mon, 3 Apr 2023 15:43:06 +0200 Subject: [PATCH 23/37] Implement inverted cursors for D2D, Make _appendQuad a prettier & faster, Use new range --- .github/actions/spelling/expect/expect.txt | 1 + src/inc/til/flat_set.h | 16 +- src/renderer/atlas/AtlasEngine.api.cpp | 16 +- src/renderer/atlas/AtlasEngine.cpp | 37 ++- src/renderer/atlas/AtlasEngine.h | 11 +- src/renderer/atlas/Backend.h | 2 +- src/renderer/atlas/BackendD2D.cpp | 250 
+++++++++++++---- src/renderer/atlas/BackendD2D.h | 24 +- src/renderer/atlas/BackendD3D.cpp | 297 ++++++++++----------- src/renderer/atlas/BackendD3D.h | 34 +-- src/renderer/atlas/common.h | 59 ++-- src/renderer/atlas/shader_common.hlsl | 2 +- 12 files changed, 450 insertions(+), 299 deletions(-) diff --git a/.github/actions/spelling/expect/expect.txt b/.github/actions/spelling/expect/expect.txt index 804c220d027..ad291cd646c 100644 --- a/.github/actions/spelling/expect/expect.txt +++ b/.github/actions/spelling/expect/expect.txt @@ -1123,6 +1123,7 @@ Mip MMBB mmcc MMCPL +MMIX mmsystem MNC MNOPQ diff --git a/src/inc/til/flat_set.h b/src/inc/til/flat_set.h index e344964ce64..1b3b8f3a3f9 100644 --- a/src/inc/til/flat_set.h +++ b/src/inc/til/flat_set.h @@ -4,7 +4,8 @@ #pragma once #pragma warning(push) -#pragma warning(suppress : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26446) // Prefer to use gsl::at() instead of unchecked subscript operator (bounds.4). +#pragma warning(disable : 26409) // Avoid calling new and delete explicitly, use std::make_unique instead (r.11). namespace til { @@ -34,26 +35,26 @@ namespace til { using T = size_t; - static size_t hash(T v) noexcept + static constexpr size_t hash(T v) noexcept { return flat_set_hash_integer(v); } // Return true if the key and existing slot in the hashmap match. - static bool equals(T slot, T key) + static constexpr bool equals(T slot, T key) { return slot == key; } // Return true if this slot can be filled with a new item. - static bool empty(T slot) + static constexpr bool empty(T slot) { return slot == -1; } // Called when a new item is inserted into the hashmap. // T::operator=(T&&) is called when the map is resized and existing items must be moved over. - static void fill(T& slot, T key) + static constexpr void fill(T& slot, T key) { slot = key; } @@ -61,14 +62,14 @@ namespace til // Called when a new backing buffer is allocated. 
You need to then initialize the raw memory. static std::unique_ptr allocate(size_t capacity) { - return std::unique_ptr{ new T[capacity]{ size_t(-1) } }; + return std::unique_ptr{ new T[capacity]{ static_cast(-1) } }; } static void clear(T* data, size_t capacity) noexcept { for (auto& slot : std::span{ data, capacity }) { - slot = size_t(-1); + slot = static_cast(-1); } } }; @@ -116,6 +117,7 @@ namespace til // by some prime number and divide by the number of slots. It's been shown // many times in literature that such a scheme performs the best on average. // As such, we perform the divide her to get the topmost bits down. + // See flat_set_hash_integer. const auto hash = Trait::hash(key) >> _shift; for (auto i = hash;; ++i) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 9d678f89c25..f039f94ed50 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -47,8 +47,8 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept //assert(psrRegion->top < psrRegion->bottom && psrRegion->top >= 0 && psrRegion->bottom <= _api.cellCount.y); // BeginPaint() protects against invalid out of bounds numbers. - _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, gsl::narrow_cast(psrRegion->top)); - _api.invalidatedRows.y = std::max(_api.invalidatedRows.y, gsl::narrow_cast(psrRegion->bottom)); + _api.invalidatedRows.start = std::min(_api.invalidatedRows.start, gsl::narrow_cast(psrRegion->top)); + _api.invalidatedRows.end = std::max(_api.invalidatedRows.end, gsl::narrow_cast(psrRegion->bottom)); return S_OK; } @@ -89,8 +89,8 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept // BeginPaint() protects against invalid out of bounds numbers. // TODO: rect can contain invalid out of bounds coordinates when the selection is being // dragged outside of the viewport (and the window begins scrolling automatically). 
- _api.invalidatedRows.x = gsl::narrow_cast(std::min(_api.invalidatedRows.x, std::max(0, rect.top))); - _api.invalidatedRows.y = gsl::narrow_cast(std::max(_api.invalidatedRows.y, std::max(0, rect.bottom))); + _api.invalidatedRows.start = gsl::narrow_cast(std::min(_api.invalidatedRows.start, std::max(0, rect.top))); + _api.invalidatedRows.end = gsl::narrow_cast(std::max(_api.invalidatedRows.end, std::max(0, rect.bottom))); } return S_OK; } @@ -118,13 +118,13 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept if (delta < 0) { - _api.invalidatedRows.x = gsl::narrow_cast(clamp(_api.invalidatedRows.x + delta, u16min, u16max)); - _api.invalidatedRows.y = _api.s->cellCount.y; + _api.invalidatedRows.start = gsl::narrow_cast(clamp(_api.invalidatedRows.start + delta, u16min, u16max)); + _api.invalidatedRows.end = _api.s->cellCount.y; } else { - _api.invalidatedRows.x = 0; - _api.invalidatedRows.y = gsl::narrow_cast(clamp(_api.invalidatedRows.y + delta, u16min, u16max)); + _api.invalidatedRows.start = 0; + _api.invalidatedRows.end = gsl::narrow_cast(clamp(_api.invalidatedRows.end + delta, u16min, u16max)); } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 758f95c7975..ee9f5fd57d9 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -83,8 +83,8 @@ try _api.invalidatedCursorArea.bottom = clamp(_api.invalidatedCursorArea.bottom, _api.invalidatedCursorArea.top, _p.s->cellCount.y); } { - _api.invalidatedRows.x = std::min(_api.invalidatedRows.x, _p.s->cellCount.y); - _api.invalidatedRows.y = clamp(_api.invalidatedRows.y, _api.invalidatedRows.x, _p.s->cellCount.y); + _api.invalidatedRows.start = std::min(_api.invalidatedRows.start, _p.s->cellCount.y); + _api.invalidatedRows.end = clamp(_api.invalidatedRows.end, _api.invalidatedRows.start, _p.s->cellCount.y); } { const auto limit = gsl::narrow_cast(_p.s->cellCount.y & 0x7fff); @@ -94,7 +94,7 @@ try // Scroll the buffer by the given offset 
and mark the newly uncovered rows as "invalid". if (const auto offset = _api.scrollOffset) { - const auto nothingInvalid = _api.invalidatedRows.x == _api.invalidatedRows.y; + const auto nothingInvalid = _api.invalidatedRows.start == _api.invalidatedRows.end; if (offset < 0) { @@ -105,8 +105,8 @@ try // |xxxxxxx | | | // +----------+ +----------+ const u16 begRow = _p.s->cellCount.y + offset; - _api.invalidatedRows.x = nothingInvalid ? begRow : std::min(_api.invalidatedRows.x, begRow); - _api.invalidatedRows.y = _p.s->cellCount.y; + _api.invalidatedRows.start = nothingInvalid ? begRow : std::min(_api.invalidatedRows.start, begRow); + _api.invalidatedRows.end = _p.s->cellCount.y; const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin()); std::copy_n(_p.rows.begin(), -offset, dst); @@ -120,8 +120,8 @@ try // | | |xxxxxxx | // +----------+ +----------+ const u16 endRow = offset; - _api.invalidatedRows.x = 0; - _api.invalidatedRows.y = nothingInvalid ? endRow : std::max(_api.invalidatedRows.y, endRow); + _api.invalidatedRows.start = 0; + _api.invalidatedRows.end = nothingInvalid ? 
endRow : std::max(_api.invalidatedRows.end, endRow); const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin()); std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst); @@ -146,9 +146,9 @@ try _api.dirtyRect = { 0, - _api.invalidatedRows.x, + _api.invalidatedRows.start, _p.s->cellCount.x, - _api.invalidatedRows.y, + _api.invalidatedRows.end, }; _p.dirtyRectInPx = { @@ -161,7 +161,7 @@ try _p.cursorRect = {}; _p.scrollOffset = _api.scrollOffset; - if (_api.invalidatedRows.x != _api.invalidatedRows.y) + if (_api.invalidatedRows.start != _api.invalidatedRows.end) { const auto deltaPx = _api.scrollOffset * _p.s->font->cellSize.y; const til::CoordType targetSizeX = _p.s->targetSize.x; @@ -169,16 +169,16 @@ try u16 y = 0; _p.dirtyRectInPx.left = 0; - _p.dirtyRectInPx.top = _api.invalidatedRows.x * _p.s->font->cellSize.y; + _p.dirtyRectInPx.top = _api.invalidatedRows.start * _p.s->font->cellSize.y; _p.dirtyRectInPx.right = targetSizeX; - _p.dirtyRectInPx.bottom = _api.invalidatedRows.y * _p.s->font->cellSize.y; + _p.dirtyRectInPx.bottom = _api.invalidatedRows.end * _p.s->font->cellSize.y; for (const auto r : _p.rows) { r->dirtyTop += deltaPx; r->dirtyBottom += deltaPx; - if (y >= _api.invalidatedRows.x && y < _api.invalidatedRows.y) + if (_api.invalidatedRows.contains(y)) { const auto clampedTop = clamp(r->dirtyTop, 0, targetSizeY); const auto clampedBottom = clamp(r->dirtyBottom, 0, targetSizeY); @@ -197,7 +197,7 @@ try // I feel a little bit like this is a hack, but I'm not sure how to better express this. // This ensures that we end up calling Present1() without dirty rects if the swap chain is // recreated/resized, because DXGI requires you to then call Present1() without dirty rects. 
- if (_api.invalidatedRows.x == 0 && _api.invalidatedRows.y == _p.s->cellCount.y) + if (_api.invalidatedRows == range{ 0, _p.s->cellCount.y }) { _p.dirtyRectInPx.top = 0; _p.dirtyRectInPx.bottom = targetSizeY; @@ -297,7 +297,7 @@ try } { - std::fill(_api.colorsForeground.begin() + x, _api.colorsForeground.begin() + column, _api.currentColor.x); + std::fill(_api.colorsForeground.begin() + x, _api.colorsForeground.begin() + column, _api.currentForeground); } { @@ -305,7 +305,7 @@ try const auto backgroundRow = _p.backgroundBitmap.begin() + _p.backgroundBitmapStride * y; auto it = backgroundRow + x; const auto end = backgroundRow + (static_cast(column) << shift); - const auto bg = u32ColorPremultiply(_api.currentColor.y); + const auto bg = u32ColorPremultiply(_api.currentBackground); for (; it != end; ++it) { @@ -420,8 +420,6 @@ try if (!isSettingDefaultBrushes) { - const u32x2 newColors{ gsl::narrow_cast(fg), gsl::narrow_cast(bg) }; - auto attributes = FontRelevantAttributes::None; WI_SetFlagIf(attributes, FontRelevantAttributes::Bold, textAttributes.IsIntense() && renderSettings.GetRenderMode(RenderSettings::Mode::IntenseIsBold)); WI_SetFlagIf(attributes, FontRelevantAttributes::Italic, textAttributes.IsItalic()); @@ -431,7 +429,8 @@ try _flushBufferLine(); } - _api.currentColor = newColors; + _api.currentBackground = gsl::narrow_cast(bg); + _api.currentForeground = gsl::narrow_cast(fg); _api.attributes = attributes; } else if (textAttributes.BackgroundIsDefault() && bg != _api.s->misc->backgroundColor) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 16ac18c39d9..e84700aa32d 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -100,8 +100,8 @@ namespace Microsoft::Console::Render::Atlas static constexpr i16 i16min = -0x8000; static constexpr i16 i16max = 0x7fff; static constexpr u16r invalidatedAreaNone = { u16max, u16max, u16min, u16min }; - static constexpr u16x2 invalidatedRowsNone{ u16max, 
u16min }; - static constexpr u16x2 invalidatedRowsAll{ u16min, u16max }; + static constexpr range invalidatedRowsNone{ u16max, u16min }; + static constexpr range invalidatedRowsAll{ u16min, u16max }; std::unique_ptr _b; RenderingPayload _p; @@ -140,9 +140,10 @@ namespace Microsoft::Console::Render::Atlas LineRendition lineRendition = LineRendition::SingleWidth; // UpdateDrawingBrushes() u32 backgroundOpaqueMixin = 0xff000000; - u32x2 currentColor; + u32 currentBackground = 0; + u32 currentForeground = 0; FontRelevantAttributes attributes = FontRelevantAttributes::None; - u16x2 lastPaintBufferLineCoord; + u16x2 lastPaintBufferLineCoord{}; // UpdateHyperlinkHoveredId() u16 hyperlinkHoveredId = 0; @@ -150,7 +151,7 @@ namespace Microsoft::Console::Render::Atlas til::rect dirtyRect; // These "invalidation" fields are reset in EndPaint() u16r invalidatedCursorArea = invalidatedAreaNone; - u16x2 invalidatedRows = invalidatedRowsNone; // x is treated as "top" and y as "bottom" + range invalidatedRows = invalidatedRowsNone; // x is treated as "top" and y as "bottom" i16 scrollOffset = 0; } _api; }; diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 933463a81e4..289bb35a3ab 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -72,7 +72,7 @@ namespace Microsoft::Console::Render::Atlas wil::unique_handle _frameLatencyWaitableObject; til::generation_t _targetGeneration; til::generation_t _fontGeneration; - u16x2 _targetSize; + u16x2 _targetSize{}; bool _waitForPresentation = false; }; diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 0d01c8ef262..624302bc436 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -23,6 +23,53 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; +namespace til +{ + template<> + struct flat_set_trait + { + using T = BackendD2D::CachedBrush; + + static constexpr size_t hash(u32 key) noexcept + { + 
return flat_set_hash_integer(key); + } + + static constexpr size_t hash(const T& slot) noexcept + { + return flat_set_hash_integer(slot.color); + } + + static constexpr bool equals(const T& slot, u32 key) + { + return slot.color == key; + } + + static bool empty(const T& slot) + { + return !slot.brush; + } + + static constexpr void fill(T& slot, u32 key) + { + slot.color = key; + } + + static std::unique_ptr allocate(size_t capacity) + { + return std::make_unique(capacity); + } + + static void clear(T* data, size_t capacity) noexcept + { + for (auto& slot : std::span{ data, capacity }) + { + slot.brush.reset(); + } + } + }; +} + BackendD2D::BackendD2D(wil::com_ptr device, wil::com_ptr deviceContext) noexcept : _device{ std::move(device) }, _deviceContext{ std::move(deviceContext) } @@ -42,9 +89,10 @@ void BackendD2D::Render(RenderingPayload& p) _renderTarget->Clear(); #endif _drawBackground(p); + _drawCursorPart1(p); _drawText(p); _drawGridlines(p); - _drawCursor(p); + _drawCursorPart2(p); _drawSelection(p); #if ATLAS_DEBUG_SHOW_DIRTY _debugShowDirty(p); @@ -86,6 +134,7 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) const auto renderTargetChanged = !_renderTarget; const auto fontChanged = _fontGeneration != p.s->font.generation(); + const auto cursorChanged = _cursorGeneration != p.s->cursor.generation(); const auto cellCountChanged = _cellCount != p.s->cellCount; if (renderTargetChanged) @@ -99,19 +148,14 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) .dpiX = static_cast(p.s->font->dpi), .dpiY = static_cast(p.s->font->dpi), }; - wil::com_ptr renderTarget; - THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); - _renderTarget = renderTarget.query(); - _renderTarget4 = renderTarget.try_query(); + // ID2D1RenderTarget and ID2D1DeviceContext are the same and I'm tired of pretending they're not. 
+ THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, reinterpret_cast(_renderTarget.addressof()))); + _renderTarget.query_to(_renderTarget4.addressof()); _renderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); _renderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); } - { - static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; - THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); - _brushColor = 0xffffffff; - } + _brushes.clear(); } if (!_dottedStrokeStyle) @@ -152,8 +196,15 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) _backgroundBitmapGeneration = {}; } + if (fontChanged || cursorChanged) + { + _cursorBitmap.reset(); + _cursorBitmapSize = {}; + } + _generation = p.s.generation(); _fontGeneration = p.s->font.generation(); + _cursorGeneration = p.s->cursor.generation(); _cellCount = p.s->cellCount; } @@ -191,7 +242,7 @@ void BackendD2D::_drawText(RenderingPayload& p) auto baselineX = 0.0f; auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); - if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) + if (p.invalidatedRows.contains(y)) { for (const auto& m : row->mappings) { @@ -413,14 +464,14 @@ void BackendD2D::_drawGridlines(const RenderingPayload& p) void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { - const auto columnToDIP = [&](til::CoordType i) { - return i * p.s->font->cellSize.x; + const auto columnToPx = [&](til::CoordType i) { + return static_cast(i * p.s->font->cellSize.x); }; - const auto rowToDIP = [&](til::CoordType i) { - return i * p.s->font->cellSize.y; + const auto rowToPx = [&](til::CoordType i) { + return static_cast(i * p.s->font->cellSize.y); }; - const auto top = rowToDIP(y); + const auto top = rowToPx(y); const auto bottom = top + p.s->font->cellSize.y; for (const auto& r : row->gridLineRanges) @@ -428,9 +479,9 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const 
ShapedRow* ro // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. assert(r.lines.any()); - const auto left = columnToDIP(r.from); - const auto right = columnToDIP(r.to); - til::rect rect; + const auto left = columnToPx(r.from); + const auto right = columnToPx(r.to); + D2D1_RECT_F rect{}; if (r.lines.test(GridLines::Left)) { @@ -438,7 +489,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro rect.bottom = bottom; for (auto i = r.from; i < r.to; ++i) { - rect.left = columnToDIP(i); + rect.left = columnToPx(i); rect.right = rect.left + p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } @@ -457,7 +508,7 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro rect.bottom = bottom; for (auto i = r.to; i > r.from; --i) { - rect.right = columnToDIP(i); + rect.right = columnToPx(i); rect.left = rect.right - p.s->font->thinLineWidth; _fillRectangle(rect, r.color); } @@ -512,64 +563,150 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro } } -void BackendD2D::_drawCursor(const RenderingPayload& p) +void BackendD2D::_drawCursorPart1(const RenderingPayload& p) { if (p.cursorRect.empty()) { return; } - // Inverted cursors could be implemented in the future using - // ID2D1DeviceContext::DrawImage and D2D1_COMPOSITE_MODE_MASK_INVERT. 
+ const auto cursorColor = p.s->cursor->cursorColor; + if (cursorColor == 0xffffffff) + { + const auto cursorSize = p.cursorRect.size(); + if (cursorSize != _cursorBitmapSize) + { + _resizeCursorBitmap(p, cursorSize); + } + + const auto backgroundBitmapOffset = p.cursorRect.top * p.backgroundBitmapStride; + const auto cellSizeX = static_cast(p.s->font->cellSize.x); + const auto cellSizeY = static_cast(p.s->font->cellSize.y); + const auto offsetX = p.cursorRect.left * cellSizeX; + const auto offsetY = p.cursorRect.top * cellSizeY; - til::rect rect{ - p.s->font->cellSize.x * p.cursorRect.left, - p.s->font->cellSize.y * p.cursorRect.top, - p.s->font->cellSize.x * p.cursorRect.right, - p.s->font->cellSize.y * p.cursorRect.bottom, + D2D1_RECT_F srcRect{ + .bottom = cursorSize.height * cellSizeY, + }; + D2D1_RECT_F dstRect{ + .top = offsetY, + .bottom = offsetY + srcRect.bottom, + }; + + for (til::CoordType x = 0; x < cursorSize.width; ++x) + { + const auto bg = p.backgroundBitmap[backgroundBitmapOffset + x]; + const auto brush = _brushWithColor(bg ^ 0x3f3f3f); + srcRect.left = x * cellSizeX; + srcRect.right = srcRect.left + cellSizeX; + dstRect.left = srcRect.left + offsetX; + dstRect.right = srcRect.right + offsetX; + _renderTarget->FillOpacityMask(_cursorBitmap.get(), brush, &dstRect, &srcRect); + } + } +} + +void BackendD2D::_drawCursorPart2(const RenderingPayload& p) +{ + if (p.cursorRect.empty()) + { + return; + } + + const auto cursorColor = p.s->cursor->cursorColor; + const D2D1_POINT_2F target{ + static_cast(p.cursorRect.left * p.s->font->cellSize.x), + static_cast(p.cursorRect.top * p.s->font->cellSize.y), + }; + + if (cursorColor == 0xffffffff) + { + _renderTarget->DrawImage(_cursorBitmap.get(), &target, nullptr, D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR, D2D1_COMPOSITE_MODE_MASK_INVERT); + } + else + { + const D2D1_RECT_F rect{ + target.x, + target.y, + static_cast(p.cursorRect.right * p.s->font->cellSize.x), + static_cast(p.cursorRect.bottom * 
p.s->font->cellSize.y), + }; + const auto brush = _brushWithColor(cursorColor); + _drawCursor(p, _renderTarget.get(), rect, brush); + } +} + +void BackendD2D::_resizeCursorBitmap(const RenderingPayload& p, const til::size newSize) +{ + const til::size newSizeInPx{ + newSize.width * p.s->font->cellSize.x, + newSize.height * p.s->font->cellSize.y, }; + // CreateCompatibleRenderTarget is a terrific API and does not adopt _any_ of the settings of the + // parent render target (like the AA mode or D2D1_UNIT_MODE_PIXELS). Not sure who came up with that, + // but fact is that we need to set both sizes to override the DPI and fake D2D1_UNIT_MODE_PIXELS. + const D2D1_SIZE_F sizeF{ static_cast(newSizeInPx.width), static_cast(newSizeInPx.height) }; + const D2D1_SIZE_U sizeU{ gsl::narrow_cast(newSizeInPx.width), gsl::narrow_cast(newSizeInPx.height) }; + wil::com_ptr cursorRenderTarget; + _renderTarget->CreateCompatibleRenderTarget(&sizeF, &sizeU, nullptr, D2D1_COMPATIBLE_RENDER_TARGET_OPTIONS_NONE, cursorRenderTarget.addressof()); + cursorRenderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); + + cursorRenderTarget->BeginDraw(); + { + const D2D1_RECT_F rect{ 0, 0, sizeF.width, sizeF.height }; + const auto brush = _brushWithColor(0xffffffff); + _drawCursor(p, cursorRenderTarget.get(), rect, brush); + } + THROW_IF_FAILED(cursorRenderTarget->EndDraw()); + + cursorRenderTarget->GetBitmap(_cursorBitmap.put()); + _cursorBitmapSize = newSize; +} + +void BackendD2D::_drawCursor(const RenderingPayload& p, ID2D1RenderTarget* renderTarget, D2D1_RECT_F rect, ID2D1Brush* brush) noexcept +{ switch (static_cast(p.s->cursor->cursorType)) { case CursorType::Legacy: - rect.top = rect.bottom - (p.s->font->cellSize.y * p.s->cursor->heightPercentage + 50) / 100; - _fillRectangle(rect, p.s->cursor->cursorColor); + { + const auto height = p.s->cursor->heightPercentage / 100.0f; + rect.top = roundf((rect.top - rect.bottom) * height + rect.bottom); + renderTarget->FillRectangle(&rect, brush); 
break; + } case CursorType::VerticalBar: rect.right = rect.left + p.s->font->thinLineWidth; - _fillRectangle(rect, p.s->cursor->cursorColor); + renderTarget->FillRectangle(&rect, brush); break; case CursorType::Underscore: rect.top += p.s->font->underlinePos; rect.bottom = rect.top + p.s->font->underlineWidth; - _fillRectangle(rect, p.s->cursor->cursorColor); + renderTarget->FillRectangle(&rect, brush); break; case CursorType::EmptyBox: { - const auto brush = _brushWithColor(p.s->cursor->cursorColor); - const auto w = p.s->font->thinLineWidth; + const auto w = static_cast(p.s->font->thinLineWidth); const auto wh = w / 2.0f; - const D2D1_RECT_F rectF{ - rect.left + wh, - rect.top + wh, - rect.right - wh, - rect.bottom - wh, - }; - _renderTarget->DrawRectangle(&rectF, brush, w, nullptr); + rect.left += wh; + rect.top += wh; + rect.right -= wh; + rect.bottom -= wh; + renderTarget->DrawRectangle(&rect, brush, w, nullptr); break; } case CursorType::FullBox: - _fillRectangle(rect, p.s->cursor->cursorColor); + renderTarget->FillRectangle(&rect, brush); break; case CursorType::DoubleUnderscore: { auto rect2 = rect; rect2.top = rect.top + p.s->font->doubleUnderlinePos.x; rect2.bottom = rect2.top + p.s->font->thinLineWidth; - _fillRectangle(rect2, p.s->cursor->cursorColor); + renderTarget->FillRectangle(&rect2, brush); rect.top = rect.top + p.s->font->doubleUnderlinePos.y; rect.bottom = rect.top + p.s->font->thinLineWidth; - _fillRectangle(rect, p.s->cursor->cursorColor); + renderTarget->FillRectangle(&rect, brush); break; } default: @@ -638,24 +775,19 @@ void BackendD2D::_debugDumpRenderTarget(const RenderingPayload& p) ID2D1Brush* BackendD2D::_brushWithColor(u32 color) { - if (_brushColor != color) + if (_brushes.size() >= 16) + { + _brushes.clear(); + } + + const auto [cached, inserted] = _brushes.insert(color); + if (inserted) { const auto d2dColor = colorFromU32(color); - THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&d2dColor, nullptr, _brush.put())); - 
_brushColor = color; + THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&d2dColor, nullptr, cached.brush.addressof())); } - return _brush.get(); -} -void BackendD2D::_fillRectangle(const til::rect& rect, u32 color) -{ - const D2D1_RECT_F rectF{ - static_cast(rect.left), - static_cast(rect.top), - static_cast(rect.right), - static_cast(rect.bottom), - }; - _fillRectangle(rectF, color); + return cached.brush.get(); } void BackendD2D::_fillRectangle(const D2D1_RECT_F& rect, u32 color) diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index cb230f081fa..a2cc545ed04 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "Backend.h" namespace Microsoft::Console::Render::Atlas @@ -15,6 +17,12 @@ namespace Microsoft::Console::Render::Atlas bool RequiresContinuousRedraw() noexcept override; void WaitUntilCanRender() noexcept override; + struct CachedBrush + { + wil::com_ptr brush; + u32 color; + }; + private: __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; @@ -24,12 +32,15 @@ namespace Microsoft::Console::Render::Atlas __declspec(noinline) f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); - void _drawCursor(const RenderingPayload& p); + void _drawCursorWithColor(const RenderingPayload& p); + void _drawCursorPart1(const RenderingPayload& p); + void _drawCursorPart2(const RenderingPayload& p); + static void _drawCursor(const RenderingPayload& p, ID2D1RenderTarget* renderTarget, D2D1_RECT_F rect, ID2D1Brush* brush) noexcept; + void _resizeCursorBitmap(const RenderingPayload& p, til::size newSize); void _drawSelection(const RenderingPayload& p); void _debugShowDirty(const RenderingPayload& p); void 
_debugDumpRenderTarget(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); - void _fillRectangle(const til::rect& rect, u32 color); void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager _swapChainManager; @@ -44,14 +55,17 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _backgroundBrush; til::generation_t _backgroundBitmapGeneration; - wil::com_ptr _brush; - u32 _brushColor = 0; + wil::com_ptr _cursorBitmap; + til::size _cursorBitmapSize; // in columns/rows + + til::linear_flat_set _brushes; Buffer _glyphMetrics; til::generation_t _generation; til::generation_t _fontGeneration; - u16x2 _cellCount; + til::generation_t _cursorGeneration; + u16x2 _cellCount{}; #if ATLAS_DEBUG_SHOW_DIRTY til::rect _presentRects[9]{}; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 9d767397a53..1243225a48e 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -44,27 +44,27 @@ namespace til { using T = BackendD3D::AtlasGlyphEntry; - static size_t hash(u16 key) noexcept + static constexpr size_t hash(u16 key) noexcept { return flat_set_hash_integer(key); } - static size_t hash(const T& slot) noexcept + static constexpr size_t hash(const T& slot) noexcept { return flat_set_hash_integer(slot.glyphIndex); } - static bool equals(const T& slot, u16 key) + static constexpr bool equals(const T& slot, u16 key) { return slot.glyphIndex == key; } - static bool empty(const T& slot) + static constexpr bool empty(const T& slot) { return !slot._occupied; } - static void fill(T& slot, u16 key) + static constexpr void fill(T& slot, u16 key) { slot.glyphIndex = key; slot._occupied = 1; @@ -88,22 +88,22 @@ namespace til static size_t hash(const BackendD3D::AtlasFontFaceKey& key) noexcept { - return flat_set_hash_integer(std::bit_cast(key.fontFace) ^ static_cast(key.lineRendition)); + return flat_set_hash_integer(std::bit_cast(key.fontFace) | static_cast(key.lineRendition)); } static 
size_t hash(const T& slot) noexcept { const auto& inner = *slot.inner; - return flat_set_hash_integer(std::bit_cast(inner.fontFace.get()) ^ static_cast(inner.lineRendition)); + return flat_set_hash_integer(std::bit_cast(inner.fontFace.get()) | static_cast(inner.lineRendition)); } - static bool equals(const T& slot, const BackendD3D::AtlasFontFaceKey& key) + static bool equals(const T& slot, const BackendD3D::AtlasFontFaceKey& key) noexcept { const auto& inner = *slot.inner; return inner.fontFace.get() == key.fontFace && inner.lineRendition == key.lineRendition; } - static bool empty(const T& slot) + static bool empty(const T& slot) noexcept { return !slot.inner; } @@ -126,10 +126,7 @@ namespace til { for (auto& slot : std::span{ data, capacity }) { - if (!empty(slot)) - { - slot = {}; - } + slot.inner.reset(); } } }; @@ -145,10 +142,10 @@ BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptrCreateInputLayout(&layout[0], std::size(layout), &shader_vs[0], sizeof(shader_vs), _inputLayout.addressof())); @@ -591,12 +588,12 @@ void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) co void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const { { - VSConstBuffer data; + VSConstBuffer data{}; data.positionScale = { 2.0f / p.s->targetSize.x, -2.0f / p.s->targetSize.y }; _deviceContext->UpdateSubresource(_vsConstantBuffer.get(), 0, nullptr, &data, 0, 0); } { - PSConstBuffer data; + PSConstBuffer data{}; data.backgroundColor = colorFromU32(p.s->misc->backgroundColor); data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; @@ -753,16 +750,19 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) static constexpr u32 maxArea = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION * D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; const auto cellArea = static_cast(p.s->font->cellSize.x) * p.s->font->cellSize.y; + const auto targetArea = 
static_cast(p.s->targetSize.x) * p.s->targetSize.y; const auto minAreaByFont = cellArea * 95; // Covers all printable ASCII characters const auto minAreaByGrowth = static_cast(_rectPacker.width) * _rectPacker.height * 2; const auto min = std::max(minArea, std::max(minAreaByFont, minAreaByGrowth)); - // It's hard to say what the max. size of the cache should be. Optimally I think we should use as much memory as - // is available, but the rendering code in this project is a big mess and so integrating memory pressure feedback - // (RegisterVideoMemoryBudgetChangeNotificationEvent) is rather difficult. As an alternative I'm using the size - // of 10k cells of the terminal as an estimate, which scales with the font size and thus the DPI of the display. - const auto maxAreaByFont = cellArea * 10000; + // It's hard to say what the max. size of the cache should be. Optimally I think we should use as much + // memory as is available, but the rendering code in this project is a big mess and so integrating + // memory pressure feedback (RegisterVideoMemoryBudgetChangeNotificationEvent) is rather difficult. + // As an alternative I'm using 1.25x the size of the swap chain. The 1.25x is there to avoid situations, where + // we're locked into a state, where on every render pass we're starting with a half full atlas, drawing once, + // filling it with the remaining half and drawing again, requiring two rendering passes on each frame. + const auto maxAreaByFont = targetArea + targetArea / 4; const auto area = std::min(maxArea, std::min(maxAreaByFont, min)); // This block of code calculates the size of a power-of-2 texture that has an area larger than the given `area`. @@ -771,8 +771,8 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) // every time you resize the window by a pixel. Instead it only grows/shrinks by a factor of 2. 
unsigned long index; _BitScanReverse(&index, area - 1); - const auto u = ::base::saturated_cast(1u << ((index + 2) / 2)); - const auto v = ::base::saturated_cast(1u << ((index + 1) / 2)); + const auto u = static_cast(1u << ((index + 2) / 2)); + const auto v = static_cast(1u << ((index + 1) / 2)); if (u != _rectPacker.width || v != _rectPacker.height) { @@ -802,10 +802,9 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) .type = D2D1_RENDER_TARGET_TYPE_DEFAULT, .pixelFormat = { DXGI_FORMAT_B8G8R8A8_UNORM, D2D1_ALPHA_MODE_PREMULTIPLIED }, }; - wil::com_ptr renderTarget; - THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, renderTarget.addressof())); - _d2dRenderTarget = renderTarget.query(); - _d2dRenderTarget4 = renderTarget.try_query(); + // ID2D1RenderTarget and ID2D1DeviceContext are the same and I'm tired of pretending they're not. + THROW_IF_FAILED(p.d2dFactory->CreateDxgiSurfaceRenderTarget(surface.get(), &props, reinterpret_cast(_d2dRenderTarget.addressof()))); + _d2dRenderTarget.try_query_to(_d2dRenderTarget4.addressof()); _d2dRenderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); // We don't really use D2D for anything except DWrite, but it @@ -837,10 +836,11 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShaderResources(0, 2, &resources[0]); + + _rectPackerData = Buffer{ u }; } _glyphAtlasMap.clear(); - _rectPackerData = Buffer{ u }; stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), _rectPackerData.size()); _d2dBeginDrawing(); @@ -861,29 +861,17 @@ BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept return _instances[_instancesCount - 1]; } -void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType) -{ - _appendQuad(position, size, {}, color, shadingType); -} - // NOTE: Up to 5M calls per second -> no std::vector, no 
std::unordered_map. -void BackendD3D::_appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType) +// This function is an easy >100x faster than std::vector, can be +// inlined and reduces overall (!) renderer CPU usage by 5%. +BackendD3D::QuadInstance& BackendD3D::_appendQuad() { if (_instancesCount >= _instances.size()) { _bumpInstancesSize(); } - _instances[_instancesCount++] = QuadInstance{ - position.x, - position.y, - size.x, - size.y, - texcoord.x, - texcoord.y, - shadingType, - color, - }; + return _instances[_instancesCount++]; } void BackendD3D::_bumpInstancesSize() @@ -1015,7 +1003,10 @@ void BackendD3D::_drawBackground(const RenderingPayload& p) _backgroundBitmapGeneration = p.backgroundBitmapGeneration; } - _appendQuad({}, p.s->targetSize, 0, ShadingType::Background); + _appendQuad() = { + .shadingType = ShadingType::Background, + .size = p.s->targetSize, + }; } void BackendD3D::_drawText(RenderingPayload& p) @@ -1061,6 +1052,7 @@ void BackendD3D::_drawText(RenderingPayload& p) // Yes, I agree, avoid goto. Sometimes. It's not my fault that C++ still doesn't // have a `continue outerloop;` like other languages had it for decades. :( #pragma warning(suppress : 26438) // Avoid 'goto' (es.76). +#pragma warning(suppress : 26448) // Consider using gsl::finally if final action is intended (gsl.util). goto drawGlyphRetry; } @@ -1068,23 +1060,24 @@ void BackendD3D::_drawText(RenderingPayload& p) { auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); + + // A non-standard line rendition will make characters appear twice as wide, which requires us to scale the baseline advance by 2. + // We need to do this before applying the glyph offset however, since the offset is already 2x scaled in case of such glyphs. 
l <<= lineRenditionScale; - l += glyphEntry.data.positionX; - t += glyphEntry.data.positionY; - row->dirtyTop = std::min(row->dirtyTop, t); - row->dirtyBottom = std::max(row->dirtyBottom, t + glyphEntry.data.sizeY); + l += glyphEntry.data.offset.x; + t += glyphEntry.data.offset.y; - if (_instancesCount >= _instances.size()) - { - _bumpInstancesSize(); - } - - auto& instance = _instances[_instancesCount++]; - instance = glyphEntry.data; - instance.positionX = static_cast(l); - instance.positionY = static_cast(t); - instance.color = row->colors[x]; + row->dirtyTop = std::min(row->dirtyTop, t); + row->dirtyBottom = std::max(row->dirtyBottom, t + glyphEntry.data.size.y); + + _appendQuad() = { + .shadingType = glyphEntry.data.shadingType, + .position = { static_cast(l), static_cast(t) }, + .size = glyphEntry.data.size, + .texcoord = glyphEntry.data.texcoord, + .color = row->colors[x], + }; } baselineX += row->glyphAdvances[x]; @@ -1092,7 +1085,7 @@ void BackendD3D::_drawText(RenderingPayload& p) } } - if (y >= p.invalidatedRows.x && y < p.invalidatedRows.y) + if (p.invalidatedRows.contains(y)) { dirtyTop = std::min(dirtyTop, row->dirtyTop); dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); @@ -1265,13 +1258,13 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryI _d2dBeginDrawing(); const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); - glyphEntry.data.positionX = bl; - glyphEntry.data.positionY = bt; - glyphEntry.data.sizeX = rect.w; - glyphEntry.data.sizeY = rect.h; - glyphEntry.data.texcoordX = rect.x; - glyphEntry.data.texcoordY = rect.y; glyphEntry.data.shadingType = colorGlyph ? 
ShadingType::Passthrough : _textShadingType; + glyphEntry.data.offset.x = bl; + glyphEntry.data.offset.y = bt; + glyphEntry.data.size.x = rect.w; + glyphEntry.data.size.y = rect.h; + glyphEntry.data.texcoord.x = rect.x; + glyphEntry.data.texcoord.y = rect.y; if (lineRendition >= LineRendition::DoubleHeightTop) { @@ -1352,17 +1345,17 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa _d2dBeginDrawing(); _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, interpolation, nullptr, nullptr); - glyphEntry.data.positionX = 0; - glyphEntry.data.positionY = -p.s->font->baseline; - glyphEntry.data.sizeX = rect.w; - glyphEntry.data.sizeY = rect.h; - glyphEntry.data.texcoordX = rect.x; - glyphEntry.data.texcoordY = rect.y; glyphEntry.data.shadingType = ShadingType::TextGrayscale; + glyphEntry.data.offset.x = 0; + glyphEntry.data.offset.y = -p.s->font->baseline; + glyphEntry.data.size.x = rect.w; + glyphEntry.data.size.y = rect.h; + glyphEntry.data.texcoord.x = rect.x; + glyphEntry.data.texcoord.y = rect.y; if (lineRendition >= LineRendition::DoubleHeightTop) { - glyphEntry.data.positionY -= p.s->font->cellSize.y; + glyphEntry.data.offset.y -= p.s->font->cellSize.y; _splitDoubleHeightGlyph(p, fontFaceEntry, glyphEntry); } @@ -1384,7 +1377,7 @@ void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) { // Twice the line height, twice the descender gap. For both. - glyphEntry.data.positionY -= p.s->font->descender; + glyphEntry.data.offset.y -= p.s->font->descender; const auto isTop = fontFaceEntry.lineRendition == LineRendition::DoubleHeightTop; @@ -1400,20 +1393,20 @@ void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasF auto& top = isTop ? glyphEntry : entry2; auto& bottom = isTop ? 
entry2 : glyphEntry; - const auto topSize = clamp(-glyphEntry.data.positionY - p.s->font->baseline, 0, static_cast(glyphEntry.data.sizeY)); - top.data.positionY += p.s->font->cellSize.y; - top.data.sizeY = topSize; - bottom.data.positionY += topSize; - bottom.data.sizeY = std::max(0, bottom.data.sizeY - topSize); - bottom.data.texcoordY += topSize; + const auto topSize = clamp(-glyphEntry.data.offset.y - p.s->font->baseline, 0, static_cast(glyphEntry.data.size.y)); + top.data.offset.y += p.s->font->cellSize.y; + top.data.size.y = topSize; + bottom.data.offset.y += topSize; + bottom.data.size.y = std::max(0, bottom.data.size.y - topSize); + bottom.data.texcoord.y += topSize; // Things like diacritics might be so small that they only exist on either half of the // double-height row. This effectively turns the other (unneeded) side into whitespace. - if (!top.data.sizeY) + if (!top.data.size.y) { top.data.shadingType = ShadingType::Default; } - if (!bottom.data.sizeY) + if (!bottom.data.size.y) { bottom.data.shadingType = ShadingType::Default; } @@ -1434,93 +1427,70 @@ void BackendD3D::_drawGridlines(const RenderingPayload& p) void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { - const auto top = p.s->font->cellSize.y * y; + const auto top = static_cast(p.s->font->cellSize.y * y); for (const auto& r : row->gridLineRanges) { // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. 
assert(r.lines.any()); - const auto left = r.from * p.s->font->cellSize.x; - const auto width = (r.to - r.from) * p.s->font->cellSize.x; - i16x2 position; - u16x2 size; + const auto left = static_cast(r.from * p.s->font->cellSize.x); + const auto width = static_cast((r.to - r.from) * p.s->font->cellSize.x); + const auto appendHorizontalLine = [&](u16 offsetY, u16 height) { + _appendQuad() = QuadInstance{ + .shadingType = ShadingType::SolidFill, + .position = { left, static_cast(top + offsetY) }, + .size = { width, height }, + .color = r.color, + }; + }; + const auto appendVerticalLine = [&](int col) { + _appendQuad() = QuadInstance{ + .shadingType = ShadingType::SolidFill, + .position = { static_cast(col * p.s->font->cellSize.x), top }, + .size = { p.s->font->thinLineWidth, p.s->font->cellSize.y }, + .color = r.color, + }; + }; if (r.lines.test(GridLines::Left)) { for (auto i = r.from; i < r.to; ++i) { - position.x = i * p.s->font->cellSize.x; - position.y = top; - size.x = p.s->font->thinLineWidth; - size.y = p.s->font->cellSize.y; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendVerticalLine(i); } } if (r.lines.test(GridLines::Top)) { - position.x = left; - position.y = top; - size.x = width; - size.y = p.s->font->thinLineWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendHorizontalLine(0, p.s->font->thinLineWidth); } if (r.lines.test(GridLines::Right)) { for (auto i = r.to; i > r.from; --i) { - position.x = i * p.s->font->cellSize.x; - position.y = top; - size.x = p.s->font->thinLineWidth; - size.y = p.s->font->cellSize.y; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendVerticalLine(i); } } if (r.lines.test(GridLines::Bottom)) { - position.x = left; - position.y = top + p.s->font->cellSize.y - p.s->font->thinLineWidth; - size.x = width; - size.y = p.s->font->thinLineWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendHorizontalLine(p.s->font->cellSize.y - 
p.s->font->thinLineWidth, p.s->font->thinLineWidth); } if (r.lines.test(GridLines::Underline)) { - position.x = left; - position.y = top + p.s->font->underlinePos; - size.x = width; - size.y = p.s->font->underlineWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth); } if (r.lines.test(GridLines::HyperlinkUnderline)) { - position.x = left; - position.y = top + p.s->font->underlinePos; - size.x = width; - size.y = p.s->font->underlineWidth; - _appendQuad(position, size, r.color, ShadingType::DashedLine); + appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth); } if (r.lines.test(GridLines::DoubleUnderline)) { - position.x = left; - position.y = top + p.s->font->doubleUnderlinePos.x; - size.x = width; - size.y = p.s->font->thinLineWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); - - position.x = left; - position.y = top + p.s->font->doubleUnderlinePos.y; - size.x = width; - size.y = p.s->font->thinLineWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendHorizontalLine(p.s->font->doubleUnderlinePos.x, p.s->font->thinLineWidth); + appendHorizontalLine(p.s->font->doubleUnderlinePos.y, p.s->font->thinLineWidth); } if (r.lines.test(GridLines::Strikethrough)) { - position.x = left; - position.y = top + p.s->font->strikethroughPos; - size.x = width; - size.y = p.s->font->strikethroughWidth; - _appendQuad(position, size, r.color, ShadingType::SolidFill); + appendHorizontalLine(p.s->font->strikethroughPos, p.s->font->strikethroughWidth); } } } @@ -1535,7 +1505,7 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) } const auto cursorColor = p.s->cursor->cursorColor; - const auto offset = p.cursorRect.top * static_cast(p.s->cellCount.x); + const auto offset = p.cursorRect.top * p.backgroundBitmapStride; for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1) { @@ -1620,7 +1590,12 @@ void 
BackendD3D::_drawCursorPart1(const RenderingPayload& p) { for (auto& c : _cursorRects) { - _appendQuad(c.position, c.size, c.color, ShadingType::SolidFill); + _appendQuad() = { + .shadingType = ShadingType::SolidFill, + .position = c.position, + .size = c.size, + .color = c.color, + }; c.color = 0xffffffff; } } @@ -1642,7 +1617,12 @@ void BackendD3D::_drawCursorPart2(const RenderingPayload& p) for (const auto& c : _cursorRects) { - _appendQuad(c.position, c.size, c.color, ShadingType::SolidFill); + _appendQuad() = { + .shadingType = ShadingType::SolidFill, + .position = c.position, + .size = c.size, + .color = c.color, + }; } if (color == 0xffffffff) @@ -1665,19 +1645,22 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) // The way this is implemented isn't very smart, but we also don't have very many rows to iterate through. if (row->selectionFrom == lastFrom && row->selectionTo == lastTo) { - _getLastQuad().sizeY += p.s->font->cellSize.y; + _getLastQuad().size.y += p.s->font->cellSize.y; } else { - const i16x2 position{ - p.s->font->cellSize.x * row->selectionFrom, - p.s->font->cellSize.y * y, + _appendQuad() = { + .shadingType = ShadingType::SolidFill, + .position = { + p.s->font->cellSize.x * row->selectionFrom, + p.s->font->cellSize.y * y, + }, + .size = { + static_cast(p.s->font->cellSize.x * (row->selectionTo - row->selectionFrom)), + p.s->font->cellSize.y, + }, + .color = p.s->misc->selectionColor, }; - const u16x2 size{ - (p.s->font->cellSize.x * (row->selectionTo - row->selectionFrom)), - p.s->font->cellSize.y, - }; - _appendQuad(position, size, p.s->misc->selectionColor, ShadingType::SolidFill); lastFrom = row->selectionFrom; lastTo = row->selectionTo; } @@ -1697,16 +1680,18 @@ void BackendD3D::_debugShowDirty(const RenderingPayload& p) { if (const auto& rect = _presentRects[i]) { - const i16x2 position{ - static_cast(rect.left), - static_cast(rect.top), - }; - const u16x2 size{ - static_cast(rect.right - rect.left), - static_cast(rect.bottom 
- rect.top), + _appendQuad() = { + .shadingType = ShadingType::SolidFill, + .position = { + static_cast(rect.left), + static_cast(rect.top), + }, + .size = { + static_cast(rect.right - rect.left), + static_cast(rect.bottom - rect.top), + }, + .color = colorbrewer::pastel1[i] | 0x1f000000, }; - const auto color = colorbrewer::pastel1[i] | 0x1f000000; - _appendQuad(position, size, color, ShadingType::SolidFill); } } } diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index a77141b49c8..723b9255940 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -68,24 +68,26 @@ namespace Microsoft::Console::Render::Atlas // NOTE: Don't initialize any members in this struct. This ensures that no // zero-initialization needs to occur when we allocate large buffers of this object. - struct alignas(u32) QuadInstance + struct QuadInstance { // `position` might clip outside of the bounds of the viewport and so it needs to be a // signed coordinate. i16x2 is used as the size of the instance buffer made the largest // impact on performance and power draw. If (when?) displays with >32k resolution make their // appearance in the future, this should be changed to f32x2. But if you do so, please change // all other occurrences of i16x2 positions/offsets throughout the class to keep it consistent. - i16 positionX; - i16 positionY; - - u16 sizeX; - u16 sizeY; - - u16 texcoordX; - u16 texcoordY; + alignas(u32) ShadingType shadingType; + alignas(u32) i16x2 position; + alignas(u32) u16x2 size; + alignas(u32) u16x2 texcoord; + alignas(u32) u32 color; + }; + struct alignas(u32) AtlasGlyphEntryData + { ShadingType shadingType; - u32 color; + i16x2 offset; + u16x2 size; + u16x2 texcoord; }; // NOTE: Don't initialize any members in this struct. This ensures that no @@ -95,7 +97,8 @@ namespace Microsoft::Console::Render::Atlas u16 glyphIndex; // All data in QuadInstance is u32-aligned anyways, so this simultaneously serves as padding. 
u16 _occupied; - QuadInstance data; + + AtlasGlyphEntryData data; }; // This exists so that we can look up a AtlasFontFaceEntry without AddRef()/Release()ing fontFace first. @@ -159,8 +162,7 @@ namespace Microsoft::Console::Render::Atlas void _resetGlyphAtlas(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; - void _appendQuad(i16x2 position, u16x2 size, u32 color, ShadingType shadingType); - void _appendQuad(i16x2 position, u16x2 size, u16x2 texcoord, u32 color, ShadingType shadingType); + QuadInstance& _appendQuad(); __declspec(noinline) void _bumpInstancesSize(); void _flushQuads(const RenderingPayload& p); __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); @@ -242,8 +244,8 @@ namespace Microsoft::Console::Render::Atlas til::generation_t _generation; til::generation_t _fontGeneration; til::generation_t _miscGeneration; - u16x2 _targetSize; - u16x2 _cellCount; + u16x2 _targetSize{}; + u16x2 _cellCount{}; ShadingType _textShadingType = ShadingType::Default; // An empty-box cursor spanning a wide glyph that has different @@ -252,7 +254,7 @@ namespace Microsoft::Console::Render::Atlas { i16x2 position; u16x2 size; - u32 color = 0; + u32 color; }; til::small_vector _cursorRects; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 27ea6f515f4..4cec1a6775c 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -56,8 +56,10 @@ namespace Microsoft::Console::Render::Atlas template struct vec2 { - T x{}; - T y{}; + // These members aren't zero-initialized to make these trivial types, + // and allow the compiler to quickly memset() allocations, etc. 
+ T x; + T y; ATLAS_POD_OPS(vec2) }; @@ -65,10 +67,12 @@ namespace Microsoft::Console::Render::Atlas template struct vec4 { - T x{}; - T y{}; - T z{}; - T w{}; + // These members aren't zero-initialized to make these trivial types, + // and allow the compiler to quickly memset() allocations, etc. + T x; + T y; + T z; + T w; ATLAS_POD_OPS(vec4) }; @@ -76,38 +80,44 @@ namespace Microsoft::Console::Render::Atlas template struct rect { - T left{}; - T top{}; - T right{}; - T bottom{}; + // These members aren't zero-initialized to make these trivial types, + // and allow the compiler to quickly memset() allocations, etc. + T left; + T top; + T right; + T bottom; ATLAS_POD_OPS(rect) constexpr bool empty() const noexcept { - return (left >= right) || (top >= bottom); + return left >= right || top >= bottom; } constexpr bool non_empty() const noexcept { - return (left < right) && (top < bottom); + return left < right && top < bottom; } }; template struct range { - T start{}; - T end{}; + T start; + T end; ATLAS_POD_OPS(range) + + constexpr bool contains(T v) const noexcept + { + return v >= start && v < end; + } }; using u8 = uint8_t; using u16 = uint16_t; using u16x2 = vec2; - using u16x4 = vec4; using u16r = rect; using i16 = int16_t; @@ -316,7 +326,7 @@ namespace Microsoft::Console::Render::Atlas u16 underlineWidth = 0; u16 strikethroughPos = 0; u16 strikethroughWidth = 0; - u16x2 doubleUnderlinePos; + u16x2 doubleUnderlinePos{}; u16 thinLineWidth = 0; u16 dpi = 96; AntialiasingMode antialiasingMode = DefaultAntialiasingMode; @@ -348,8 +358,8 @@ namespace Microsoft::Console::Render::Atlas til::generational font; til::generational cursor; til::generational misc; - u16x2 targetSize; - u16x2 cellCount; + u16x2 targetSize{}; + u16x2 cellCount{}; }; using GenerationalSettings = til::generational; @@ -448,6 +458,9 @@ namespace Microsoft::Console::Render::Atlas Buffer unorderedRows; // This is used as a scratch buffer during scrolling. 
Buffer rowsScratch; + // This contains the rows in the right order from row 0 to N. + // They get rotated around when we scroll the buffer. Technically + // we could also implement scrolling by using a circular array. Buffer rows; // This stride (width) of the backgroundBitmap is a "count" of u32 and not in bytes. size_t backgroundBitmapStride = 0; @@ -455,11 +468,13 @@ namespace Microsoft::Console::Render::Atlas // 1 ensures that the backends redraw the background, even if the background is // entirely black, just like `backgroundBitmap` is all back after it gets created. til::generation_t backgroundBitmapGeneration{ 1 }; - - u16r cursorRect; - + // In columns/rows. + til::rect cursorRect; + // In pixel. til::rect dirtyRectInPx; - u16x2 invalidatedRows; + // In rows. + range invalidatedRows{}; + // In pixel. i16 scrollOffset = 0; void MarkAllAsDirty() noexcept diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 3183d299585..878b7c21a84 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -13,10 +13,10 @@ struct VSData { float2 vertex : SV_Position; + uint shadingType : shadingType; int2 position : position; uint2 size : size; uint2 texcoord : texcoord; - uint shadingType : shadingType; float4 color : color; }; From f8f0ea13291c5a6aa02c262c11a4c6b775522d54 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 4 Apr 2023 01:01:38 +0200 Subject: [PATCH 24/37] Fix background color alpha --- src/renderer/atlas/Backend.h | 10 ++++++++++ src/renderer/atlas/BackendD3D.cpp | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 289bb35a3ab..f89e70d41a8 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -86,6 +86,16 @@ namespace Microsoft::Console::Render::Atlas return { r, g, b, a }; } + template + constexpr T colorFromU32Premultiply(u32 rgba) + { + const auto r = 
static_cast((rgba >> 0) & 0xff) / 255.0f; + const auto g = static_cast((rgba >> 8) & 0xff) / 255.0f; + const auto b = static_cast((rgba >> 16) & 0xff) / 255.0f; + const auto a = static_cast((rgba >> 24) & 0xff) / 255.0f; + return { r * a, g * a, b * a, a }; + } + constexpr u32 u32ColorPremultiply(u32 rgba) { auto rb = rgba & 0x00ff00ff; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 1243225a48e..7548b84f518 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -594,7 +594,7 @@ void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const } { PSConstBuffer data{}; - data.backgroundColor = colorFromU32(p.s->misc->backgroundColor); + data.backgroundColor = colorFromU32Premultiply(p.s->misc->backgroundColor); data.cellSize = { static_cast(p.s->font->cellSize.x), static_cast(p.s->font->cellSize.y) }; data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; DWrite_GetGammaRatios(_gamma, data.gammaRatios); @@ -1723,7 +1723,7 @@ void BackendD3D::_executeCustomShader(RenderingPayload& p) static_cast(_cellCount.x * p.s->font->cellSize.x), static_cast(_cellCount.y * p.s->font->cellSize.y), }, - .background = colorFromU32(p.s->misc->backgroundColor), + .background = colorFromU32Premultiply(p.s->misc->backgroundColor), }; D3D11_MAPPED_SUBRESOURCE mapped{}; From 7f1707b7ac30e368b7bdef84d2d40c462d9c827b Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 4 Apr 2023 21:30:49 +0200 Subject: [PATCH 25/37] Integrate changes to linear_flat_set from main --- src/renderer/atlas/BackendD2D.cpp | 65 +++++----------- src/renderer/atlas/BackendD2D.h | 17 +++++ src/renderer/atlas/BackendD3D.cpp | 118 ++++++++---------------------- src/renderer/atlas/BackendD3D.h | 55 +++++++++----- 4 files changed, 103 insertions(+), 152 deletions(-) diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 624302bc436..ba283d07b6f 100644 --- 
a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -23,52 +23,19 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; -namespace til +template<> +struct ::std::hash { - template<> - struct flat_set_trait + constexpr size_t operator()(u32 key) const noexcept { - using T = BackendD2D::CachedBrush; - - static constexpr size_t hash(u32 key) noexcept - { - return flat_set_hash_integer(key); - } - - static constexpr size_t hash(const T& slot) noexcept - { - return flat_set_hash_integer(slot.color); - } - - static constexpr bool equals(const T& slot, u32 key) - { - return slot.color == key; - } - - static bool empty(const T& slot) - { - return !slot.brush; - } - - static constexpr void fill(T& slot, u32 key) - { - slot.color = key; - } - - static std::unique_ptr allocate(size_t capacity) - { - return std::make_unique(capacity); - } + return til::flat_set_hash_integer(key); + } - static void clear(T* data, size_t capacity) noexcept - { - for (auto& slot : std::span{ data, capacity }) - { - slot.brush.reset(); - } - } - }; -} + constexpr size_t operator()(const BackendD2D::CachedBrush& slot) const noexcept + { + return til::flat_set_hash_integer(slot.color); + } +}; BackendD2D::BackendD2D(wil::com_ptr device, wil::com_ptr deviceContext) noexcept : _device{ std::move(device) }, @@ -155,7 +122,7 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) _renderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); _renderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); } - _brushes.clear(); + _clearBrushes(); } if (!_dottedStrokeStyle) @@ -777,7 +744,7 @@ ID2D1Brush* BackendD2D::_brushWithColor(u32 color) { if (_brushes.size() >= 16) { - _brushes.clear(); + _clearBrushes(); } const auto [cached, inserted] = _brushes.insert(color); @@ -790,6 +757,14 @@ ID2D1Brush* BackendD2D::_brushWithColor(u32 color) return cached.brush.get(); } +void BackendD2D::_clearBrushes() +{ + for (auto& slot : _brushes.container()) + { + 
slot.brush.reset(); + } +} + void BackendD2D::_fillRectangle(const D2D1_RECT_F& rect, u32 color) { const auto brush = _brushWithColor(color); diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index a2cc545ed04..838d56a6a9f 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -21,6 +21,22 @@ namespace Microsoft::Console::Render::Atlas { wil::com_ptr brush; u32 color; + + constexpr bool operator==(u32 key) const noexcept + { + return color == key; + } + + operator bool() const noexcept + { + return static_cast(brush); + } + + constexpr CachedBrush& operator=(u32 key) noexcept + { + color = key; + return *this; + } }; private: @@ -41,6 +57,7 @@ namespace Microsoft::Console::Render::Atlas void _debugShowDirty(const RenderingPayload& p); void _debugDumpRenderTarget(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); + __declspec(noinline) void _clearBrushes(); void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager _swapChainManager; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 7548b84f518..e8fd2dbd7c0 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -37,100 +37,36 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; -namespace til +template<> +struct ::std::hash { - template<> - struct flat_set_trait + constexpr size_t operator()(u16 key) const noexcept { - using T = BackendD3D::AtlasGlyphEntry; - - static constexpr size_t hash(u16 key) noexcept - { - return flat_set_hash_integer(key); - } - - static constexpr size_t hash(const T& slot) noexcept - { - return flat_set_hash_integer(slot.glyphIndex); - } - - static constexpr bool equals(const T& slot, u16 key) - { - return slot.glyphIndex == key; - } - - static constexpr bool empty(const T& slot) - { - return !slot._occupied; - } - - static constexpr void fill(T& slot, u16 key) - { - slot.glyphIndex = key; - slot._occupied 
= 1; - } - - static std::unique_ptr allocate(size_t capacity) - { - return std::make_unique(capacity); - } - - static void clear(T* data, size_t capacity) noexcept - { - memset(data, 0, capacity * sizeof(T)); - } - }; + return til::flat_set_hash_integer(key); + } - template<> - struct flat_set_trait + constexpr size_t operator()(const BackendD3D::AtlasGlyphEntry& slot) const noexcept { - using T = BackendD3D::AtlasFontFaceEntry; - - static size_t hash(const BackendD3D::AtlasFontFaceKey& key) noexcept - { - return flat_set_hash_integer(std::bit_cast(key.fontFace) | static_cast(key.lineRendition)); - } - - static size_t hash(const T& slot) noexcept - { - const auto& inner = *slot.inner; - return flat_set_hash_integer(std::bit_cast(inner.fontFace.get()) | static_cast(inner.lineRendition)); - } - - static bool equals(const T& slot, const BackendD3D::AtlasFontFaceKey& key) noexcept - { - const auto& inner = *slot.inner; - return inner.fontFace.get() == key.fontFace && inner.lineRendition == key.lineRendition; - } - - static bool empty(const T& slot) noexcept - { - return !slot.inner; - } - - static void fill(T& slot, const BackendD3D::AtlasFontFaceKey& key) - { - slot.inner = std::make_unique(); + return til::flat_set_hash_integer(slot.glyphIndex); + } +}; - auto& inner = *slot.inner; - inner.fontFace = key.fontFace; - inner.lineRendition = key.lineRendition; - } +template<> +struct ::std::hash +{ + using T = BackendD3D::AtlasFontFaceEntry; - static std::unique_ptr allocate(size_t capacity) - { - return std::make_unique(capacity); - } + size_t operator()(const BackendD3D::AtlasFontFaceKey& key) const noexcept + { + return til::flat_set_hash_integer(std::bit_cast(key.fontFace) | static_cast(key.lineRendition)); + } - static void clear(T* data, size_t capacity) noexcept - { - for (auto& slot : std::span{ data, capacity }) - { - slot.inner.reset(); - } - } - }; -} + size_t operator()(const BackendD3D::AtlasFontFaceEntry& slot) const noexcept + { + const auto& inner = 
*slot.inner; + return til::flat_set_hash_integer(std::bit_cast(inner.fontFace.get()) | static_cast(inner.lineRendition)); + } +}; BackendD3D::BackendD3D(wil::com_ptr device, wil::com_ptr deviceContext) : _device{ std::move(device) }, @@ -840,9 +776,13 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) _rectPackerData = Buffer{ u }; } - _glyphAtlasMap.clear(); stbrp_init_target(&_rectPacker, u, v, _rectPackerData.data(), _rectPackerData.size()); + for (auto& slot : _glyphAtlasMap.container()) + { + slot.inner.reset(); + } + _d2dBeginDrawing(); _d2dRenderTarget->Clear(); } @@ -1364,7 +1304,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa void BackendD3D::_drawGlyphPrepareRetry(const RenderingPayload& p) { - THROW_HR_IF_MSG(E_UNEXPECTED, _glyphAtlasMap.load() == 0, "BackendD3D::_drawGlyph deadlock"); + THROW_HR_IF_MSG(E_UNEXPECTED, _glyphAtlasMap.empty(), "BackendD3D::_drawGlyph deadlock"); _d2dEndDrawing(); _flushQuads(p); _resetGlyphAtlas(p); diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 723b9255940..a5899fcf656 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -99,6 +99,23 @@ namespace Microsoft::Console::Render::Atlas u16 _occupied; AtlasGlyphEntryData data; + + constexpr bool operator==(u16 key) const noexcept + { + return glyphIndex == key; + } + + constexpr operator bool() const noexcept + { + return _occupied != 0; + } + + constexpr AtlasGlyphEntry& operator=(u16 key) noexcept + { + glyphIndex = key; + _occupied = 1; + return *this; + } }; // This exists so that we can look up a AtlasFontFaceEntry without AddRef()/Release()ing fontFace first. @@ -108,24 +125,6 @@ namespace Microsoft::Console::Render::Atlas LineRendition lineRendition; }; - // Just... uh... turn around and pretend you don't see this. - // This stuffs (or extracts, below) a pointer and the line rendition into a single pointer. 
This works because in C (and COM) - // the minimum heap allocation alignment is at least 8 (the size of a double) and so the lowest 4 bit are free real estate. - // - // I'm doing this because it shrinks the size of AtlasFontFaceEntry by a third and simplifies - // both the hashing and comparison code for the hashmap lookup from the POV of the CPU. - static constexpr uintptr_t combineAtlasFontFaceKey(IDWriteFontFace2* fontFace, LineRendition lineRendition) noexcept - { - const auto p = std::bit_cast(fontFace); - assert((p & 7) == 0); - return p | static_cast(lineRendition); - } - - static constexpr IDWriteFontFace2* extractAtlasFontFaceKey(uintptr_t c) noexcept - { - return std::bit_cast(c & ~7); - } - struct AtlasFontFaceEntryInner { // BODGY: At the time of writing IDWriteFontFallback::MapCharacters returns the same IDWriteFontFace instance @@ -143,6 +142,26 @@ namespace Microsoft::Console::Render::Atlas // (which might resize the hashmap!), while the caller `_drawText` is holding onto `glyphs`. // If it wasn't heap allocated, all pointers into `linear_flat_set` would be invalidated. 
std::unique_ptr inner; + + bool operator==(const AtlasFontFaceKey& key) const noexcept + { + const auto& i = *inner; + return i.fontFace.get() == key.fontFace && i.lineRendition == key.lineRendition; + } + + operator bool() const noexcept + { + return static_cast(inner); + } + + AtlasFontFaceEntry& operator=(const AtlasFontFaceKey& key) + { + inner = std::make_unique(); + auto& i = *inner; + i.fontFace = key.fontFace; + i.lineRendition = key.lineRendition; + return *this; + } }; private: From 2602fa3983623e0b312c65e578e9987bee4ea162 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 4 Apr 2023 21:34:24 +0200 Subject: [PATCH 26/37] Fix AuditMode failures --- src/renderer/atlas/BackendD2D.cpp | 2 +- src/renderer/atlas/BackendD2D.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index ba283d07b6f..57e26bb71a7 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -757,7 +757,7 @@ ID2D1Brush* BackendD2D::_brushWithColor(u32 color) return cached.brush.get(); } -void BackendD2D::_clearBrushes() +void BackendD2D::_clearBrushes() const noexcept { for (auto& slot : _brushes.container()) { diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 838d56a6a9f..5efb7e643a0 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -20,7 +20,7 @@ namespace Microsoft::Console::Render::Atlas struct CachedBrush { wil::com_ptr brush; - u32 color; + u32 color = 0; constexpr bool operator==(u32 key) const noexcept { @@ -57,7 +57,7 @@ namespace Microsoft::Console::Render::Atlas void _debugShowDirty(const RenderingPayload& p); void _debugDumpRenderTarget(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); - __declspec(noinline) void _clearBrushes(); + __declspec(noinline) void _clearBrushes() const noexcept; void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager 
_swapChainManager; From d9b66abe65d9fdd46fd6a3bf288736f886ec2aa7 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Thu, 6 Apr 2023 22:00:04 +0200 Subject: [PATCH 27/37] Some cleanup, Ligature per-cell coloring --- src/renderer/atlas/AtlasEngine.api.cpp | 56 ++++++------ src/renderer/atlas/AtlasEngine.cpp | 96 ++++++++++++-------- src/renderer/atlas/AtlasEngine.h | 1 - src/renderer/atlas/Backend.h | 2 +- src/renderer/atlas/BackendD2D.cpp | 10 +-- src/renderer/atlas/BackendD3D.cpp | 117 ++++++++++++++++--------- src/renderer/atlas/BackendD3D.h | 21 +++-- src/renderer/atlas/common.h | 37 ++++---- src/renderer/atlas/shader_common.hlsl | 1 + src/renderer/atlas/shader_ps.hlsl | 46 ++++++++-- tools/ConsoleTypes.natvis | 7 +- 11 files changed, 252 insertions(+), 142 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index f039f94ed50..66ec7f1b34b 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -184,8 +184,8 @@ constexpr HRESULT vec2_narrow(U x, U y, vec2& out) noexcept [[nodiscard]] HRESULT AtlasEngine::UpdateViewport(const til::inclusive_rect& srNewViewport) noexcept { const u16x2 cellCount{ - gsl::narrow_cast(srNewViewport.right - srNewViewport.left + 1), - gsl::narrow_cast(srNewViewport.bottom - srNewViewport.top + 1), + gsl::narrow_cast(std::max(1, srNewViewport.right - srNewViewport.left + 1)), + gsl::narrow_cast(std::max(1, srNewViewport.bottom - srNewViewport.top + 1)), }; if (_api.s->cellCount != cellCount) { @@ -537,9 +537,9 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo // AtlasEngine::_recreateFontDependentResources() relies on these fields to // exist in this particular order in order to create appropriate default axes. 
- fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_WEIGHT, NAN }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_ITALIC, NAN }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_SLANT, NAN }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_WEIGHT, -1.0f }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_ITALIC, -1.0f }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_SLANT, -1.0f }); for (const auto& p : axes) { @@ -615,18 +615,6 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo DWRITE_FONT_METRICS metrics{}; fontFace->GetMetrics(&metrics); - // According to Wikipedia: - // > One em was traditionally defined as the width of the capital 'M' in the current typeface and point size, - // > because the 'M' was commonly cast the full-width of the square blocks [...] which are used in printing presses. - // Even today M is often the widest character in a font that supports ASCII. - // In the future a more robust solution could be written, until then this simple solution works for most cases. - static constexpr u32 codePoint = L'M'; - u16 glyphIndex; - THROW_IF_FAILED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex)); - - DWRITE_GLYPH_METRICS glyphMetrics{}; - THROW_IF_FAILED(fontFace->GetDesignGlyphMetrics(&glyphIndex, 1, &glyphMetrics)); - // Point sizes are commonly treated at a 72 DPI scale // (including by OpenType), whereas DirectWrite uses 96 DPI. // Since we want the height in px we multiply by the display's DPI. 
@@ -641,9 +629,23 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo const auto underlineThickness = static_cast(metrics.underlineThickness) * designUnitsPerPx; const auto strikethroughPosition = static_cast(-metrics.strikethroughPosition) * designUnitsPerPx; const auto strikethroughThickness = static_cast(metrics.strikethroughThickness) * designUnitsPerPx; - const auto advanceWidth = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; const auto advanceHeight = ascent + descent + lineGap; + // We use the same character to determine the advance width as CSS for its "ch" unit ("0"). + // According to the CSS spec, if it's impossible to determine the advance width, + // it must be assumed to be 0.5em wide. em in CSS refers to the computed font-size. + auto advanceWidth = 0.5f * fontSizeInPx; + { + static constexpr u32 codePoint = '0'; + u16 glyphIndex; + if (SUCCEEDED(fontFace->GetGlyphIndicesW(&codePoint, 1, &glyphIndex))) + { + DWRITE_GLYPH_METRICS glyphMetrics{}; + THROW_IF_FAILED(fontFace->GetDesignGlyphMetrics(&glyphIndex, 1, &glyphMetrics, FALSE)); + advanceWidth = static_cast(glyphMetrics.advanceWidth) * designUnitsPerPx; + } + } + auto adjustedWidth = std::roundf(fontInfoDesired.GetCellWidth().Resolve(advanceWidth, dpi, fontSizeInPx, advanceWidth)); auto adjustedHeight = std::roundf(fontInfoDesired.GetCellHeight().Resolve(advanceHeight, dpi, fontSizeInPx, advanceWidth)); @@ -704,15 +706,15 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo { std::wstring fontName{ requestedFaceName }; const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); - const auto baselineU16 = static_cast(baseline + 0.5f); + const auto baselineU16 = static_cast(lrintf(baseline)); const auto descenderU16 = gsl::narrow_cast(cellHeight - baselineU16); - const auto underlinePosU16 = static_cast(underlinePos + 0.5f); - const auto underlineWidthU16 = static_cast(underlineWidth + 0.5f); - const auto 
strikethroughPosU16 = static_cast(strikethroughPos + 0.5f); - const auto strikethroughWidthU16 = static_cast(strikethroughWidth + 0.5f); - const auto doubleUnderlinePosTopU16 = static_cast(doubleUnderlinePosTop + 0.5f); - const auto doubleUnderlinePosBottomU16 = static_cast(doubleUnderlinePosBottom + 0.5f); - const auto thinLineWidthU16 = static_cast(thinLineWidth + 0.5f); + const auto underlinePosU16 = static_cast(lrintf(underlinePos)); + const auto underlineWidthU16 = static_cast(lrintf(underlineWidth)); + const auto strikethroughPosU16 = static_cast(lrintf(strikethroughPos)); + const auto strikethroughWidthU16 = static_cast(lrintf(strikethroughWidth)); + const auto doubleUnderlinePosTopU16 = static_cast(lrintf(doubleUnderlinePosTop)); + const auto doubleUnderlinePosBottomU16 = static_cast(lrintf(doubleUnderlinePosBottom)); + const auto thinLineWidthU16 = static_cast(lrintf(thinLineWidth)); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause _api to be in an inconsistent state otherwise. @@ -734,5 +736,7 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->doubleUnderlinePos.x = doubleUnderlinePosTopU16; fontMetrics->doubleUnderlinePos.y = doubleUnderlinePosBottomU16; fontMetrics->thinLineWidth = thinLineWidthU16; + fontMetrics->ligatureOverhangTriggerLeft = cellWidth / -2; + fontMetrics->ligatureOverhangTriggerRight = cellWidth + cellWidth / 2; } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index ee9f5fd57d9..9016f787a55 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -132,15 +132,30 @@ try // Scrolling the background bitmap is a lot easier because we can rely on memmove which works // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. 
{ - const auto beg = _p.backgroundBitmap.begin(); - const auto end = _p.backgroundBitmap.end(); - const auto src = beg - std::min(0, offset) * _p.backgroundBitmapStride; - const auto dst = beg + std::max(0, offset) * _p.backgroundBitmapStride; - const auto count = end - std::max(src, dst); - assert(dst >= beg && dst + count <= end); - assert(src >= beg && src + count <= end); - memmove(dst, src, count * sizeof(u32)); - _p.backgroundBitmapGeneration.bump(); + const auto srcOffset = std::max(0, -offset) * static_cast(_p.colorBitmapRowStride); + const auto dstOffset = std::max(0, offset) * static_cast(_p.colorBitmapRowStride); + const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset); + assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride); + assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride); + + auto src = _p.colorBitmap.data() + srcOffset; + auto dst = _p.colorBitmap.data() + dstOffset; + const auto bytes = count * sizeof(u32); + + for (size_t i = 0; i < 2; ++i) + { + // Avoid bumping the colorBitmapGeneration unless necessary. This approx. further halves + // the (already small) GPU load. This could easily be replaced with some custom SIMD + // to avoid going over the memory twice, but... that's a story for another day. + if (memcmp(dst, src, bytes) != 0) + { + memmove(dst, src, bytes); + _p.colorBitmapGenerations[i].bump(); + } + + src += _p.colorBitmapDepthStride; + dst += _p.colorBitmapDepthStride; + } } } @@ -277,7 +292,7 @@ try } const auto x = gsl::narrow_cast(clamp(coord.x, 0, _p.s->cellCount.x)); - auto column = x; + auto columnEnd = x; // Due to the current IRenderEngine interface (that wasn't refactored yet) we need to assemble // the current buffer line first as the remaining function operates on whole lines of text. 
@@ -287,34 +302,42 @@ try for (const auto& ch : cluster.GetText()) { _api.bufferLine.emplace_back(ch); - _api.bufferLineColumn.emplace_back(column); + _api.bufferLineColumn.emplace_back(columnEnd); } - column += gsl::narrow_cast(cluster.GetColumns()); + columnEnd += gsl::narrow_cast(cluster.GetColumns()); } - _api.bufferLineColumn.emplace_back(column); - } - - { - std::fill(_api.colorsForeground.begin() + x, _api.colorsForeground.begin() + column, _api.currentForeground); + _api.bufferLineColumn.emplace_back(columnEnd); } { const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 1 : 0; - const auto backgroundRow = _p.backgroundBitmap.begin() + _p.backgroundBitmapStride * y; - auto it = backgroundRow + x; - const auto end = backgroundRow + (static_cast(column) << shift); - const auto bg = u32ColorPremultiply(_api.currentBackground); + const auto row = _p.colorBitmap.begin() + _p.colorBitmapRowStride * y; + auto beg = row + x; + auto end = row + (static_cast(columnEnd) << shift); + + const u32 colors[] = { + u32ColorPremultiply(_api.currentBackground), + u32ColorPremultiply(_api.currentForeground), + }; - for (; it != end; ++it) + for (size_t i = 0; i < 2; ++i) { - if (*it != bg) + const auto color = colors[i]; + + for (auto it = beg; it != end; ++it) { - _p.backgroundBitmapGeneration.bump(); - std::fill(it, end, bg); - break; + if (*it != color) + { + _p.colorBitmapGenerations[i].bump(); + std::fill(it, end, color); + break; + } } + + beg += _p.colorBitmapDepthStride; + end += _p.colorBitmapDepthStride; } } @@ -509,11 +532,11 @@ void AtlasEngine::_recreateFontDependentResources() const auto bold = (i & static_cast(FontRelevantAttributes::Bold)) != 0; const auto italic = (i & static_cast(FontRelevantAttributes::Italic)) != 0; // The wght axis defaults to the font weight. - fontAxisValues[0].value = bold ? DWRITE_FONT_WEIGHT_BOLD : (isnan(standardAxes[0].value) ? 
static_cast(_p.s->font->fontWeight) : standardAxes[0].value); + fontAxisValues[0].value = bold ? DWRITE_FONT_WEIGHT_BOLD : (standardAxes[0].value < 0 ? static_cast(_p.s->font->fontWeight) : standardAxes[0].value); // The ital axis defaults to 1 if this is italic and 0 otherwise. - fontAxisValues[1].value = italic ? 1.0f : (isnan(standardAxes[1].value) ? 0.0f : standardAxes[1].value); + fontAxisValues[1].value = italic ? 1.0f : (standardAxes[1].value < 0 ? 0.0f : standardAxes[1].value); // The slnt axis defaults to -12 if this is italic and 0 otherwise. - fontAxisValues[2].value = italic ? -12.0f : (isnan(standardAxes[2].value) ? 0.0f : standardAxes[2].value); + fontAxisValues[2].value = italic ? -12.0f : (standardAxes[2].value < 0 ? 0.0f : standardAxes[2].value); _api.textFormatAxes[i] = { fontAxisValues.data(), fontAxisValues.size() }; } } @@ -530,7 +553,6 @@ void AtlasEngine::_recreateCellCountDependentResources() _api.bufferLine = std::vector{}; _api.bufferLine.reserve(projectedTextSize); _api.bufferLineColumn.reserve(projectedTextSize + 1); - _api.colorsForeground = Buffer(_p.s->cellCount.x); _api.analysisResults = std::vector{}; _api.clusterMap = Buffer{ projectedTextSize }; @@ -548,9 +570,11 @@ void AtlasEngine::_recreateCellCountDependentResources() // and 40x (AMD) faster for allocations with an alignment of 32 or greater. // backgroundBitmapStride is a "count" of u32 and not in bytes, // so we round up to multiple of 8 because 8 * sizeof(u32) == 32. 
- _p.backgroundBitmapStride = (static_cast(_p.s->cellCount.x) + 7) & ~7; - _p.backgroundBitmap = Buffer(_p.backgroundBitmapStride * _p.s->cellCount.y); - memset(_p.backgroundBitmap.data(), 0, _p.backgroundBitmap.size() * sizeof(u32)); + _p.colorBitmapRowStride = (static_cast(_p.s->cellCount.x) + 7) & ~7; + _p.colorBitmapDepthStride = _p.colorBitmapRowStride * _p.s->cellCount.y; + _p.colorBitmap = Buffer(2 * _p.colorBitmapDepthStride); + + memset(_p.colorBitmap.data(), 0, _p.colorBitmap.size() * sizeof(u32)); auto it = _p.unorderedRows.data(); for (auto& r : _p.rows) @@ -614,9 +638,9 @@ void AtlasEngine::_flushBufferLine() for (size_t i = 0; i < complexityLength; ++i) { const auto col1 = _api.bufferLineColumn[idx + i + 0]; - const auto fg = _api.colorsForeground[col1]; const auto col2 = _api.bufferLineColumn[idx + i + 1]; const auto glyphAdvance = (col2 - col1) * _p.s->font->cellSize.x; + const auto fg = _p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]; row.glyphIndices.emplace_back(_api.glyphIndices[i]); row.glyphAdvances.emplace_back(static_cast(glyphAdvance)); row.glyphOffsets.emplace_back(); @@ -810,7 +834,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng const auto col1 = _api.bufferLineColumn[a.textPosition + beg]; const auto col2 = _api.bufferLineColumn[a.textPosition + i]; - const auto fg = _api.colorsForeground[col1]; + const auto fg = _p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]; const auto expectedAdvance = (col2 - col1) * _p.s->font->cellSize.x; f32 actualAdvance = 0; @@ -888,7 +912,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) row.glyphIndices.emplace_back(nowMappingSoftFont ? 
ch : _api.replacementCharacterGlyphIndex); row.glyphAdvances.emplace_back(static_cast(cols * _p.s->font->cellSize.x)); row.glyphOffsets.emplace_back(DWRITE_GLYPH_OFFSET{}); - row.colors.emplace_back(_api.colorsForeground[col1]); + row.colors.emplace_back(_p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]); if (currentlyMappingSoftFont != nowMappingSoftFont) { diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index e84700aa32d..16590bdb26c 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -121,7 +121,6 @@ namespace Microsoft::Console::Render::Atlas std::vector bufferLine; std::vector bufferLineColumn; - Buffer colorsForeground; std::array, 4> textFormatAxes; std::vector analysisResults; diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index f89e70d41a8..06fd0a86026 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -12,7 +12,7 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION 0 // Redraw at display refresh rate at all times. This helps with shader debugging. -#define ATLAS_DEBUG_CONTINUOUS_REDRAW 0 +#define ATLAS_DEBUG_CONTINUOUS_REDRAW 1 // Disables the use of DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT. // This helps with benchmarking the application as it'll run beyond display refresh rate. 
diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 57e26bb71a7..ea11bfd7c73 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -177,10 +177,10 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept { - if (_backgroundBitmapGeneration != p.backgroundBitmapGeneration) + if (_backgroundBitmapGeneration != p.colorBitmapGenerations[0]) { - _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), gsl::narrow_cast(p.backgroundBitmapStride * sizeof(u32))); - _backgroundBitmapGeneration = p.backgroundBitmapGeneration; + _backgroundBitmap->CopyFromMemory(nullptr, p.colorBitmap.data(), gsl::narrow_cast(p.colorBitmapRowStride * sizeof(u32))); + _backgroundBitmapGeneration = p.colorBitmapGenerations[0]; } // If the terminal was 120x30 cells and 1200x600 pixels large, this would draw the @@ -546,7 +546,7 @@ void BackendD2D::_drawCursorPart1(const RenderingPayload& p) _resizeCursorBitmap(p, cursorSize); } - const auto backgroundBitmapOffset = p.cursorRect.top * p.backgroundBitmapStride; + const auto backgroundBitmapOffset = p.cursorRect.top * p.colorBitmapRowStride; const auto cellSizeX = static_cast(p.s->font->cellSize.x); const auto cellSizeY = static_cast(p.s->font->cellSize.y); const auto offsetX = p.cursorRect.left * cellSizeX; @@ -562,7 +562,7 @@ void BackendD2D::_drawCursorPart1(const RenderingPayload& p) for (til::CoordType x = 0; x < cursorSize.width; ++x) { - const auto bg = p.backgroundBitmap[backgroundBitmapOffset + x]; + const auto bg = p.colorBitmap[backgroundBitmapOffset + x]; const auto brush = _brushWithColor(bg ^ 0x3f3f3f); srcRect.left = x * cellSizeX; srcRect.right = srcRect.left + cellSizeX; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index e8fd2dbd7c0..f11457ee048 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp 
@@ -301,7 +301,7 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) } if (cellCountChanged) { - _recreateBackgroundColorBitmap(p.s->cellCount); + _recreateColorBitmap(p.s->cellCount); } // Similar to _renderTargetView above, we might have to recreate the _customRenderTargetView whenever _swapChainManager @@ -493,15 +493,15 @@ void BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); } -void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) +void BackendD3D::_recreateColorBitmap(u16x2 cellCount) { // Avoid memory usage spikes by releasing memory first. - _backgroundBitmap.reset(); - _backgroundBitmapView.reset(); + _colorBitmap.reset(); + _colorBitmapView.reset(); const D3D11_TEXTURE2D_DESC desc{ .Width = cellCount.x, - .Height = cellCount.y, + .Height = cellCount.y * 2u, .MipLevels = 1, .ArraySize = 1, .Format = DXGI_FORMAT_R8G8B8A8_UNORM, @@ -510,9 +510,9 @@ void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) .BindFlags = D3D11_BIND_SHADER_RESOURCE, .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, }; - THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof())); - THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); - _backgroundBitmapGeneration = {}; + THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _colorBitmap.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_colorBitmap.get(), nullptr, _colorBitmapView.addressof())); + _colorBitmapGenerations = {}; } void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept @@ -562,7 +562,7 @@ void BackendD3D::_setupDeviceContextState(const RenderingPayload& p) _deviceContext->RSSetViewports(1, &viewport); // PS: Pixel Shader - ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), 
_glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); _deviceContext->PSSetShaderResources(0, 2, &resources[0]); @@ -770,7 +770,7 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); } - ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShaderResources(0, 2, &resources[0]); _rectPackerData = Buffer{ u }; @@ -785,6 +785,8 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) _d2dBeginDrawing(); _d2dRenderTarget->Clear(); + + _fontChangedResetGlyphAtlas = false; } void BackendD3D::_markStateChange(ID3D11BlendState* blendState) @@ -834,6 +836,8 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) return; } + _uploadColorBitmap(p); + // TODO: Shrink instances buffer if (_instancesCount > _instanceBufferCapacity) { @@ -887,6 +891,37 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) _instancesCount = 0; } +void BackendD3D::_uploadColorBitmap(const RenderingPayload& p) +{ + // Not uploading the bitmap halves (!) the GPU load for any given frame. + // We don't need to upload if the background and foreground bitmaps are the same + // or when the _drawText() function determined that no glyph has the LigatureMarker, + // because then the pixel shader doesn't need to access the foreground bitmap anyways. 
+ if (_colorBitmapGenerations[0] == p.colorBitmapGenerations[0] && + (_colorBitmapGenerations[1] == p.colorBitmapGenerations[1] || _skipForegroundBitmapUpload)) + { + return; + } + + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_colorBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + + auto src = std::bit_cast(&*p.colorBitmap.begin()); + const auto srcEnd = std::bit_cast(&*p.colorBitmap.end()); + const auto srcStride = p.colorBitmapRowStride * sizeof(u32); + auto dst = static_cast(mapped.pData); + + while (src < srcEnd) + { + memcpy(dst, src, srcStride); + src += srcStride; + dst += mapped.RowPitch; + } + + _deviceContext->Unmap(_colorBitmap.get(), 0); + _colorBitmapGenerations = p.colorBitmapGenerations; +} + void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) { // We use the viewport size of the terminal as the initial estimate for the amount of instances we'll see. @@ -922,27 +957,6 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) void BackendD3D::_drawBackground(const RenderingPayload& p) { - if (_backgroundBitmapGeneration != p.backgroundBitmapGeneration) - { - D3D11_MAPPED_SUBRESOURCE mapped{}; - THROW_IF_FAILED(_deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - - const auto srcStride = p.backgroundBitmapStride * sizeof(u32); -#pragma warning(suppress : 26490) // Don't use reinterpret_cast (type.1). 
- auto src = reinterpret_cast(p.backgroundBitmap.data()); - auto dst = static_cast(mapped.pData); - - for (size_t i = 0; i < p.s->cellCount.y; ++i) - { - memcpy(dst, src, srcStride); - src += srcStride; - dst += mapped.RowPitch; - } - - _deviceContext->Unmap(_backgroundBitmap.get(), 0); - _backgroundBitmapGeneration = p.backgroundBitmapGeneration; - } - _appendQuad() = { .shadingType = ShadingType::Background, .size = p.s->targetSize, @@ -953,10 +967,12 @@ void BackendD3D::_drawText(RenderingPayload& p) { if (_fontChangedResetGlyphAtlas) { - _fontChangedResetGlyphAtlas = false; _resetGlyphAtlas(p); } + auto shadingTypeAccumulator = ShadingType::Default; + _skipForegroundBitmapUpload = false; + til::CoordType dirtyTop = til::CoordTypeMax; til::CoordType dirtyBottom = til::CoordTypeMin; @@ -985,7 +1001,7 @@ void BackendD3D::_drawText(RenderingPayload& p) { const auto [glyphEntry, inserted] = fontFaceEntry.glyphs.insert(row->glyphIndices[x]); - if (inserted && !_drawGlyph(p, fontFaceEntry, glyphEntry)) + if (inserted && !_drawGlyph(p, row->glyphAdvances[x], fontFaceEntry, glyphEntry)) { // A deadlock in this retry loop is detected in _drawGlyphPrepareRetry. 
// @@ -1018,6 +1034,8 @@ void BackendD3D::_drawText(RenderingPayload& p) .texcoord = glyphEntry.data.texcoord, .color = row->colors[x], }; + + shadingTypeAccumulator |= glyphEntry.data.shadingType; } baselineX += row->glyphAdvances[x]; @@ -1041,9 +1059,11 @@ void BackendD3D::_drawText(RenderingPayload& p) } _d2dEndDrawing(); + + _skipForegroundBitmapUpload = WI_IsFlagClear(shadingTypeAccumulator, ShadingType::LigatureMarker); } -bool BackendD3D::_drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) +bool BackendD3D::_drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) { if (!fontFaceEntry.fontFace) { @@ -1134,11 +1154,15 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryI t.m11 = 2.0f; t.m22 = lineRendition >= LineRendition::DoubleHeightTop ? 2.0f : 1.0f; _d2dRenderTarget->SetTransform(&t); + glyphAdvance *= 2; } const auto restoreTransform = wil::scope_exit([&]() noexcept { - static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; - _d2dRenderTarget->SetTransform(&identity); + if (transform) + { + static constexpr D2D1_MATRIX_3X2_F identity{ .m11 = 1, .m22 = 1 }; + _d2dRenderTarget->SetTransform(&identity); + } }); // This calculates the black box of the glyph, or in other words, @@ -1197,8 +1221,19 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryI _d2dBeginDrawing(); const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); + auto shadingType = colorGlyph ? ShadingType::Passthrough : _textShadingType; + + // Ligatures are drawn with strict cell-wise foreground color, while other text allows colors to overhang + // their cells. This makes sure that italics and such retain their color and don't look "cut off". 
+ // + // The former condition makes sure to exclude diacritics and such from being considered a ligature, + // while the latter condition-pair makes sure to exclude regular BMP wide glyphs that overlap a little. + if (rect.w >= p.s->font->cellSize.x && (bl <= p.s->font->ligatureOverhangTriggerLeft || br >= p.s->font->ligatureOverhangTriggerRight)) + { + shadingType |= ShadingType::LigatureMarker; + } - glyphEntry.data.shadingType = colorGlyph ? ShadingType::Passthrough : _textShadingType; + glyphEntry.data.shadingType = shadingType; glyphEntry.data.offset.x = bl; glyphEntry.data.offset.y = bt; glyphEntry.data.size.x = rect.w; @@ -1445,14 +1480,14 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) } const auto cursorColor = p.s->cursor->cursorColor; - const auto offset = p.cursorRect.top * p.backgroundBitmapStride; + const auto offset = p.cursorRect.top * p.colorBitmapRowStride; for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1) { const auto x0 = x1; - const auto bg = p.backgroundBitmap[offset + x1] | 0xff000000; + const auto bg = p.colorBitmap[offset + x1] | 0xff000000; - for (; x1 < p.cursorRect.right && (p.backgroundBitmap[offset + x1] | 0xff000000) == bg; ++x1) + for (; x1 < p.cursorRect.right && (p.colorBitmap[offset + x1] | 0xff000000) == bg; ++x1) { } @@ -1714,7 +1749,7 @@ void BackendD3D::_executeCustomShader(RenderingPayload& p) _deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof()); // PS: Pixel Shader - ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); _deviceContext->PSSetShaderResources(0, 2, &resources[0]); diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index a5899fcf656..946e3ac05ab 100644 --- 
a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -64,7 +64,14 @@ namespace Microsoft::Console::Render::Atlas Passthrough = 3, DashedLine = 4, SolidFill = 5, + + // The shader normally uses per-glyph coloring for text, because this allows us to retain the color of + // glyphs even if they're slightly out of their cell's bounds. This improves the look of fonts with large descenders/ascenders, etc., in particular for complex Unicode text. + // The shader uses strict per-cell coloring for common coding ligatures like === + // because they're drawn as + LigatureMarker = 0x80000000, }; + ATLAS_FLAG_OPS(ShadingType, u32, friend); // NOTE: Don't initialize any members in this struct. This ensures that no // zero-initialization needs to occur when we allocate large buffers of this object. @@ -170,7 +177,7 @@ namespace Microsoft::Console::Render::Atlas void _recreateCustomShader(const RenderingPayload& p); void _recreateCustomRenderTargetView(u16x2 targetSize); void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept; - void _recreateBackgroundColorBitmap(u16x2 cellCount); + void _recreateColorBitmap(u16x2 cellCount); void _recreateConstBuffer(const RenderingPayload& p) const; void _setupDeviceContextState(const RenderingPayload& p); void _debugUpdateShaders(const RenderingPayload& p) noexcept; @@ -178,16 +185,17 @@ namespace Microsoft::Console::Render::Atlas void _debugDumpRenderTarget(const RenderingPayload& p); void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); - void _resetGlyphAtlas(const RenderingPayload& p); + __declspec(noinline) void _resetGlyphAtlas(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; QuadInstance& _appendQuad(); __declspec(noinline) void _bumpInstancesSize(); void _flushQuads(const RenderingPayload& p); __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); + void _uploadColorBitmap(const 
RenderingPayload& p); void _drawBackground(const RenderingPayload& p); void _drawText(RenderingPayload& p); - __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); + __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); bool _drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); @@ -238,9 +246,10 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _customShaderSamplerState; std::chrono::steady_clock::time_point _customShaderStartTime; - wil::com_ptr _backgroundBitmap; - wil::com_ptr _backgroundBitmapView; - til::generation_t _backgroundBitmapGeneration; + wil::com_ptr _colorBitmap; + wil::com_ptr _colorBitmapView; + std::array _colorBitmapGenerations; + bool _skipForegroundBitmapUpload = false; wil::com_ptr _glyphAtlas; wil::com_ptr _glyphAtlasView; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 4cec1a6775c..fd72fb950f6 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -12,32 +12,32 @@ namespace Microsoft::Console::Render::Atlas { -#define ATLAS_FLAG_OPS(type, underlying) \ - constexpr type operator~(type v) noexcept \ +#define ATLAS_FLAG_OPS(type, underlying, attr) \ + attr constexpr type operator~(type v) noexcept \ { \ return static_cast(~static_cast(v)); \ } \ - constexpr type operator|(type lhs, type rhs) noexcept \ + attr constexpr type operator|(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) | static_cast(rhs)); \ } \ - constexpr type operator&(type lhs, type rhs) noexcept \ + attr constexpr type 
operator&(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) & static_cast(rhs)); \ } \ - constexpr type operator^(type lhs, type rhs) noexcept \ + attr constexpr type operator^(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) ^ static_cast(rhs)); \ } \ - constexpr void operator|=(type& lhs, type rhs) noexcept \ + attr constexpr void operator|=(type& lhs, type rhs) noexcept \ { \ lhs = lhs | rhs; \ } \ - constexpr void operator&=(type& lhs, type rhs) noexcept \ + attr constexpr void operator&=(type& lhs, type rhs) noexcept \ { \ lhs = lhs & rhs; \ } \ - constexpr void operator^=(type& lhs, type rhs) noexcept \ + attr constexpr void operator^=(type& lhs, type rhs) noexcept \ { \ lhs = lhs ^ rhs; \ } @@ -330,6 +330,8 @@ namespace Microsoft::Console::Render::Atlas u16 thinLineWidth = 0; u16 dpi = 96; AntialiasingMode antialiasingMode = DefaultAntialiasingMode; + til::CoordType ligatureOverhangTriggerLeft = 0; + til::CoordType ligatureOverhangTriggerRight = 0; std::vector softFontPattern; til::size softFontCellSize; @@ -381,7 +383,7 @@ namespace Microsoft::Console::Render::Atlas Bold = 0b01, Italic = 0b10, }; - ATLAS_FLAG_OPS(FontRelevantAttributes, u8) + ATLAS_FLAG_OPS(FontRelevantAttributes, u8, ) struct FontMapping { @@ -462,12 +464,17 @@ namespace Microsoft::Console::Render::Atlas // They get rotated around when we scroll the buffer. Technically // we could also implement scrolling by using a circular array. Buffer rows; - // This stride (width) of the backgroundBitmap is a "count" of u32 and not in bytes. - size_t backgroundBitmapStride = 0; - Buffer backgroundBitmap; - // 1 ensures that the backends redraw the background, even if the background is - // entirely black, just like `backgroundBitmap` is all back after it gets created. - til::generation_t backgroundBitmapGeneration{ 1 }; + // This contains two viewport-sized bitmaps back to back, sort of like a Texture2DArray. 
+ // The first NxM (for instance 120x30 pixel) chunk contains background colors and the + // second chunk contains foreground colors. The distance in u32 items between the start + // and the begin of the foreground bitmap is equal to colorBitmapDepthStride. + Buffer colorBitmap; + // This stride of the colorBitmap is a "count" of u32 and not in bytes. + size_t colorBitmapRowStride = 0; + size_t colorBitmapDepthStride = 0; + // A generation of 1 ensures that the backends redraw the background on the first Present(). + // The 1st entry in this array corresponds to the background and the 2nd to the foreground bitmap. + std::array colorBitmapGenerations{ 1, 1 }; // In columns/rows. til::rect cursorRect; // In pixel. diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 878b7c21a84..78cb81119ae 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -8,6 +8,7 @@ #define SHADING_TYPE_PASSTHROUGH 3 #define SHADING_TYPE_DASHED_LINE 4 #define SHADING_TYPE_SOLID_FILL 5 +#define SHADING_TYPE_LIGATURE_MARKER 0x80000000 // clang-format on struct VSData diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 169d47a085c..2b47068412f 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -30,21 +30,39 @@ Output main(PSData data) : SV_Target float4 color; float4 weights; - switch (data.shadingType) + switch (data.shadingType & ~SHADING_TYPE_LIGATURE_MARKER) { case SHADING_TYPE_TEXT_BACKGROUND: - float2 pos = data.texcoord / cellSize; - color = all(pos < cellCount) ? background[pos] : backgroundColor; + { + const float2 cell = data.position.xy / cellSize; + color = all(cell < cellCount) ? background[cell] : backgroundColor; weights = float4(1, 1, 1, 1); break; + } case SHADING_TYPE_TEXT_GRAYSCALE: { - // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. 
- const float4 foreground = premultiplyColor(data.color); + // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. Part 1... + float4 foreground = premultiplyColor(data.color); + + if (data.shadingType & SHADING_TYPE_LIGATURE_MARKER) + { + float2 cell = data.position.xy / cellSize; + cell.y += cellCount.y; + foreground = background[cell]; + + if (foreground.a == 0) + { + discard; + } + + data.color = float4(foreground.rgb / foreground.a, foreground.a); + } + + // ...Part 2: const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); const float intensity = DWrite_CalcColorIntensity(data.color.rgb); // These aren't. - const float4 glyph = glyphAtlas[data.texcoord]; + float4 glyph = glyphAtlas[data.texcoord]; const float contrasted = DWrite_EnhanceContrast(glyph.a, blendEnhancedContrast); const float alphaCorrected = DWrite_ApplyAlphaCorrection(contrasted, intensity, gammaRatios); color = alphaCorrected * foreground; @@ -53,10 +71,24 @@ Output main(PSData data) : SV_Target } case SHADING_TYPE_TEXT_CLEARTYPE: { + if (data.shadingType & SHADING_TYPE_LIGATURE_MARKER) + { + float2 cell = data.position.xy / cellSize; + cell.y += cellCount.y; + data.color = background[cell]; + + if (data.color.a == 0) + { + discard; + } + + data.color.rgb /= data.color.a; + } + // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); // These aren't. 
- const float4 glyph = glyphAtlas[data.texcoord]; + float4 glyph = glyphAtlas[data.texcoord]; const float3 contrasted = DWrite_EnhanceContrast3(glyph.rgb, blendEnhancedContrast); const float3 alphaCorrected = DWrite_ApplyAlphaCorrection3(contrasted, data.color.rgb, gammaRatios); weights = float4(alphaCorrected * data.color.a, 1); diff --git a/tools/ConsoleTypes.natvis b/tools/ConsoleTypes.natvis index ea98f20d572..79a2fe7abd0 100644 --- a/tools/ConsoleTypes.natvis +++ b/tools/ConsoleTypes.natvis @@ -107,10 +107,9 @@ - {{ gen={_generation._value}, {_value} }} - - _value - + + &_value + {{ generation={_generation._value} }} From da93dbd8068694028fcb6310960f64a65c51e5b1 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 7 Apr 2023 18:11:46 +0200 Subject: [PATCH 28/37] Improve vertical coloring of overhangs, Fix hyperlink underline --- src/renderer/atlas/AtlasEngine.api.cpp | 9 +- src/renderer/atlas/AtlasEngine.cpp | 38 ++-- src/renderer/atlas/AtlasEngine.h | 4 +- src/renderer/atlas/Backend.h | 2 +- src/renderer/atlas/BackendD2D.cpp | 20 +-- src/renderer/atlas/BackendD2D.h | 6 +- src/renderer/atlas/BackendD3D.cpp | 233 ++++++++++++++++--------- src/renderer/atlas/BackendD3D.h | 43 ++--- src/renderer/atlas/common.h | 39 +++-- src/renderer/atlas/shader_common.hlsl | 3 +- src/renderer/atlas/shader_ps.hlsl | 42 +---- 11 files changed, 255 insertions(+), 184 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 66ec7f1b34b..2ec2f49f0d4 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -705,9 +705,10 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo if (fontMetrics) { std::wstring fontName{ requestedFaceName }; - const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); + const auto fontWeightU16 = static_cast(requestedWeight); + const auto advanceWidthU16 = static_cast(lrintf(advanceWidth)); const auto baselineU16 = 
static_cast(lrintf(baseline)); - const auto descenderU16 = gsl::narrow_cast(cellHeight - baselineU16); + const auto descenderU16 = static_cast(cellHeight - baselineU16); const auto underlinePosU16 = static_cast(lrintf(underlinePos)); const auto underlineWidthU16 = static_cast(lrintf(underlineWidth)); const auto strikethroughPosU16 = static_cast(lrintf(strikethroughPos)); @@ -723,10 +724,10 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontFamily = std::move(fontFamily); fontMetrics->fontName = std::move(fontName); fontMetrics->fontSize = fontSizeInPx; - fontMetrics->advanceScale = cellWidth / advanceWidth; fontMetrics->cellSize.x = cellWidth; fontMetrics->cellSize.y = cellHeight; fontMetrics->fontWeight = fontWeightU16; + fontMetrics->advanceWidth = advanceWidthU16; fontMetrics->baseline = baselineU16; fontMetrics->descender = descenderU16; fontMetrics->underlinePos = underlinePosU16; @@ -736,7 +737,5 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->doubleUnderlinePos.x = doubleUnderlinePosTopU16; fontMetrics->doubleUnderlinePos.y = doubleUnderlinePosBottomU16; fontMetrics->thinLineWidth = thinLineWidthU16; - fontMetrics->ligatureOverhangTriggerLeft = cellWidth / -2; - fontMetrics->ligatureOverhangTriggerRight = cellWidth + cellWidth / 2; } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 9016f787a55..eb298fcd36c 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -284,6 +284,9 @@ try _flushBufferLine(); } + const auto x = gsl::narrow_cast(clamp(coord.x, 0, _p.s->cellCount.x)); + auto columnEnd = x; + // _api.bufferLineColumn contains 1 more item than _api.bufferLine, as it represents the // past-the-end index. It'll get appended again later once we built our new _api.bufferLine. 
if (!_api.bufferLineColumn.empty()) @@ -291,9 +294,6 @@ try _api.bufferLineColumn.pop_back(); } - const auto x = gsl::narrow_cast(clamp(coord.x, 0, _p.s->cellCount.x)); - auto columnEnd = x; - // Due to the current IRenderEngine interface (that wasn't refactored yet) we need to assemble // the current buffer line first as the remaining function operates on whole lines of text. { @@ -304,7 +304,6 @@ try _api.bufferLine.emplace_back(ch); _api.bufferLineColumn.emplace_back(columnEnd); } - columnEnd += gsl::narrow_cast(cluster.GetColumns()); } @@ -319,7 +318,7 @@ try const u32 colors[] = { u32ColorPremultiply(_api.currentBackground), - u32ColorPremultiply(_api.currentForeground), + _api.currentForeground, }; for (size_t i = 0; i < 2; ++i) @@ -572,7 +571,9 @@ void AtlasEngine::_recreateCellCountDependentResources() // so we round up to multiple of 8 because 8 * sizeof(u32) == 32. _p.colorBitmapRowStride = (static_cast(_p.s->cellCount.x) + 7) & ~7; _p.colorBitmapDepthStride = _p.colorBitmapRowStride * _p.s->cellCount.y; - _p.colorBitmap = Buffer(2 * _p.colorBitmapDepthStride); + _p.colorBitmap = Buffer(_p.colorBitmapDepthStride * 2); + _p.backgroundBitmap = { _p.colorBitmap.data(), _p.colorBitmapDepthStride }; + _p.foregroundBitmap = { _p.colorBitmap.data() + _p.colorBitmapDepthStride, _p.colorBitmapDepthStride }; memset(_p.colorBitmap.data(), 0, _p.colorBitmap.size() * sizeof(u32)); @@ -635,12 +636,14 @@ void AtlasEngine::_flushBufferLine() if (isTextSimple) { + const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; + for (size_t i = 0; i < complexityLength; ++i) { const auto col1 = _api.bufferLineColumn[idx + i + 0]; const auto col2 = _api.bufferLineColumn[idx + i + 1]; const auto glyphAdvance = (col2 - col1) * _p.s->font->cellSize.x; - const auto fg = _p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]; + const auto fg = colors[col1]; 
row.glyphIndices.emplace_back(_api.glyphIndices[i]); row.glyphAdvances.emplace_back(static_cast(glyphAdvance)); row.glyphOffsets.emplace_back(); @@ -656,7 +659,16 @@ void AtlasEngine::_flushBufferLine() const auto indicesCount = row.glyphIndices.size(); if (indicesCount > initialIndicesCount) { - row.mappings.emplace_back(std::move(mappedFontFace), gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + // IDWriteFontFallback::MapCharacters() isn't just awfully slow, + // it can also repeatedly return the same font face again and again. :) + if (row.mappings.empty() || row.mappings.back().fontFace != mappedFontFace) + { + row.mappings.emplace_back(std::move(mappedFontFace), gsl::narrow_cast(initialIndicesCount), gsl::narrow_cast(indicesCount)); + } + else + { + row.mappings.back().glyphsTo = gsl::narrow_cast(indicesCount); + } } } } @@ -708,6 +720,10 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* THROW_IF_FAILED(font->CreateFontFace(reinterpret_cast(mappedFontFace))); } } + + // Oh wow! You found a case where scale isn't 1! I tried every font and none + // returned something besides 1. I just couldn't figure out why this exists. 
+ assert(scale == 1); } void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row) @@ -821,6 +837,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng _api.clusterMap[a.textLength] = gsl::narrow_cast(actualGlyphCount); + const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; auto prevCluster = _api.clusterMap[0]; size_t beg = 0; @@ -834,7 +851,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng const auto col1 = _api.bufferLineColumn[a.textPosition + beg]; const auto col2 = _api.bufferLineColumn[a.textPosition + i]; - const auto fg = _p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]; + const auto fg = colors[col1]; const auto expectedAdvance = (col2 - col1) * _p.s->font->cellSize.x; f32 actualAdvance = 0; @@ -896,6 +913,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) auto initialIndicesCount = row.glyphIndices.size(); const auto softFontAvailable = !_p.s->font->softFontPattern.empty(); auto currentlyMappingSoftFont = isSoftFontChar(_api.bufferLine[pos1]); + const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; while (pos2 < to) { @@ -912,7 +930,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) row.glyphIndices.emplace_back(nowMappingSoftFont ? 
ch : _api.replacementCharacterGlyphIndex); row.glyphAdvances.emplace_back(static_cast(cols * _p.s->font->cellSize.x)); row.glyphOffsets.emplace_back(DWRITE_GLYPH_OFFSET{}); - row.colors.emplace_back(_p.colorBitmap[_p.colorBitmapDepthStride + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y + col1]); + row.colors.emplace_back(colors[col1]); if (currentlyMappingSoftFont != nowMappingSoftFont) { diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 16590bdb26c..02dde6320e5 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -78,13 +78,13 @@ namespace Microsoft::Console::Render::Atlas private: // AtlasEngine.cpp - __declspec(noinline) void _handleSettingsUpdate(); + ATLAS_ATTR_COLD void _handleSettingsUpdate(); void _recreateFontDependentResources(); void _recreateCellCountDependentResources(); void _flushBufferLine(); void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, IDWriteFontFace2** mappedFontFace) const; void _mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row); - __declspec(noinline) void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); + ATLAS_ATTR_COLD void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); // AtlasEngine.api.cpp void _resolveTransparencySettings() noexcept; diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index 06fd0a86026..f89e70d41a8 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -12,7 +12,7 @@ namespace Microsoft::Console::Render::Atlas #define ATLAS_DEBUG_DISABLE_PARTIAL_INVALIDATION 0 // Redraw at display refresh rate at all times. This helps with shader debugging. -#define ATLAS_DEBUG_CONTINUOUS_REDRAW 1 +#define ATLAS_DEBUG_CONTINUOUS_REDRAW 0 // Disables the use of DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT. // This helps with benchmarking the application as it'll run beyond display refresh rate. 
diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index ea11bfd7c73..07548ff74f2 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -179,7 +179,7 @@ void BackendD2D::_drawBackground(const RenderingPayload& p) noexcept { if (_backgroundBitmapGeneration != p.colorBitmapGenerations[0]) { - _backgroundBitmap->CopyFromMemory(nullptr, p.colorBitmap.data(), gsl::narrow_cast(p.colorBitmapRowStride * sizeof(u32))); + _backgroundBitmap->CopyFromMemory(nullptr, p.backgroundBitmap.data(), gsl::narrow_cast(p.colorBitmapRowStride * sizeof(u32))); _backgroundBitmapGeneration = p.colorBitmapGenerations[0]; } @@ -461,14 +461,6 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro _fillRectangle(rect, r.color); } } - if (r.lines.test(GridLines::Top)) - { - rect.left = left; - rect.top = top; - rect.right = right; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); - } if (r.lines.test(GridLines::Right)) { rect.top = top; @@ -480,6 +472,14 @@ void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro _fillRectangle(rect, r.color); } } + if (r.lines.test(GridLines::Top)) + { + rect.left = left; + rect.top = top; + rect.right = right; + rect.bottom = rect.top + p.s->font->thinLineWidth; + _fillRectangle(rect, r.color); + } if (r.lines.test(GridLines::Bottom)) { rect.left = left; @@ -562,7 +562,7 @@ void BackendD2D::_drawCursorPart1(const RenderingPayload& p) for (til::CoordType x = 0; x < cursorSize.width; ++x) { - const auto bg = p.colorBitmap[backgroundBitmapOffset + x]; + const auto bg = p.backgroundBitmap[backgroundBitmapOffset + x]; const auto brush = _brushWithColor(bg ^ 0x3f3f3f); srcRect.left = x * cellSizeX; srcRect.right = srcRect.left + cellSizeX; diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index 5efb7e643a0..ddeb938bc43 100644 --- a/src/renderer/atlas/BackendD2D.h +++ 
b/src/renderer/atlas/BackendD2D.h @@ -40,12 +40,12 @@ namespace Microsoft::Console::Render::Atlas }; private: - __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); + ATLAS_ATTR_COLD void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; void _drawText(RenderingPayload& p); f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const noexcept; void _drawTextResetLineRendition() const noexcept; - __declspec(noinline) f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); + ATLAS_ATTR_COLD f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursorWithColor(const RenderingPayload& p); @@ -57,7 +57,7 @@ namespace Microsoft::Console::Render::Atlas void _debugShowDirty(const RenderingPayload& p); void _debugDumpRenderTarget(const RenderingPayload& p); ID2D1Brush* _brushWithColor(u32 color); - __declspec(noinline) void _clearBrushes() const noexcept; + ATLAS_ATTR_COLD void _clearBrushes() const noexcept; void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager _swapChainManager; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index f11457ee048..4bfad4225f1 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -293,7 +293,7 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) if (fontChanged) { - _updateFontDependents(p); + _updateFontDependents(p.dwriteFactory.get(), *p.s->font); } if (miscChanged) { @@ -301,7 +301,7 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) } if (cellCountChanged) { - _recreateColorBitmap(p.s->cellCount); + _recreateBackgroundColorBitmap(p.s->cellCount); } // Similar to 
_renderTargetView above, we might have to recreate the _customRenderTargetView whenever _swapChainManager @@ -321,16 +321,40 @@ void BackendD3D::_handleSettingsUpdate(const RenderingPayload& p) _cellCount = p.s->cellCount; } -void BackendD3D::_updateFontDependents(const RenderingPayload& p) +void BackendD3D::_updateFontDependents(IDWriteFactory2* dwriteFactory, const FontSettings& font) { - DWrite_GetRenderParams(p.dwriteFactory.get(), &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); + DWrite_GetRenderParams(dwriteFactory, &_gamma, &_cleartypeEnhancedContrast, &_grayscaleEnhancedContrast, _textRenderingParams.put()); // Clearing the atlas requires BeginDraw(), which is expensive. Defer this until we need Direct2D anyways. _fontChangedResetGlyphAtlas = true; - _textShadingType = p.s->font->antialiasingMode == AntialiasingMode::ClearType ? ShadingType::TextClearType : ShadingType::TextGrayscale; + _textShadingType = font.antialiasingMode == AntialiasingMode::ClearType ? 
ShadingType::TextClearType : ShadingType::TextGrayscale; + + { + auto ligaturesDisabled = false; + for (const auto& feature : font.fontFeatures) + { + if (feature.nameTag == DWRITE_FONT_FEATURE_TAG_STANDARD_LIGATURES) + { + ligaturesDisabled = !feature.parameter; + break; + } + } + + if (ligaturesDisabled) + { + _ligatureOverhangTriggerLeft = til::CoordTypeMin; + _ligatureOverhangTriggerRight = til::CoordTypeMax; + } + else + { + const auto halfCellWidth = font.cellSize.x / 2; + _ligatureOverhangTriggerLeft = -halfCellWidth; + _ligatureOverhangTriggerRight = font.advanceWidth + halfCellWidth; + } + } if (_d2dRenderTarget) { - _d2dRenderTargetUpdateFontSettings(*p.s->font); + _d2dRenderTargetUpdateFontSettings(font); } _softFontBitmap.reset(); @@ -493,15 +517,15 @@ void BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); } -void BackendD3D::_recreateColorBitmap(u16x2 cellCount) +void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) { // Avoid memory usage spikes by releasing memory first. 
- _colorBitmap.reset(); - _colorBitmapView.reset(); + _backgroundBitmap.reset(); + _backgroundBitmapView.reset(); const D3D11_TEXTURE2D_DESC desc{ .Width = cellCount.x, - .Height = cellCount.y * 2u, + .Height = cellCount.y, .MipLevels = 1, .ArraySize = 1, .Format = DXGI_FORMAT_R8G8B8A8_UNORM, @@ -510,9 +534,9 @@ void BackendD3D::_recreateColorBitmap(u16x2 cellCount) .BindFlags = D3D11_BIND_SHADER_RESOURCE, .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, }; - THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _colorBitmap.addressof())); - THROW_IF_FAILED(_device->CreateShaderResourceView(_colorBitmap.get(), nullptr, _colorBitmapView.addressof())); - _colorBitmapGenerations = {}; + THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _backgroundBitmap.addressof())); + THROW_IF_FAILED(_device->CreateShaderResourceView(_backgroundBitmap.get(), nullptr, _backgroundBitmapView.addressof())); + _backgroundBitmapGeneration = {}; } void BackendD3D::_d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept @@ -562,7 +586,7 @@ void BackendD3D::_setupDeviceContextState(const RenderingPayload& p) _deviceContext->RSSetViewports(1, &viewport); // PS: Pixel Shader - ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); _deviceContext->PSSetShaderResources(0, 2, &resources[0]); @@ -770,7 +794,7 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); } - ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), _glyphAtlasView.get() }; _deviceContext->PSSetShaderResources(0, 2, &resources[0]); _rectPackerData 
= Buffer{ u }; @@ -836,8 +860,6 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) return; } - _uploadColorBitmap(p); - // TODO: Shrink instances buffer if (_instancesCount > _instanceBufferCapacity) { @@ -891,37 +913,6 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) _instancesCount = 0; } -void BackendD3D::_uploadColorBitmap(const RenderingPayload& p) -{ - // Not uploading the bitmap halves (!) the GPU load for any given frame. - // We don't need to upload if the background and foreground bitmaps are the same - // or when the _drawText() function determined that no glyph has the LigatureMarker, - // because then the pixel shader doesn't need to access the foreground bitmap anyways. - if (_colorBitmapGenerations[0] == p.colorBitmapGenerations[0] && - (_colorBitmapGenerations[1] == p.colorBitmapGenerations[1] || _skipForegroundBitmapUpload)) - { - return; - } - - D3D11_MAPPED_SUBRESOURCE mapped{}; - THROW_IF_FAILED(_deviceContext->Map(_colorBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); - - auto src = std::bit_cast(&*p.colorBitmap.begin()); - const auto srcEnd = std::bit_cast(&*p.colorBitmap.end()); - const auto srcStride = p.colorBitmapRowStride * sizeof(u32); - auto dst = static_cast(mapped.pData); - - while (src < srcEnd) - { - memcpy(dst, src, srcStride); - src += srcStride; - dst += mapped.RowPitch; - } - - _deviceContext->Unmap(_colorBitmap.get(), 0); - _colorBitmapGenerations = p.colorBitmapGenerations; -} - void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) { // We use the viewport size of the terminal as the initial estimate for the amount of instances we'll see. @@ -957,10 +948,38 @@ void BackendD3D::_recreateInstanceBuffers(const RenderingPayload& p) void BackendD3D::_drawBackground(const RenderingPayload& p) { + // Not uploading the bitmap halves (!) the GPU load for any given frame on 2023 hardware. 
+ if (_backgroundBitmapGeneration != p.colorBitmapGenerations[0]) + { + _uploadBackgroundBitmap(p); + } + _appendQuad() = { .shadingType = ShadingType::Background, .size = p.s->targetSize, }; + _flushQuads(p); +} + +void BackendD3D::_uploadBackgroundBitmap(const RenderingPayload& p) +{ + D3D11_MAPPED_SUBRESOURCE mapped{}; + THROW_IF_FAILED(_deviceContext->Map(_backgroundBitmap.get(), 0, D3D11_MAP_WRITE_DISCARD, 0, &mapped)); + + auto src = std::bit_cast(p.backgroundBitmap.data()); + const auto srcEnd = std::bit_cast(p.backgroundBitmap.data() + p.backgroundBitmap.size()); + const auto srcStride = p.colorBitmapRowStride * sizeof(u32); + auto dst = static_cast(mapped.pData); + + while (src < srcEnd) + { + memcpy(dst, src, srcStride); + src += srcStride; + dst += mapped.RowPitch; + } + + _deviceContext->Unmap(_backgroundBitmap.get(), 0); + _backgroundBitmapGeneration = p.colorBitmapGenerations[0]; } void BackendD3D::_drawText(RenderingPayload& p) @@ -970,9 +989,6 @@ void BackendD3D::_drawText(RenderingPayload& p) _resetGlyphAtlas(p); } - auto shadingTypeAccumulator = ShadingType::Default; - _skipForegroundBitmapUpload = false; - til::CoordType dirtyTop = til::CoordTypeMax; til::CoordType dirtyBottom = til::CoordTypeMin; @@ -1012,7 +1028,7 @@ void BackendD3D::_drawText(RenderingPayload& p) goto drawGlyphRetry; } - if (glyphEntry.data.shadingType != ShadingType::Default) + if (glyphEntry.data.GetShadingType() != ShadingType::Default) { auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); @@ -1028,14 +1044,17 @@ void BackendD3D::_drawText(RenderingPayload& p) row->dirtyBottom = std::max(row->dirtyBottom, t + glyphEntry.data.size.y); _appendQuad() = { - .shadingType = glyphEntry.data.shadingType, + .shadingType = glyphEntry.data.GetShadingType(), .position = { static_cast(l), static_cast(t) }, .size = glyphEntry.data.size, .texcoord = glyphEntry.data.texcoord, .color = 
row->colors[x], }; - shadingTypeAccumulator |= glyphEntry.data.shadingType; + if (glyphEntry.data.overlapSplit) + { + _drawTextOverlapSplit(p, y); + } } baselineX += row->glyphAdvances[x]; @@ -1059,8 +1078,59 @@ void BackendD3D::_drawText(RenderingPayload& p) } _d2dEndDrawing(); +} + +// There are a number of coding-oriented fonts that feature ligatures which (for instance) +// translate text like "!=" into a glyph that looks like "≠" (just 2 columns wide and not 1). +// Glyphs like that still need to be colored in potentially multiple colors however, so this +// function will handle these ligatures by splitting them up into multiple QuadInstances. +void BackendD3D::_drawTextOverlapSplit(const RenderingPayload& p, u16 y) +{ + const auto& originalQuad = _getLastQuad(); + + const int cellCountX{ p.s->cellCount.x }; + const int cellSizeX{ p.s->font->cellSize.x }; + + // We must ensure to exit the loop below while `column` is less than `cellCount.x`, + // otherwise we cause a potential out of bounds access into foregroundBitmap. + // This may happen with glyphs that are severely overlapping their cells, + // outside of the viewport. In other words, the `clipLeft < clipEnd` condition + // doubles as a `column < cellCount.x` condition with this trick. + const auto limitRight = (cellCountX - 1) * cellSizeX; + const auto clipEnd = std::min(limitRight, originalQuad.position.x + originalQuad.size.x); + + auto column = std::max(1, (originalQuad.position.x + cellSizeX) / cellSizeX); + auto clipLeft = column * cellSizeX; + const auto colors = &p.foregroundBitmap[p.colorBitmapRowStride * y]; + auto lastFg = originalQuad.color; - _skipForegroundBitmapUpload = WI_IsFlagClear(shadingTypeAccumulator, ShadingType::LigatureMarker); + for (; clipLeft < clipEnd; ++column, clipLeft += cellSizeX) + { + const auto fg = colors[column]; + + if (lastFg != fg) + { + // NOTE: _appendQuad might reallocate and any pointers + // acquired before calling this function are now invalid. 
+ auto& next = _appendQuad(); + // The item at -1 is the quad we've just appended, which means + // that the previous quad we want to split up is at -2. + auto& prev = _instances[_instancesCount - 2]; + + const auto prevWidth = clipLeft - prev.position.x; + const auto nextWidth = prev.size.x - prevWidth; + + prev.size.x = gsl::narrow(prevWidth); + + next = prev; + next.position.x = gsl::narrow(next.position.x + prevWidth); + next.texcoord.x = gsl::narrow(next.texcoord.x + prevWidth); + next.size.x = gsl::narrow(nextWidth); + next.color = fg; + + lastFg = fg; + } + } } bool BackendD3D::_drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry) @@ -1221,19 +1291,17 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const A _d2dBeginDrawing(); const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); - auto shadingType = colorGlyph ? ShadingType::Passthrough : _textShadingType; + auto shadingType = colorGlyph ? ShadingType::TextPassthrough : _textShadingType; // Ligatures are drawn with strict cell-wise foreground color, while other text allows colors to overhang // their cells. This makes sure that italics and such retain their color and don't look "cut off". // // The former condition makes sure to exclude diacritics and such from being considered a ligature, // while the latter condition-pair makes sure to exclude regular BMP wide glyphs that overlap a little. 
- if (rect.w >= p.s->font->cellSize.x && (bl <= p.s->font->ligatureOverhangTriggerLeft || br >= p.s->font->ligatureOverhangTriggerRight)) - { - shadingType |= ShadingType::LigatureMarker; - } + const auto overlapSplit = rect.w >= p.s->font->cellSize.x && (bl <= _ligatureOverhangTriggerLeft || br >= _ligatureOverhangTriggerRight); - glyphEntry.data.shadingType = shadingType; + glyphEntry.data.shadingType = static_cast(shadingType); + glyphEntry.data.overlapSplit = overlapSplit; glyphEntry.data.offset.x = bl; glyphEntry.data.offset.y = bt; glyphEntry.data.size.x = rect.w; @@ -1320,7 +1388,8 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa _d2dBeginDrawing(); _d2dRenderTarget->DrawBitmap(_softFontBitmap.get(), &dest, 1, interpolation, nullptr, nullptr); - glyphEntry.data.shadingType = ShadingType::TextGrayscale; + glyphEntry.data.shadingType = static_cast(ShadingType::TextGrayscale); + glyphEntry.data.overlapSplit = 0; glyphEntry.data.offset.x = 0; glyphEntry.data.offset.y = -p.s->font->baseline; glyphEntry.data.size.x = rect.w; @@ -1379,11 +1448,11 @@ void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasF // double-height row. This effectively turns the other (unneeded) side into whitespace. 
if (!top.data.size.y) { - top.data.shadingType = ShadingType::Default; + top.data.shadingType = static_cast(ShadingType::Default); } if (!bottom.data.size.y) { - bottom.data.shadingType = ShadingType::Default; + bottom.data.shadingType = static_cast(ShadingType::Default); } } @@ -1411,16 +1480,16 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro const auto left = static_cast(r.from * p.s->font->cellSize.x); const auto width = static_cast((r.to - r.from) * p.s->font->cellSize.x); - const auto appendHorizontalLine = [&](u16 offsetY, u16 height) { - _appendQuad() = QuadInstance{ - .shadingType = ShadingType::SolidFill, + const auto appendHorizontalLine = [&](u16 offsetY, u16 height, ShadingType shadingType) { + _appendQuad() = { + .shadingType = shadingType, .position = { left, static_cast(top + offsetY) }, .size = { width, height }, .color = r.color, }; }; - const auto appendVerticalLine = [&](int col) { - _appendQuad() = QuadInstance{ + const auto appendVerticalLine = [&](u16 col) { + _appendQuad() = { .shadingType = ShadingType::SolidFill, .position = { static_cast(col * p.s->font->cellSize.x), top }, .size = { p.s->font->thinLineWidth, p.s->font->cellSize.y }, @@ -1435,10 +1504,6 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro appendVerticalLine(i); } } - if (r.lines.test(GridLines::Top)) - { - appendHorizontalLine(0, p.s->font->thinLineWidth); - } if (r.lines.test(GridLines::Right)) { for (auto i = r.to; i > r.from; --i) @@ -1446,26 +1511,30 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro appendVerticalLine(i); } } + if (r.lines.test(GridLines::Top)) + { + appendHorizontalLine(0, p.s->font->thinLineWidth, ShadingType::SolidFill); + } if (r.lines.test(GridLines::Bottom)) { - appendHorizontalLine(p.s->font->cellSize.y - p.s->font->thinLineWidth, p.s->font->thinLineWidth); + appendHorizontalLine(p.s->font->cellSize.y - p.s->font->thinLineWidth, 
p.s->font->thinLineWidth, ShadingType::SolidFill); } if (r.lines.test(GridLines::Underline)) { - appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth); + appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth, ShadingType::SolidFill); } if (r.lines.test(GridLines::HyperlinkUnderline)) { - appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth); + appendHorizontalLine(p.s->font->underlinePos, p.s->font->underlineWidth, ShadingType::DashedLine); } if (r.lines.test(GridLines::DoubleUnderline)) { - appendHorizontalLine(p.s->font->doubleUnderlinePos.x, p.s->font->thinLineWidth); - appendHorizontalLine(p.s->font->doubleUnderlinePos.y, p.s->font->thinLineWidth); + appendHorizontalLine(p.s->font->doubleUnderlinePos.x, p.s->font->thinLineWidth, ShadingType::SolidFill); + appendHorizontalLine(p.s->font->doubleUnderlinePos.y, p.s->font->thinLineWidth, ShadingType::SolidFill); } if (r.lines.test(GridLines::Strikethrough)) { - appendHorizontalLine(p.s->font->strikethroughPos, p.s->font->strikethroughWidth); + appendHorizontalLine(p.s->font->strikethroughPos, p.s->font->strikethroughWidth, ShadingType::SolidFill); } } } @@ -1485,9 +1554,9 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) for (auto x1 = p.cursorRect.left; x1 < p.cursorRect.right; ++x1) { const auto x0 = x1; - const auto bg = p.colorBitmap[offset + x1] | 0xff000000; + const auto bg = p.backgroundBitmap[offset + x1] | 0xff000000; - for (; x1 < p.cursorRect.right && (p.colorBitmap[offset + x1] | 0xff000000) == bg; ++x1) + for (; x1 < p.cursorRect.right && (p.backgroundBitmap[offset + x1] | 0xff000000) == bg; ++x1) { } @@ -1749,7 +1818,7 @@ void BackendD3D::_executeCustomShader(RenderingPayload& p) _deviceContext->VSSetConstantBuffers(0, 1, _vsConstantBuffer.addressof()); // PS: Pixel Shader - ID3D11ShaderResourceView* resources[]{ _colorBitmapView.get(), _glyphAtlasView.get() }; + ID3D11ShaderResourceView* resources[]{ _backgroundBitmapView.get(), 
_glyphAtlasView.get() }; _deviceContext->PSSetShader(_pixelShader.get(), nullptr, 0); _deviceContext->PSSetConstantBuffers(0, 1, _psConstantBuffer.addressof()); _deviceContext->PSSetShaderResources(0, 2, &resources[0]); diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 946e3ac05ab..f8c34068da6 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -61,17 +61,10 @@ namespace Microsoft::Console::Render::Atlas Background = 0, TextGrayscale = 1, TextClearType = 2, - Passthrough = 3, + TextPassthrough = 3, DashedLine = 4, SolidFill = 5, - - // The shader normally uses per-glyph coloring for text, because this allows us to retain the color of - // glyphs even if they're slightly out of their cell's bounds. This improves the look of fonts with large descenders/ascenders, etc., in particular for complex Unicode text. - // The shader uses strict per-cell coloring for common coding ligatures like === - // because they're drawn as - LigatureMarker = 0x80000000, }; - ATLAS_FLAG_OPS(ShadingType, u32, friend); // NOTE: Don't initialize any members in this struct. This ensures that no // zero-initialization needs to occur when we allocate large buffers of this object. @@ -91,10 +84,16 @@ namespace Microsoft::Console::Render::Atlas struct alignas(u32) AtlasGlyphEntryData { - ShadingType shadingType; + u16 shadingType; + u16 overlapSplit; i16x2 offset; u16x2 size; u16x2 texcoord; + + constexpr ShadingType GetShadingType() const noexcept + { + return static_cast(shadingType); + } }; // NOTE: Don't initialize any members in this struct. 
This ensures that no @@ -172,12 +171,12 @@ namespace Microsoft::Console::Render::Atlas }; private: - __declspec(noinline) void _handleSettingsUpdate(const RenderingPayload& p); - void _updateFontDependents(const RenderingPayload& p); + ATLAS_ATTR_COLD void _handleSettingsUpdate(const RenderingPayload& p); + void _updateFontDependents(IDWriteFactory2* dwriteFactory, const FontSettings& font); void _recreateCustomShader(const RenderingPayload& p); void _recreateCustomRenderTargetView(u16x2 targetSize); void _d2dRenderTargetUpdateFontSettings(const FontSettings& font) const noexcept; - void _recreateColorBitmap(u16x2 cellCount); + void _recreateBackgroundColorBitmap(u16x2 cellCount); void _recreateConstBuffer(const RenderingPayload& p) const; void _setupDeviceContextState(const RenderingPayload& p); void _debugUpdateShaders(const RenderingPayload& p) noexcept; @@ -185,17 +184,18 @@ namespace Microsoft::Console::Render::Atlas void _debugDumpRenderTarget(const RenderingPayload& p); void _d2dBeginDrawing() noexcept; void _d2dEndDrawing(); - __declspec(noinline) void _resetGlyphAtlas(const RenderingPayload& p); + ATLAS_ATTR_COLD void _resetGlyphAtlas(const RenderingPayload& p); void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; QuadInstance& _appendQuad(); - __declspec(noinline) void _bumpInstancesSize(); + ATLAS_ATTR_COLD void _bumpInstancesSize(); void _flushQuads(const RenderingPayload& p); - __declspec(noinline) void _recreateInstanceBuffers(const RenderingPayload& p); - void _uploadColorBitmap(const RenderingPayload& p); + ATLAS_ATTR_COLD void _recreateInstanceBuffers(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p); + void _uploadBackgroundBitmap(const RenderingPayload& p); void _drawText(RenderingPayload& p); - __declspec(noinline) [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); + ATLAS_ATTR_COLD 
void _drawTextOverlapSplit(const RenderingPayload& p, u16 y); + ATLAS_ATTR_COLD [[nodiscard]] bool _drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); bool _drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); @@ -246,16 +246,17 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _customShaderSamplerState; std::chrono::steady_clock::time_point _customShaderStartTime; - wil::com_ptr _colorBitmap; - wil::com_ptr _colorBitmapView; - std::array _colorBitmapGenerations; - bool _skipForegroundBitmapUpload = false; + wil::com_ptr _backgroundBitmap; + wil::com_ptr _backgroundBitmapView; + til::generation_t _backgroundBitmapGeneration; wil::com_ptr _glyphAtlas; wil::com_ptr _glyphAtlasView; til::linear_flat_set _glyphAtlasMap; Buffer _rectPackerData; stbrp_context _rectPacker{}; + til::CoordType _ligatureOverhangTriggerLeft = 0; + til::CoordType _ligatureOverhangTriggerRight = 0; wil::com_ptr _d2dRenderTarget; wil::com_ptr _d2dRenderTarget4; // Optional. Supported since Windows 10 14393. 
diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index fd72fb950f6..445147a3da4 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -12,32 +12,32 @@ namespace Microsoft::Console::Render::Atlas { -#define ATLAS_FLAG_OPS(type, underlying, attr) \ - attr constexpr type operator~(type v) noexcept \ +#define ATLAS_FLAG_OPS(type, underlying) \ + constexpr type operator~(type v) noexcept \ { \ return static_cast(~static_cast(v)); \ } \ - attr constexpr type operator|(type lhs, type rhs) noexcept \ + constexpr type operator|(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) | static_cast(rhs)); \ } \ - attr constexpr type operator&(type lhs, type rhs) noexcept \ + constexpr type operator&(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) & static_cast(rhs)); \ } \ - attr constexpr type operator^(type lhs, type rhs) noexcept \ + constexpr type operator^(type lhs, type rhs) noexcept \ { \ return static_cast(static_cast(lhs) ^ static_cast(rhs)); \ } \ - attr constexpr void operator|=(type& lhs, type rhs) noexcept \ + constexpr void operator|=(type& lhs, type rhs) noexcept \ { \ lhs = lhs | rhs; \ } \ - attr constexpr void operator&=(type& lhs, type rhs) noexcept \ + constexpr void operator&=(type& lhs, type rhs) noexcept \ { \ lhs = lhs & rhs; \ } \ - attr constexpr void operator^=(type& lhs, type rhs) noexcept \ + constexpr void operator^=(type& lhs, type rhs) noexcept \ { \ lhs = lhs ^ rhs; \ } @@ -53,6 +53,9 @@ namespace Microsoft::Console::Render::Atlas return !(*this == rhs); \ } + // My best effort of replicating __attribute__((cold)) from gcc/clang. 
+#define ATLAS_ATTR_COLD __declspec(noinline) + template struct vec2 { @@ -317,9 +320,9 @@ namespace Microsoft::Console::Render::Atlas std::vector fontFeatures; std::vector fontAxisValues; f32 fontSize = 0; - f32 advanceScale = 0; u16x2 cellSize; u16 fontWeight = 0; + u16 advanceWidth = 0; u16 baseline = 0; u16 descender = 0; u16 underlinePos = 0; @@ -330,8 +333,6 @@ namespace Microsoft::Console::Render::Atlas u16 thinLineWidth = 0; u16 dpi = 96; AntialiasingMode antialiasingMode = DefaultAntialiasingMode; - til::CoordType ligatureOverhangTriggerLeft = 0; - til::CoordType ligatureOverhangTriggerRight = 0; std::vector softFontPattern; til::size softFontCellSize; @@ -383,7 +384,7 @@ namespace Microsoft::Console::Render::Atlas Bold = 0b01, Italic = 0b10, }; - ATLAS_FLAG_OPS(FontRelevantAttributes, u8, ) + ATLAS_FLAG_OPS(FontRelevantAttributes, u8) struct FontMapping { @@ -468,9 +469,23 @@ namespace Microsoft::Console::Render::Atlas // The first NxM (for instance 120x30 pixel) chunk contains background colors and the // second chunk contains foreground colors. The distance in u32 items between the start // and the begin of the foreground bitmap is equal to colorBitmapDepthStride. + // + // The background part is in premultiplied alpha, whereas the foreground part is in straight + // alpha. This is mostly because of Direct2D being annoying, as the former is the only thing + // it supports for bitmaps, whereas the latter is the only thing it supports for text. + // Since we implement Direct2D's text blending algorithm, we're equally dependent on + // straight alpha for BackendD3D, as straight alpha is used in the pixel shader there. Buffer colorBitmap; + // This exists as a convenience access to colorBitmap and + // contains a view into the background color bitmap. + std::span backgroundBitmap; + // This exists as a convenience access to colorBitmap and + // contains a view into the foreground color bitmap. 
+ std::span foregroundBitmap; // This stride of the colorBitmap is a "count" of u32 and not in bytes. size_t colorBitmapRowStride = 0; + // FYI depth refers to the `colorBitmapRowStride * height` size of each bitmap contained + // in colorBitmap. colorBitmap contains 2 bitmaps (background and foreground colors). size_t colorBitmapDepthStride = 0; // A generation of 1 ensures that the backends redraw the background on the first Present(). // The 1st entry in this array corresponds to the background and the 2nd to the foreground bitmap. diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 78cb81119ae..a6c0c3aa38c 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -5,10 +5,9 @@ #define SHADING_TYPE_TEXT_BACKGROUND 0 #define SHADING_TYPE_TEXT_GRAYSCALE 1 #define SHADING_TYPE_TEXT_CLEARTYPE 2 -#define SHADING_TYPE_PASSTHROUGH 3 +#define SHADING_TYPE_TEXT_PASSTHROUGH 3 #define SHADING_TYPE_DASHED_LINE 4 #define SHADING_TYPE_SOLID_FILL 5 -#define SHADING_TYPE_LIGATURE_MARKER 0x80000000 // clang-format on struct VSData diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 2b47068412f..8f4cb6c8de4 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -30,7 +30,7 @@ Output main(PSData data) : SV_Target float4 color; float4 weights; - switch (data.shadingType & ~SHADING_TYPE_LIGATURE_MARKER) + switch (data.shadingType) { case SHADING_TYPE_TEXT_BACKGROUND: { @@ -41,28 +41,12 @@ Output main(PSData data) : SV_Target } case SHADING_TYPE_TEXT_GRAYSCALE: { - // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. Part 1... 
- float4 foreground = premultiplyColor(data.color); - - if (data.shadingType & SHADING_TYPE_LIGATURE_MARKER) - { - float2 cell = data.position.xy / cellSize; - cell.y += cellCount.y; - foreground = background[cell]; - - if (foreground.a == 0) - { - discard; - } - - data.color = float4(foreground.rgb / foreground.a, foreground.a); - } - - // ...Part 2: + // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. + const float4 foreground = premultiplyColor(data.color); const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); const float intensity = DWrite_CalcColorIntensity(data.color.rgb); // These aren't. - float4 glyph = glyphAtlas[data.texcoord]; + const float4 glyph = glyphAtlas[data.texcoord]; const float contrasted = DWrite_EnhanceContrast(glyph.a, blendEnhancedContrast); const float alphaCorrected = DWrite_ApplyAlphaCorrection(contrasted, intensity, gammaRatios); color = alphaCorrected * foreground; @@ -71,31 +55,17 @@ Output main(PSData data) : SV_Target } case SHADING_TYPE_TEXT_CLEARTYPE: { - if (data.shadingType & SHADING_TYPE_LIGATURE_MARKER) - { - float2 cell = data.position.xy / cellSize; - cell.y += cellCount.y; - data.color = background[cell]; - - if (data.color.a == 0) - { - discard; - } - - data.color.rgb /= data.color.a; - } - // These are independent of the glyph texture and could be moved to the vertex shader or CPU side of things. const float blendEnhancedContrast = DWrite_ApplyLightOnDarkContrastAdjustment(enhancedContrast, data.color.rgb); // These aren't. 
- float4 glyph = glyphAtlas[data.texcoord]; + const float4 glyph = glyphAtlas[data.texcoord]; const float3 contrasted = DWrite_EnhanceContrast3(glyph.rgb, blendEnhancedContrast); const float3 alphaCorrected = DWrite_ApplyAlphaCorrection3(contrasted, data.color.rgb, gammaRatios); weights = float4(alphaCorrected * data.color.a, 1); color = weights * data.color; break; } - case SHADING_TYPE_PASSTHROUGH: + case SHADING_TYPE_TEXT_PASSTHROUGH: { color = glyphAtlas[data.texcoord]; weights = color.aaaa; From 1b9cd8d78bf7260fd5933bc62bd40c558b018405 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 7 Apr 2023 19:10:17 +0200 Subject: [PATCH 29/37] Fix overlap split for double width glyphs --- src/renderer/atlas/BackendD3D.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 4bfad4225f1..c3a7120953e 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1088,8 +1088,14 @@ void BackendD3D::_drawTextOverlapSplit(const RenderingPayload& p, u16 y) { const auto& originalQuad = _getLastQuad(); - const int cellCountX{ p.s->cellCount.x }; - const int cellSizeX{ p.s->font->cellSize.x }; + int cellCountX{ p.s->cellCount.x }; + int cellSizeX{ p.s->font->cellSize.x }; + + if (p.rows[y]->lineRendition != LineRendition::SingleWidth) + { + cellCountX >>= 1; + cellSizeX <<= 1; + } // We must ensure to exit the loop below while `column` is less than `cellCount.x`, // otherwise we cause a potential out of bounds access into foregroundBitmap. @@ -1298,7 +1304,10 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const A // // The former condition makes sure to exclude diacritics and such from being considered a ligature, // while the latter condition-pair makes sure to exclude regular BMP wide glyphs that overlap a little. 
- const auto overlapSplit = rect.w >= p.s->font->cellSize.x && (bl <= _ligatureOverhangTriggerLeft || br >= _ligatureOverhangTriggerRight); + const auto horizontalScale = lineRendition != LineRendition::SingleWidth ? 2 : 1; + const auto triggerLeft = _ligatureOverhangTriggerLeft * horizontalScale; + const auto triggerRight = _ligatureOverhangTriggerRight * horizontalScale; + const auto overlapSplit = rect.w >= p.s->font->cellSize.x && (bl <= triggerLeft || br >= triggerRight); glyphEntry.data.shadingType = static_cast(shadingType); glyphEntry.data.overlapSplit = overlapSplit; From 039e27fc22c0abf051a1fb2914512565747923f0 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Fri, 7 Apr 2023 22:34:13 +0200 Subject: [PATCH 30/37] Fix AuditMode, Fix DRCS baseline, Fix DECDWL color bitmaps --- src/renderer/atlas/AtlasEngine.api.cpp | 22 +++++++++--------- src/renderer/atlas/AtlasEngine.cpp | 31 ++++++++++++++------------ src/renderer/atlas/BackendD3D.cpp | 11 +++++---- src/tools/RenderingTests/main.cpp | 10 ++++----- 4 files changed, 40 insertions(+), 34 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 2ec2f49f0d4..9f33002e9d3 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -705,17 +705,17 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo if (fontMetrics) { std::wstring fontName{ requestedFaceName }; - const auto fontWeightU16 = static_cast(requestedWeight); - const auto advanceWidthU16 = static_cast(lrintf(advanceWidth)); - const auto baselineU16 = static_cast(lrintf(baseline)); - const auto descenderU16 = static_cast(cellHeight - baselineU16); - const auto underlinePosU16 = static_cast(lrintf(underlinePos)); - const auto underlineWidthU16 = static_cast(lrintf(underlineWidth)); - const auto strikethroughPosU16 = static_cast(lrintf(strikethroughPos)); - const auto strikethroughWidthU16 = 
static_cast(lrintf(strikethroughWidth)); - const auto doubleUnderlinePosTopU16 = static_cast(lrintf(doubleUnderlinePosTop)); - const auto doubleUnderlinePosBottomU16 = static_cast(lrintf(doubleUnderlinePosBottom)); - const auto thinLineWidthU16 = static_cast(lrintf(thinLineWidth)); + const auto fontWeightU16 = gsl::narrow_cast(requestedWeight); + const auto advanceWidthU16 = gsl::narrow_cast(lrintf(advanceWidth)); + const auto baselineU16 = gsl::narrow_cast(lrintf(baseline)); + const auto descenderU16 = gsl::narrow_cast(cellHeight - baselineU16); + const auto underlinePosU16 = gsl::narrow_cast(lrintf(underlinePos)); + const auto underlineWidthU16 = gsl::narrow_cast(lrintf(underlineWidth)); + const auto strikethroughPosU16 = gsl::narrow_cast(lrintf(strikethroughPos)); + const auto strikethroughWidthU16 = gsl::narrow_cast(lrintf(strikethroughWidth)); + const auto doubleUnderlinePosTopU16 = gsl::narrow_cast(lrintf(doubleUnderlinePosTop)); + const auto doubleUnderlinePosBottomU16 = gsl::narrow_cast(lrintf(doubleUnderlinePosBottom)); + const auto thinLineWidthU16 = gsl::narrow_cast(lrintf(thinLineWidth)); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause _api to be in an inconsistent state otherwise. diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index eb298fcd36c..72143c31cae 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -132,8 +132,8 @@ try // Scrolling the background bitmap is a lot easier because we can rely on memmove which works // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. 
{ - const auto srcOffset = std::max(0, -offset) * static_cast(_p.colorBitmapRowStride); - const auto dstOffset = std::max(0, offset) * static_cast(_p.colorBitmapRowStride); + const auto srcOffset = std::max(0, -offset) * gsl::narrow_cast(_p.colorBitmapRowStride); + const auto dstOffset = std::max(0, offset) * gsl::narrow_cast(_p.colorBitmapRowStride); const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset); assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride); assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride); @@ -311,9 +311,9 @@ try } { - const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 1 : 0; + const auto shift = gsl::narrow_cast(_api.lineRendition != LineRendition::SingleWidth); const auto row = _p.colorBitmap.begin() + _p.colorBitmapRowStride * y; - auto beg = row + x; + auto beg = row + (static_cast(x) << shift); auto end = row + (static_cast(columnEnd) << shift); const u32 colors[] = { @@ -348,7 +348,7 @@ CATCH_RETURN() [[nodiscard]] HRESULT AtlasEngine::PaintBufferGridLines(const GridLineSet lines, const COLORREF color, const size_t cchLine, const til::point coordTarget) noexcept try { - const auto shift = _api.lineRendition >= LineRendition::DoubleWidth ? 
1 : 0; + const auto shift = gsl::narrow_cast(_api.lineRendition != LineRendition::SingleWidth); const auto y = gsl::narrow_cast(clamp(coordTarget.y, 0, _p.s->cellCount.y)); const auto from = gsl::narrow_cast(clamp(coordTarget.x << shift, 0, _p.s->cellCount.x - 1)); const auto to = gsl::narrow_cast(clamp((coordTarget.x + cchLine) << shift, from, _p.s->cellCount.x)); @@ -636,14 +636,15 @@ void AtlasEngine::_flushBufferLine() if (isTextSimple) { + const auto shift = gsl::narrow_cast(row.lineRendition != LineRendition::SingleWidth); const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; for (size_t i = 0; i < complexityLength; ++i) { - const auto col1 = _api.bufferLineColumn[idx + i + 0]; - const auto col2 = _api.bufferLineColumn[idx + i + 1]; + const size_t col1 = _api.bufferLineColumn[idx + i + 0]; + const size_t col2 = _api.bufferLineColumn[idx + i + 1]; const auto glyphAdvance = (col2 - col1) * _p.s->font->cellSize.x; - const auto fg = colors[col1]; + const auto fg = colors[col1 << shift]; row.glyphIndices.emplace_back(_api.glyphIndices[i]); row.glyphAdvances.emplace_back(static_cast(glyphAdvance)); row.glyphOffsets.emplace_back(); @@ -837,6 +838,7 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng _api.clusterMap[a.textLength] = gsl::narrow_cast(actualGlyphCount); + const auto shift = gsl::narrow_cast(row.lineRendition != LineRendition::SingleWidth); const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; auto prevCluster = _api.clusterMap[0]; size_t beg = 0; @@ -849,9 +851,9 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng continue; } - const auto col1 = _api.bufferLineColumn[a.textPosition + beg]; - const auto col2 = _api.bufferLineColumn[a.textPosition + i]; - const auto fg = colors[col1]; + const size_t col1 = _api.bufferLineColumn[a.textPosition + beg]; + const size_t col2 = 
_api.bufferLineColumn[a.textPosition + i]; + const auto fg = colors[col1 << shift]; const auto expectedAdvance = (col2 - col1) * _p.s->font->cellSize.x; f32 actualAdvance = 0; @@ -908,11 +910,12 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) auto pos1 = from; auto pos2 = pos1; - auto col1 = _api.bufferLineColumn[from]; - auto col2 = col1; + size_t col1 = _api.bufferLineColumn[from]; + size_t col2 = col1; auto initialIndicesCount = row.glyphIndices.size(); const auto softFontAvailable = !_p.s->font->softFontPattern.empty(); auto currentlyMappingSoftFont = isSoftFontChar(_api.bufferLine[pos1]); + const auto shift = gsl::narrow_cast(row.lineRendition != LineRendition::SingleWidth); const auto colors = _p.foregroundBitmap.begin() + _p.colorBitmapRowStride * _api.lastPaintBufferLineCoord.y; while (pos2 < to) @@ -930,7 +933,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) row.glyphIndices.emplace_back(nowMappingSoftFont ? ch : _api.replacementCharacterGlyphIndex); row.glyphAdvances.emplace_back(static_cast(cols * _p.s->font->cellSize.x)); row.glyphOffsets.emplace_back(DWRITE_GLYPH_OFFSET{}); - row.colors.emplace_back(colors[col1]); + row.colors.emplace_back(colors[col1 << shift]); if (currentlyMappingSoftFont != nowMappingSoftFont) { diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index c3a7120953e..7e69d5ec144 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1297,7 +1297,7 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, f32 glyphAdvance, const A _d2dBeginDrawing(); const auto colorGlyph = DrawGlyphRun(_d2dRenderTarget.get(), _d2dRenderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, _brush.get()); - auto shadingType = colorGlyph ? ShadingType::TextPassthrough : _textShadingType; + const auto shadingType = colorGlyph ? 
ShadingType::TextPassthrough : _textShadingType; // Ligatures are drawn with strict cell-wise foreground color, while other text allows colors to overhang // their cells. This makes sure that italics and such retain their color and don't look "cut off". @@ -1334,10 +1334,14 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa }; const auto lineRendition = static_cast(fontFaceEntry.lineRendition); + auto baseline = p.s->font->baseline; + if (lineRendition != LineRendition::SingleWidth) { + const auto heightShift = static_cast(lineRendition >= LineRendition::DoubleHeightTop); rect.w <<= 1; - rect.h <<= static_cast(lineRendition >= LineRendition::DoubleHeightTop); + rect.h <<= heightShift; + baseline <<= heightShift; } if (!stbrp_pack_rects(&_rectPacker, &rect, 1)) @@ -1400,7 +1404,7 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa glyphEntry.data.shadingType = static_cast(ShadingType::TextGrayscale); glyphEntry.data.overlapSplit = 0; glyphEntry.data.offset.x = 0; - glyphEntry.data.offset.y = -p.s->font->baseline; + glyphEntry.data.offset.y = -baseline; glyphEntry.data.size.x = rect.w; glyphEntry.data.size.y = rect.h; glyphEntry.data.texcoord.x = rect.x; @@ -1408,7 +1412,6 @@ bool BackendD3D::_drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFa if (lineRendition >= LineRendition::DoubleHeightTop) { - glyphEntry.data.offset.y -= p.s->font->cellSize.y; _splitDoubleHeightGlyph(p, fontFaceEntry, glyphEntry); } diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp index 49cac079889..32365d207f3 100644 --- a/src/tools/RenderingTests/main.cpp +++ b/src/tools/RenderingTests/main.cpp @@ -172,11 +172,11 @@ int main() { printUTF16( - L"\x1B[3;5HDECDWL Double Width \U0001FAE0 A\u0353\u0353 B\u036F\u036F" - L"\x1B[4;5H\x1b#6DECDWL Double Width \U0001FAE0 A\u0353\u0353 B\u036F\u036F" - L"\x1B[8;5HDECDHL Double Height \U0001F642\U0001F6C1 A\u0353\u0353 B\u036F\u036F 
X\u0353\u0353 Y\u036F\u036F" - L"\x1B[9;5H\x1b#3DECDHL Double Height Top \U0001F642 A\u0353\u0353 B\u036F\u036F" - L"\x1B[10;5H\x1b#4DECDHL Double Height Bottom \U0001F6C1 X\u0353\u0353 Y\u036F\u036F"); + L"\x1B[3;5HDECDWL Double Width \U0001FAE0 \x1B[43;36mA\u0353\u0353\x1B[m B\u036F\u036F" + L"\x1B[4;5H\x1b#6DECDWL Double Width \U0001FAE0 \x1B[43;36mA\u0353\u0353\x1B[m B\u036F\u036F" + L"\x1B[8;5HDECDHL Double Height \U0001F952\U0001F6C1 A\u0353\u0353 \x1B[43;36mB\u036F\u036F\x1B[m \x1B[43;36mX\u0353\u0353\x1B[m Y\u036F\u036F" + L"\x1B[9;5H\x1b#3DECDHL Double Height Top \U0001F952 A\u0353\u0353 \x1B[43;36mB\u036F\u036F\x1B[m" + L"\x1B[10;5H\x1b#4DECDHL Double Height Bottom \U0001F6C1 \x1B[43;36mX\u0353\u0353\x1B[m Y\u036F\u036F"); wait(); clear(); From 93722f8da2468e5d4e1497882468b7d47d06545b Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 11 Apr 2023 18:20:14 +0200 Subject: [PATCH 31/37] Lots and lots and lots of fixes * Some minor cleanup with C++20isms * Text on later rows are now drawn on top of prior text decorations * Fix emoji size measuring * On DECDWL/DECDHL rows * Fix dotted underline decoration scale * Fix text decoration positioning and clipping * Fix overlapping glyph splitting * Fix cursor size/position --- src/renderer/atlas/AtlasEngine.api.cpp | 42 ++-- src/renderer/atlas/AtlasEngine.cpp | 42 ++-- src/renderer/atlas/AtlasEngine.h | 2 +- src/renderer/atlas/Backend.cpp | 127 ++++------ src/renderer/atlas/Backend.h | 10 +- src/renderer/atlas/BackendD2D.cpp | 286 ++++++++++------------ src/renderer/atlas/BackendD2D.h | 31 +-- src/renderer/atlas/BackendD3D.cpp | 280 +++++++++++++-------- src/renderer/atlas/BackendD3D.h | 13 +- src/renderer/atlas/DWriteTextAnalysis.cpp | 3 +- src/renderer/atlas/DWriteTextAnalysis.h | 12 +- src/renderer/atlas/common.h | 22 +- src/renderer/atlas/shader_common.hlsl | 5 +- src/renderer/atlas/shader_ps.hlsl | 13 +- src/tools/RenderingTests/main.cpp | 42 +++- 15 files changed, 498 insertions(+), 432 deletions(-) diff 
--git a/src/renderer/atlas/AtlasEngine.api.cpp b/src/renderer/atlas/AtlasEngine.api.cpp index 9f33002e9d3..2d0708bb82e 100644 --- a/src/renderer/atlas/AtlasEngine.api.cpp +++ b/src/renderer/atlas/AtlasEngine.api.cpp @@ -502,9 +502,9 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo // Gsub is for GetGlyphs() and Gpos for GetGlyphPlacements(). // // GH#10774: Apparently specifying all of the features is just redundant. - fontFeatures.emplace_back(DWRITE_FONT_FEATURE{ DWRITE_FONT_FEATURE_TAG_STANDARD_LIGATURES, 1 }); - fontFeatures.emplace_back(DWRITE_FONT_FEATURE{ DWRITE_FONT_FEATURE_TAG_CONTEXTUAL_LIGATURES, 1 }); - fontFeatures.emplace_back(DWRITE_FONT_FEATURE{ DWRITE_FONT_FEATURE_TAG_CONTEXTUAL_ALTERNATES, 1 }); + fontFeatures.emplace_back(DWRITE_FONT_FEATURE_TAG_STANDARD_LIGATURES, 1); + fontFeatures.emplace_back(DWRITE_FONT_FEATURE_TAG_CONTEXTUAL_LIGATURES, 1); + fontFeatures.emplace_back(DWRITE_FONT_FEATURE_TAG_CONTEXTUAL_ALTERNATES, 1); for (const auto& p : features) { @@ -523,7 +523,7 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo fontFeatures[2].parameter = p.second; break; default: - fontFeatures.emplace_back(DWRITE_FONT_FEATURE{ tag, p.second }); + fontFeatures.emplace_back(tag, p.second); break; } } @@ -537,9 +537,9 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo // AtlasEngine::_recreateFontDependentResources() relies on these fields to // exist in this particular order in order to create appropriate default axes. 
- fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_WEIGHT, -1.0f }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_ITALIC, -1.0f }); - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ DWRITE_FONT_AXIS_TAG_SLANT, -1.0f }); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_TAG_WEIGHT, -1.0f); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_TAG_ITALIC, -1.0f); + fontAxisValues.emplace_back(DWRITE_FONT_AXIS_TAG_SLANT, -1.0f); for (const auto& p : axes) { @@ -558,7 +558,7 @@ void AtlasEngine::_updateFont(const wchar_t* faceName, const FontInfoDesired& fo fontAxisValues[2].value = p.second; break; default: - fontAxisValues.emplace_back(DWRITE_FONT_AXIS_VALUE{ tag, p.second }); + fontAxisValues.emplace_back(tag, p.second); break; } } @@ -709,13 +709,17 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo const auto advanceWidthU16 = gsl::narrow_cast(lrintf(advanceWidth)); const auto baselineU16 = gsl::narrow_cast(lrintf(baseline)); const auto descenderU16 = gsl::narrow_cast(cellHeight - baselineU16); + const auto thinLineWidthU16 = gsl::narrow_cast(lrintf(thinLineWidth)); + + const auto gridBottomPositionU16 = gsl::narrow_cast(cellHeight - thinLineWidth); + const auto gridRightPositionU16 = gsl::narrow_cast(cellWidth - thinLineWidth); + const auto underlinePosU16 = gsl::narrow_cast(lrintf(underlinePos)); const auto underlineWidthU16 = gsl::narrow_cast(lrintf(underlineWidth)); const auto strikethroughPosU16 = gsl::narrow_cast(lrintf(strikethroughPos)); const auto strikethroughWidthU16 = gsl::narrow_cast(lrintf(strikethroughWidth)); const auto doubleUnderlinePosTopU16 = gsl::narrow_cast(lrintf(doubleUnderlinePosTop)); const auto doubleUnderlinePosBottomU16 = gsl::narrow_cast(lrintf(doubleUnderlinePosBottom)); - const auto thinLineWidthU16 = gsl::narrow_cast(lrintf(thinLineWidth)); // NOTE: From this point onward no early returns or throwing code should exist, // as we might cause _api to be 
in an inconsistent state otherwise. @@ -724,18 +728,22 @@ void AtlasEngine::_resolveFontMetrics(const wchar_t* requestedFaceName, const Fo fontMetrics->fontFamily = std::move(fontFamily); fontMetrics->fontName = std::move(fontName); fontMetrics->fontSize = fontSizeInPx; - fontMetrics->cellSize.x = cellWidth; - fontMetrics->cellSize.y = cellHeight; + fontMetrics->cellSize = { cellWidth, cellHeight }; fontMetrics->fontWeight = fontWeightU16; fontMetrics->advanceWidth = advanceWidthU16; fontMetrics->baseline = baselineU16; fontMetrics->descender = descenderU16; - fontMetrics->underlinePos = underlinePosU16; - fontMetrics->underlineWidth = underlineWidthU16; - fontMetrics->strikethroughPos = strikethroughPosU16; - fontMetrics->strikethroughWidth = strikethroughWidthU16; - fontMetrics->doubleUnderlinePos.x = doubleUnderlinePosTopU16; - fontMetrics->doubleUnderlinePos.y = doubleUnderlinePosBottomU16; fontMetrics->thinLineWidth = thinLineWidthU16; + + fontMetrics->gridTop = { 0, thinLineWidthU16 }; + fontMetrics->gridBottom = { gridBottomPositionU16, thinLineWidthU16 }; + fontMetrics->gridLeft = { 0, thinLineWidthU16 }; + fontMetrics->gridRight = { gridRightPositionU16, thinLineWidthU16 }; + + fontMetrics->underline = { underlinePosU16, underlineWidthU16 }; + fontMetrics->strikethrough = { strikethroughPosU16, strikethroughWidthU16 }; + fontMetrics->doubleUnderline[0] = { doubleUnderlinePosTopU16, thinLineWidthU16 }; + fontMetrics->doubleUnderline[1] = { doubleUnderlinePosBottomU16, thinLineWidthU16 }; + fontMetrics->overline = { 0, underlineWidthU16 }; } } diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index 72143c31cae..9f6bf1ef9ff 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -417,14 +417,25 @@ try const auto point = options.coordCursor; // TODO: options.coordCursor can contain invalid out of bounds coordinates when // the window is being resized and the cursor is on the last line of 
the viewport. - const auto x = gsl::narrow_cast(clamp(point.x, 0, _p.s->cellCount.x - 1)); - const auto y = gsl::narrow_cast(clamp(point.y, 0, _p.s->cellCount.y - 1)); + const auto top = clamp(point.y, 0, _p.s->cellCount.y - 1); + const auto bottom = top + 1; const auto cursorWidth = 1 + (options.fIsDoubleWidth & (options.cursorType != CursorType::VerticalBar)); - const auto right = gsl::narrow_cast(clamp(x + cursorWidth, 0, _p.s->cellCount.x - 0)); - const auto bottom = gsl::narrow_cast(y + 1); - _p.cursorRect = { x, y, right, bottom }; - _p.dirtyRectInPx.left = std::min(_p.dirtyRectInPx.left, x * _p.s->font->cellSize.x); - _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, y * _p.s->font->cellSize.y); + + auto left = std::max(point.x, 0); + auto right = std::max(left + cursorWidth, 0); + + if (_p.rows[top]->lineRendition != LineRendition::SingleWidth) + { + left <<= 1; + right <<= 1; + } + + left = std::min(left, _p.s->cellCount.x - cursorWidth); + right = std::min(right, i32{ _p.s->cellCount.x }); + + _p.cursorRect = { left, top, right, bottom }; + _p.dirtyRectInPx.left = std::min(_p.dirtyRectInPx.left, left * _p.s->font->cellSize.x); + _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, top * _p.s->font->cellSize.y); _p.dirtyRectInPx.right = std::max(_p.dirtyRectInPx.right, right * _p.s->font->cellSize.x); _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, bottom * _p.s->font->cellSize.y); } @@ -520,9 +531,7 @@ void AtlasEngine::_recreateFontDependentResources() { // See AtlasEngine::UpdateFont. // It hardcodes indices 0/1/2 in fontAxisValues to the weight/italic/slant axes. - // If they're NAN they haven't been set by the user and must be filled by us. - // When we call SetFontAxisValues() we basically override (disable) DirectWrite's internal font axes, - // and if either of the 3 aren't set we'd make it impossible for the user to see bold/italic text. + // If they're -1 they haven't been set by the user and must be filled by us. 
const auto& standardAxes = _p.s->font->fontAxisValues; auto fontAxisValues = _p.s->font->fontAxisValues; @@ -727,7 +736,7 @@ void AtlasEngine::_mapCharacters(const wchar_t* text, const u32 textLength, u32* assert(scale == 1); } -void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row) +void AtlasEngine::_mapComplex(IDWriteFontFace2* mappedFontFace, u32 idx, u32 length, ShapedRow& row) { _api.analysisResults.clear(); @@ -737,7 +746,6 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng for (const auto& a : _api.analysisResults) { - const DWRITE_SCRIPT_ANALYSIS scriptAnalysis{ a.script, static_cast(a.shapes) }; u32 actualGlyphCount = 0; #pragma warning(push) @@ -775,8 +783,8 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng /* textLength */ a.textLength, /* fontFace */ mappedFontFace, /* isSideways */ false, - /* isRightToLeft */ a.bidiLevel & 1, - /* scriptAnalysis */ &scriptAnalysis, + /* isRightToLeft */ 0, + /* scriptAnalysis */ &a.analysis, /* localeName */ nullptr, /* numberSubstitution */ nullptr, /* features */ &features, @@ -827,8 +835,8 @@ void AtlasEngine::_mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 leng /* fontFace */ mappedFontFace, /* fontEmSize */ _p.s->font->fontSize, /* isSideways */ false, - /* isRightToLeft */ a.bidiLevel & 1, - /* scriptAnalysis */ &scriptAnalysis, + /* isRightToLeft */ 0, + /* scriptAnalysis */ &a.analysis, /* localeName */ nullptr, /* features */ &features, /* featureRangeLengths */ &featureRangeLengths, @@ -932,7 +940,7 @@ void AtlasEngine::_mapReplacementCharacter(u32 from, u32 to, ShapedRow& row) row.glyphIndices.emplace_back(nowMappingSoftFont ? 
ch : _api.replacementCharacterGlyphIndex); row.glyphAdvances.emplace_back(static_cast(cols * _p.s->font->cellSize.x)); - row.glyphOffsets.emplace_back(DWRITE_GLYPH_OFFSET{}); + row.glyphOffsets.emplace_back(); row.colors.emplace_back(colors[col1 << shift]); if (currentlyMappingSoftFont != nowMappingSoftFont) diff --git a/src/renderer/atlas/AtlasEngine.h b/src/renderer/atlas/AtlasEngine.h index 02dde6320e5..5b298c3a8a6 100644 --- a/src/renderer/atlas/AtlasEngine.h +++ b/src/renderer/atlas/AtlasEngine.h @@ -83,7 +83,7 @@ namespace Microsoft::Console::Render::Atlas void _recreateCellCountDependentResources(); void _flushBufferLine(); void _mapCharacters(const wchar_t* text, u32 textLength, u32* mappedLength, IDWriteFontFace2** mappedFontFace) const; - void _mapComplex(IDWriteFontFace* mappedFontFace, u32 idx, u32 length, ShapedRow& row); + void _mapComplex(IDWriteFontFace2* mappedFontFace, u32 idx, u32 length, ShapedRow& row); ATLAS_ATTR_COLD void _mapReplacementCharacter(u32 from, u32 to, ShapedRow& row); // AtlasEngine.api.cpp diff --git a/src/renderer/atlas/Backend.cpp b/src/renderer/atlas/Backend.cpp index 56f6edd92b2..5b97246ca27 100644 --- a/src/renderer/atlas/Backend.cpp +++ b/src/renderer/atlas/Backend.cpp @@ -170,6 +170,19 @@ void SwapChainManager::_updateMatrixTransform(const RenderingPayload& p) _fontGeneration = p.s->font.generation(); } +void Microsoft::Console::Render::Atlas::GlyphRunAccumulateBounds(const ID2D1DeviceContext* d2dRenderTarget, D2D1_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, D2D1_RECT_F& bounds) +{ + D2D1_RECT_F rect{}; + THROW_IF_FAILED(d2dRenderTarget->GetGlyphRunWorldBounds(baselineOrigin, glyphRun, DWRITE_MEASURING_MODE_NATURAL, &rect)); + if (rect.top < rect.bottom) + { + bounds.left = std::min(bounds.left, rect.left); + bounds.top = std::min(bounds.top, rect.top); + bounds.right = std::max(bounds.right, rect.right); + bounds.bottom = std::max(bounds.bottom, rect.bottom); + } +} + wil::com_ptr 
Microsoft::Console::Render::Atlas::TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) noexcept { static constexpr auto formats = @@ -192,91 +205,57 @@ wil::com_ptr Microsoft::Console::Render::Atlas: return enumerator; } -// Draws a `DWRITE_GLYPH_RUN` at `baselineOrigin` into the given `ID2D1DeviceContext`. -// `d2dRenderTarget4` and `dwriteFactory4` are optional and used to draw colored glyphs. -// Returns true if the `DWRITE_GLYPH_RUN` contained a color glyph. -bool Microsoft::Console::Render::Atlas::DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) +bool Microsoft::Console::Render::Atlas::ColorGlyphRunMoveNext(IDWriteColorGlyphRunEnumerator1* enumerator) { - // Support for ID2D1DeviceContext4 implies support for IDWriteFactory4 and vice versa. - if (const auto enumerator = TranslateColorGlyphRun(dwriteFactory4, baselineOrigin, glyphRun)) - { - DrawColorGlyphRun(d2dRenderTarget4, enumerator.get(), foregroundBrush); - return true; - } - else - { - DrawBasicGlyphRun(d2dRenderTarget, baselineOrigin, glyphRun, foregroundBrush); - return false; - } + BOOL hasRun; + THROW_IF_FAILED(enumerator->MoveNext(&hasRun)); + return hasRun; } -void Microsoft::Console::Render::Atlas::DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) noexcept +const DWRITE_COLOR_GLYPH_RUN1* Microsoft::Console::Render::Atlas::ColorGlyphRunGetCurrentRun(IDWriteColorGlyphRunEnumerator1* enumerator) { - d2dRenderTarget->DrawGlyphRun(baselineOrigin, glyphRun, foregroundBrush, DWRITE_MEASURING_MODE_NATURAL); + const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun = nullptr; + THROW_IF_FAILED(enumerator->GetCurrentRun(&colorGlyphRun)); + return colorGlyphRun; } -void 
Microsoft::Console::Render::Atlas::DrawColorGlyphRun(ID2D1DeviceContext4* d2dRenderTarget4, IDWriteColorGlyphRunEnumerator1* enumerator, ID2D1Brush* foregroundBrush) +void Microsoft::Console::Render::Atlas::ColorGlyphRunAccumulateBounds(const ID2D1DeviceContext* d2dRenderTarget, const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun, D2D1_RECT_F& bounds) { - const auto previousAntialiasingMode = d2dRenderTarget4->GetTextAntialiasMode(); - d2dRenderTarget4->SetTextAntialiasMode(D2D1_TEXT_ANTIALIAS_MODE_GRAYSCALE); - const auto cleanup = wil::scope_exit([&]() { - d2dRenderTarget4->SetTextAntialiasMode(previousAntialiasingMode); - }); - - wil::com_ptr solidBrush; + const D2D1_POINT_2F baselineOrigin{ colorGlyphRun->baselineOriginX, colorGlyphRun->baselineOriginY }; + GlyphRunAccumulateBounds(d2dRenderTarget, baselineOrigin, &colorGlyphRun->glyphRun, bounds); +} - for (;;) +void Microsoft::Console::Render::Atlas::ColorGlyphRunDraw(ID2D1DeviceContext4* d2dRenderTarget4, ID2D1SolidColorBrush* emojiBrush, ID2D1SolidColorBrush* foregroundBrush, const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun) noexcept +{ + ID2D1Brush* runBrush = nullptr; + if (colorGlyphRun->paletteIndex == /*DWRITE_NO_PALETTE_INDEX*/ 0xffff) { - BOOL hasRun; - THROW_IF_FAILED(enumerator->MoveNext(&hasRun)); - if (!hasRun) - { - break; - } - - const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun; - THROW_IF_FAILED(enumerator->GetCurrentRun(&colorGlyphRun)); - - ID2D1Brush* runBrush = nullptr; - if (colorGlyphRun->paletteIndex == /*DWRITE_NO_PALETTE_INDEX*/ 0xffff) - { - runBrush = foregroundBrush; - } - else - { - if (!solidBrush) - { - THROW_IF_FAILED(d2dRenderTarget4->CreateSolidColorBrush(colorGlyphRun->runColor, &solidBrush)); - } - else - { - solidBrush->SetColor(colorGlyphRun->runColor); - } - runBrush = solidBrush.get(); - } + runBrush = foregroundBrush; + } + else + { + emojiBrush->SetColor(&colorGlyphRun->runColor); + runBrush = emojiBrush; + } - const D2D1_POINT_2F runOrigin{ - colorGlyphRun->baselineOriginX, - 
colorGlyphRun->baselineOriginY, - }; + const D2D1_POINT_2F baselineOrigin{ colorGlyphRun->baselineOriginX, colorGlyphRun->baselineOriginY }; - switch (colorGlyphRun->glyphImageFormat) - { - case DWRITE_GLYPH_IMAGE_FORMATS_NONE: - break; - case DWRITE_GLYPH_IMAGE_FORMATS_PNG: - case DWRITE_GLYPH_IMAGE_FORMATS_JPEG: - case DWRITE_GLYPH_IMAGE_FORMATS_TIFF: - case DWRITE_GLYPH_IMAGE_FORMATS_PREMULTIPLIED_B8G8R8A8: - d2dRenderTarget4->DrawColorBitmapGlyphRun(colorGlyphRun->glyphImageFormat, runOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->measuringMode, D2D1_COLOR_BITMAP_GLYPH_SNAP_OPTION_DEFAULT); - break; - case DWRITE_GLYPH_IMAGE_FORMATS_SVG: - d2dRenderTarget4->DrawSvgGlyphRun(runOrigin, &colorGlyphRun->glyphRun, runBrush, nullptr, 0, colorGlyphRun->measuringMode); - break; - default: - d2dRenderTarget4->DrawGlyphRun(runOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->glyphRunDescription, runBrush, colorGlyphRun->measuringMode); - break; - } + switch (colorGlyphRun->glyphImageFormat) + { + case DWRITE_GLYPH_IMAGE_FORMATS_NONE: + break; + case DWRITE_GLYPH_IMAGE_FORMATS_PNG: + case DWRITE_GLYPH_IMAGE_FORMATS_JPEG: + case DWRITE_GLYPH_IMAGE_FORMATS_TIFF: + case DWRITE_GLYPH_IMAGE_FORMATS_PREMULTIPLIED_B8G8R8A8: + d2dRenderTarget4->DrawColorBitmapGlyphRun(colorGlyphRun->glyphImageFormat, baselineOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->measuringMode, D2D1_COLOR_BITMAP_GLYPH_SNAP_OPTION_DEFAULT); + break; + case DWRITE_GLYPH_IMAGE_FORMATS_SVG: + d2dRenderTarget4->DrawSvgGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, runBrush, nullptr, 0, colorGlyphRun->measuringMode); + break; + default: + d2dRenderTarget4->DrawGlyphRun(baselineOrigin, &colorGlyphRun->glyphRun, colorGlyphRun->glyphRunDescription, runBrush, colorGlyphRun->measuringMode); + break; } } diff --git a/src/renderer/atlas/Backend.h b/src/renderer/atlas/Backend.h index f89e70d41a8..40c33b64151 100644 --- a/src/renderer/atlas/Backend.h +++ b/src/renderer/atlas/Backend.h @@ -118,8 +118,12 @@ 
namespace Microsoft::Console::Render::Atlas return val < min ? min : (max < val ? max : val); } + inline constexpr D2D1_RECT_F GlyphRunEmptyBounds{ 1e38f, 1e38f, -1e38f, -1e38f }; + void GlyphRunAccumulateBounds(const ID2D1DeviceContext* d2dRenderTarget, D2D1_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, D2D1_RECT_F& bounds); + wil::com_ptr TranslateColorGlyphRun(IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun) noexcept; - bool DrawGlyphRun(ID2D1DeviceContext* d2dRenderTarget, ID2D1DeviceContext4* d2dRenderTarget4, IDWriteFactory4* dwriteFactory4, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush); - void DrawBasicGlyphRun(ID2D1DeviceContext* d2dRenderTarget, D2D_POINT_2F baselineOrigin, const DWRITE_GLYPH_RUN* glyphRun, ID2D1Brush* foregroundBrush) noexcept; - void DrawColorGlyphRun(ID2D1DeviceContext4* d2dRenderTarget4, IDWriteColorGlyphRunEnumerator1* enumerator, ID2D1Brush* foregroundBrush); + bool ColorGlyphRunMoveNext(IDWriteColorGlyphRunEnumerator1* enumerator); + const DWRITE_COLOR_GLYPH_RUN1* ColorGlyphRunGetCurrentRun(IDWriteColorGlyphRunEnumerator1* enumerator); + void ColorGlyphRunAccumulateBounds(const ID2D1DeviceContext* d2dRenderTarget, const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun, D2D1_RECT_F& bounds); + void ColorGlyphRunDraw(ID2D1DeviceContext4* d2dRenderTarget4, ID2D1SolidColorBrush* emojiBrush, ID2D1SolidColorBrush* foregroundBrush, const DWRITE_COLOR_GLYPH_RUN1* colorGlyphRun) noexcept; } diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index 07548ff74f2..66ea4095556 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -23,20 +23,6 @@ TIL_FAST_MATH_BEGIN using namespace Microsoft::Console::Render::Atlas; -template<> -struct ::std::hash -{ - constexpr size_t operator()(u32 key) const noexcept - { - return til::flat_set_hash_integer(key); - } - - constexpr size_t 
operator()(const BackendD2D::CachedBrush& slot) const noexcept - { - return til::flat_set_hash_integer(slot.color); - } -}; - BackendD2D::BackendD2D(wil::com_ptr device, wil::com_ptr deviceContext) noexcept : _device{ std::move(device) }, _deviceContext{ std::move(deviceContext) } @@ -58,7 +44,6 @@ void BackendD2D::Render(RenderingPayload& p) _drawBackground(p); _drawCursorPart1(p); _drawText(p); - _drawGridlines(p); _drawCursorPart2(p); _drawSelection(p); #if ATLAS_DEBUG_SHOW_DIRTY @@ -122,13 +107,18 @@ void BackendD2D::_handleSettingsUpdate(const RenderingPayload& p) _renderTarget->SetUnitMode(D2D1_UNIT_MODE_PIXELS); _renderTarget->SetAntialiasMode(D2D1_ANTIALIAS_MODE_ALIASED); } - _clearBrushes(); + { + static constexpr D2D1_COLOR_F color{}; + THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&color, nullptr, _emojiBrush.put())); + THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); + _brushColor = 0; + } } if (!_dottedStrokeStyle) { static constexpr D2D1_STROKE_STYLE_PROPERTIES props{ .dashStyle = D2D1_DASH_STYLE_CUSTOM }; - static constexpr FLOAT dashes[2]{ 1, 2 }; + static constexpr FLOAT dashes[2]{ 1, 1 }; THROW_IF_FAILED(p.d2dFactory->CreateStrokeStyle(&props, &dashes[0], 2, _dottedStrokeStyle.addressof())); } @@ -209,59 +199,25 @@ void BackendD2D::_drawText(RenderingPayload& p) auto baselineX = 0.0f; auto baselineY = static_cast(p.s->font->cellSize.y * y + p.s->font->baseline); - if (p.invalidatedRows.contains(y)) + if (row->lineRendition != LineRendition::SingleWidth) { - for (const auto& m : row->mappings) + // If you print the top half of a double height row (DECDHL), the expectation is that only + // the top half is visible, which requires us to keep the clip rect at the bottom of the row. + // (Vice versa for the bottom half of a double height row.) 
+ if (row->lineRendition >= LineRendition::DoubleHeightTop) { - if (!m.fontFace) + D2D1_RECT_F clipRect{ 0, 0, static_cast(p.s->targetSize.x), static_cast(p.s->targetSize.y) }; + if (row->lineRendition == LineRendition::DoubleHeightTop) { - continue; + clipRect.bottom = static_cast(row->dirtyBottom); } - - const DWRITE_GLYPH_RUN glyphRun{ - .fontFace = m.fontFace.get(), - .fontEmSize = p.s->font->fontSize, - .glyphCount = gsl::narrow_cast(m.glyphsTo - m.glyphsFrom), - .glyphIndices = &row->glyphIndices[m.glyphsFrom], - .glyphAdvances = &row->glyphAdvances[m.glyphsFrom], - .glyphOffsets = &row->glyphOffsets[m.glyphsFrom], - }; - - D2D1_RECT_F bounds{}; - THROW_IF_FAILED(_renderTarget->GetGlyphRunWorldBounds({ 0.0f, baselineY }, &glyphRun, DWRITE_MEASURING_MODE_NATURAL, &bounds)); - - if (bounds.top < bounds.bottom) + else { - // If you print the top half of a double height row (DECDHL), the expectation is that only - // the top half is visible, which requires us to keep the clip rect at the bottom of the row. - // (Vice versa for the bottom half of a double height row.) - // - // Since we used SetUnitMode(D2D1_UNIT_MODE_PIXELS), bounds.top/bottom is in pixels already and requires no conversion nor rounding. 
- if (row->lineRendition != LineRendition::DoubleHeightBottom) - { - row->dirtyTop = std::min(row->dirtyTop, static_cast(lrintf(bounds.top))); - } - if (row->lineRendition != LineRendition::DoubleHeightTop) - { - row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lrintf(bounds.bottom))); - } + clipRect.top = static_cast(row->dirtyTop); } + _renderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); } - dirtyTop = std::min(dirtyTop, row->dirtyTop); - dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); - } - - const D2D1_RECT_F clipRect{ - 0, - static_cast(row->dirtyTop), - static_cast(p.s->targetSize.x), - static_cast(row->dirtyBottom), - }; - _renderTarget->PushAxisAlignedClip(&clipRect, D2D1_ANTIALIAS_MODE_ALIASED); - - if (row->lineRendition != LineRendition::SingleWidth) - { baselineY = _drawTextPrepareLineRendition(p, baselineY, row->lineRendition); } @@ -298,7 +254,35 @@ void BackendD2D::_drawText(RenderingPayload& p) if (glyphRun.fontFace) { - DrawGlyphRun(_renderTarget.get(), _renderTarget4.get(), p.dwriteFactory4.get(), baselineOrigin, &glyphRun, brush); + D2D1_RECT_F bounds = GlyphRunEmptyBounds; + + if (const auto enumerator = TranslateColorGlyphRun(p.dwriteFactory4.get(), baselineOrigin, &glyphRun)) + { + while (ColorGlyphRunMoveNext(enumerator.get())) + { + const auto colorGlyphRun = ColorGlyphRunGetCurrentRun(enumerator.get()); + ColorGlyphRunDraw(_renderTarget4.get(), _emojiBrush.get(), brush, colorGlyphRun); + ColorGlyphRunAccumulateBounds(_renderTarget.get(), colorGlyphRun, bounds); + } + } + else + { + _renderTarget->DrawGlyphRun(baselineOrigin, &glyphRun, brush, DWRITE_MEASURING_MODE_NATURAL); + GlyphRunAccumulateBounds(_renderTarget.get(), baselineOrigin, &glyphRun, bounds); + } + + if (bounds.top < bounds.bottom) + { + // Since we used SetUnitMode(D2D1_UNIT_MODE_PIXELS), bounds.top/bottom is in pixels already and requires no conversion/rounding. 
+ if (row->lineRendition != LineRendition::DoubleHeightTop) + { + row->dirtyBottom = std::max(row->dirtyBottom, static_cast(lrintf(bounds.bottom))); + } + if (row->lineRendition != LineRendition::DoubleHeightBottom) + { + row->dirtyTop = std::min(row->dirtyTop, static_cast(lrintf(bounds.top))); + } + } } for (UINT32 i = 0; i < glyphRun.glyphCount; ++i) @@ -308,12 +292,26 @@ void BackendD2D::_drawText(RenderingPayload& p) } } + if (!row->gridLineRanges.empty()) + { + _drawGridlineRow(p, row, y); + } + if (row->lineRendition != LineRendition::SingleWidth) { _drawTextResetLineRendition(); + + if (row->lineRendition >= LineRendition::DoubleHeightTop) + { + _renderTarget->PopAxisAlignedClip(); + } } - _renderTarget->PopAxisAlignedClip(); + if (p.invalidatedRows.contains(y)) + { + dirtyTop = std::min(dirtyTop, row->dirtyTop); + dirtyBottom = std::max(dirtyBottom, row->dirtyBottom); + } ++y; } @@ -327,7 +325,6 @@ void BackendD2D::_drawText(RenderingPayload& p) f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const noexcept { - const auto descender = static_cast(p.s->font->cellSize.y - p.s->font->baseline); D2D1_MATRIX_3X2_F transform{ .m11 = 2.0f, .m22 = 1.0f, @@ -336,7 +333,7 @@ f32 BackendD2D::_drawTextPrepareLineRendition(const RenderingPayload& p, f32 bas if (lineRendition >= LineRendition::DoubleHeightTop) { transform.m22 = 2.0f; - transform.dy = -1.0f * (baselineY + descender); + transform.dy = -1.0f * (baselineY + p.s->font->descender); if (lineRendition == LineRendition::DoubleHeightTop) { @@ -416,116 +413,87 @@ f32r BackendD2D::_getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 return accumulatedBounds; } -void BackendD2D::_drawGridlines(const RenderingPayload& p) +void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) { - u16 y = 0; - for (const auto row : p.rows) - { - if (!row->gridLineRanges.empty()) + const auto widthShift = 
gsl::narrow_cast(row->lineRendition != LineRendition::SingleWidth); + const auto cellSize = p.s->font->cellSize; + const auto rowTop = gsl::narrow_cast(cellSize.y * y); + const auto rowBottom = gsl::narrow_cast(rowTop + cellSize.y); + const auto textCellCenter = row->lineRendition == LineRendition::DoubleHeightTop ? rowBottom : rowTop; + + const auto appendVerticalLines = [&](const GridLineRange& r, FontDecorationPosition pos) { + const auto from = r.from >> widthShift; + const auto to = r.to >> widthShift; + + auto posX = from * cellSize.x + pos.position; + const auto end = to * cellSize.x; + + D2D1_POINT_2F point0{ 0, static_cast(textCellCenter) }; + D2D1_POINT_2F point1{ 0, static_cast(textCellCenter + cellSize.y) }; + const auto brush = _brushWithColor(r.color); + const f32 w = pos.height; + const f32 hw = w * 0.5f; + + for (; posX < end; posX += cellSize.x) { - _drawGridlineRow(p, row, y); + const auto centerX = posX + hw; + point0.x = centerX; + point1.x = centerX; + _renderTarget->DrawLine(point0, point1, brush, w, nullptr); } - y++; - } -} - -void BackendD2D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) -{ - const auto columnToPx = [&](til::CoordType i) { - return static_cast(i * p.s->font->cellSize.x); }; - const auto rowToPx = [&](til::CoordType i) { - return static_cast(i * p.s->font->cellSize.y); + const auto appendHorizontalLine = [&](const GridLineRange& r, FontDecorationPosition pos, ID2D1StrokeStyle* strokeStyle) { + const auto from = r.from >> widthShift; + const auto to = r.to >> widthShift; + + const auto brush = _brushWithColor(r.color); + const f32 w = pos.height; + const f32 centerY = textCellCenter + pos.position + w * 0.5f; + const D2D1_POINT_2F point0{ static_cast(from * cellSize.x), centerY }; + const D2D1_POINT_2F point1{ static_cast(to * cellSize.x), centerY }; + _renderTarget->DrawLine(point0, point1, brush, w, strokeStyle); }; - const auto top = rowToPx(y); - const auto bottom = top + p.s->font->cellSize.y; 
- for (const auto& r : row->gridLineRanges) { // AtlasEngine.cpp shouldn't add any gridlines if they don't do anything. assert(r.lines.any()); - const auto left = columnToPx(r.from); - const auto right = columnToPx(r.to); - D2D1_RECT_F rect{}; - if (r.lines.test(GridLines::Left)) { - rect.top = top; - rect.bottom = bottom; - for (auto i = r.from; i < r.to; ++i) - { - rect.left = columnToPx(i); - rect.right = rect.left + p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); - } + appendVerticalLines(r, p.s->font->gridLeft); } if (r.lines.test(GridLines::Right)) { - rect.top = top; - rect.bottom = bottom; - for (auto i = r.to; i > r.from; --i) - { - rect.right = columnToPx(i); - rect.left = rect.right - p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); - } + appendVerticalLines(r, p.s->font->gridRight); } if (r.lines.test(GridLines::Top)) { - rect.left = left; - rect.top = top; - rect.right = right; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); + appendHorizontalLine(r, p.s->font->gridTop, nullptr); } if (r.lines.test(GridLines::Bottom)) { - rect.left = left; - rect.top = bottom - p.s->font->thinLineWidth; - rect.right = right; - rect.bottom = bottom; - _fillRectangle(rect, r.color); + appendHorizontalLine(r, p.s->font->gridBottom, nullptr); } + if (r.lines.test(GridLines::Underline)) { - rect.left = left; - rect.top = top + p.s->font->underlinePos; - rect.right = right; - rect.bottom = rect.top + p.s->font->underlineWidth; - _fillRectangle(rect, r.color); + appendHorizontalLine(r, p.s->font->underline, nullptr); } if (r.lines.test(GridLines::HyperlinkUnderline)) { - const auto w = p.s->font->underlineWidth; - const auto centerY = (top + p.s->font->underlinePos) + w * 0.5f; - const auto brush = _brushWithColor(r.color); - const D2D1_POINT_2F point0{ static_cast(left), centerY }; - const D2D1_POINT_2F point1{ static_cast(right), centerY }; - _renderTarget->DrawLine(point0, point1, brush, w, 
_dottedStrokeStyle.get()); + appendHorizontalLine(r, p.s->font->underline, _dottedStrokeStyle.get()); } if (r.lines.test(GridLines::DoubleUnderline)) { - rect.left = left; - rect.top = top + p.s->font->doubleUnderlinePos.x; - rect.right = right; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); - - rect.left = left; - rect.top = top + p.s->font->doubleUnderlinePos.y; - rect.right = right; - rect.bottom = rect.top + p.s->font->thinLineWidth; - _fillRectangle(rect, r.color); + for (const auto pos : p.s->font->doubleUnderline) + { + appendHorizontalLine(r, pos, nullptr); + } } if (r.lines.test(GridLines::Strikethrough)) { - rect.left = left; - rect.top = top + p.s->font->strikethroughPos; - rect.right = right; - rect.bottom = rect.top + p.s->font->strikethroughWidth; - _fillRectangle(rect, r.color); + appendHorizontalLine(r, p.s->font->strikethrough, nullptr); } } } @@ -647,8 +615,8 @@ void BackendD2D::_drawCursor(const RenderingPayload& p, ID2D1RenderTarget* rende renderTarget->FillRectangle(&rect, brush); break; case CursorType::Underscore: - rect.top += p.s->font->underlinePos; - rect.bottom = rect.top + p.s->font->underlineWidth; + rect.top += p.s->font->underline.position; + rect.bottom = rect.top + p.s->font->underline.height; renderTarget->FillRectangle(&rect, brush); break; case CursorType::EmptyBox: @@ -668,10 +636,10 @@ void BackendD2D::_drawCursor(const RenderingPayload& p, ID2D1RenderTarget* rende case CursorType::DoubleUnderscore: { auto rect2 = rect; - rect2.top = rect.top + p.s->font->doubleUnderlinePos.x; + rect2.top = rect.top + p.s->font->doubleUnderline[0].position; rect2.bottom = rect2.top + p.s->font->thinLineWidth; renderTarget->FillRectangle(&rect2, brush); - rect.top = rect.top + p.s->font->doubleUnderlinePos.y; + rect.top = rect.top + p.s->font->doubleUnderline[1].position; rect.bottom = rect.top + p.s->font->thinLineWidth; renderTarget->FillRectangle(&rect, brush); break; @@ -740,29 +708,21 @@ void 
BackendD2D::_debugDumpRenderTarget(const RenderingPayload& p) } #endif -ID2D1Brush* BackendD2D::_brushWithColor(u32 color) +ID2D1SolidColorBrush* BackendD2D::_brushWithColor(u32 color) { - if (_brushes.size() >= 16) + if (_brushColor != color) { - _clearBrushes(); + _brushWithColorUpdate(color); } - - const auto [cached, inserted] = _brushes.insert(color); - if (inserted) - { - const auto d2dColor = colorFromU32(color); - THROW_IF_FAILED(_renderTarget->CreateSolidColorBrush(&d2dColor, nullptr, cached.brush.addressof())); - } - - return cached.brush.get(); + return _brush.get(); } -void BackendD2D::_clearBrushes() const noexcept +ID2D1SolidColorBrush* BackendD2D::_brushWithColorUpdate(u32 color) { - for (auto& slot : _brushes.container()) - { - slot.brush.reset(); - } + const auto d2dColor = colorFromU32(color); + _brush->SetColor(&d2dColor); + _brushColor = color; + return _brush.get(); } void BackendD2D::_fillRectangle(const D2D1_RECT_F& rect, u32 color) diff --git a/src/renderer/atlas/BackendD2D.h b/src/renderer/atlas/BackendD2D.h index ddeb938bc43..6a01a1a48db 100644 --- a/src/renderer/atlas/BackendD2D.h +++ b/src/renderer/atlas/BackendD2D.h @@ -17,28 +17,6 @@ namespace Microsoft::Console::Render::Atlas bool RequiresContinuousRedraw() noexcept override; void WaitUntilCanRender() noexcept override; - struct CachedBrush - { - wil::com_ptr brush; - u32 color = 0; - - constexpr bool operator==(u32 key) const noexcept - { - return color == key; - } - - operator bool() const noexcept - { - return static_cast(brush); - } - - constexpr CachedBrush& operator=(u32 key) noexcept - { - color = key; - return *this; - } - }; - private: ATLAS_ATTR_COLD void _handleSettingsUpdate(const RenderingPayload& p); void _drawBackground(const RenderingPayload& p) noexcept; @@ -46,7 +24,6 @@ namespace Microsoft::Console::Render::Atlas f32 _drawTextPrepareLineRendition(const RenderingPayload& p, f32 baselineY, LineRendition lineRendition) const noexcept; void _drawTextResetLineRendition() 
const noexcept; ATLAS_ATTR_COLD f32r _getGlyphRunDesignBounds(const DWRITE_GLYPH_RUN& glyphRun, f32 baselineX, f32 baselineY); - void _drawGridlines(const RenderingPayload& p); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); void _drawCursorWithColor(const RenderingPayload& p); void _drawCursorPart1(const RenderingPayload& p); @@ -56,8 +33,8 @@ namespace Microsoft::Console::Render::Atlas void _drawSelection(const RenderingPayload& p); void _debugShowDirty(const RenderingPayload& p); void _debugDumpRenderTarget(const RenderingPayload& p); - ID2D1Brush* _brushWithColor(u32 color); - ATLAS_ATTR_COLD void _clearBrushes() const noexcept; + ATLAS_ATTR_COLD ID2D1SolidColorBrush* _brushWithColor(u32 color); + ATLAS_ATTR_COLD ID2D1SolidColorBrush* _brushWithColorUpdate(u32 color); void _fillRectangle(const D2D1_RECT_F& rect, u32 color); SwapChainManager _swapChainManager; @@ -75,7 +52,9 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _cursorBitmap; til::size _cursorBitmapSize; // in columns/rows - til::linear_flat_set _brushes; + wil::com_ptr _emojiBrush; + wil::com_ptr _brush; + u32 _brushColor = 0; Buffer _glyphMetrics; diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 7e69d5ec144..9c5bc00ab93 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -222,7 +222,7 @@ void BackendD3D::Render(RenderingPayload& p) #endif // After a Present() the render target becomes unbound. - _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); + _deviceContext->OMSetRenderTargets(1, _customRenderTargetView ? _customRenderTargetView.addressof() : _renderTargetView.addressof(), nullptr); // Invalidating the render target helps with spotting invalid quad instances and Present1() bugs. 
#if ATLAS_DEBUG_SHOW_DIRTY || ATLAS_DEBUG_DUMP_RENDER_TARGET @@ -235,7 +235,6 @@ void BackendD3D::Render(RenderingPayload& p) _drawBackground(p); _drawCursorPart1(p); _drawText(p); - _drawGridlines(p); _drawCursorPart2(p); _drawSelection(p); #if ATLAS_DEBUG_SHOW_DIRTY @@ -499,10 +498,6 @@ void BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) _customOffscreenTexture.reset(); _customOffscreenTextureView.reset(); - // This causes our regular rendered contents to end up in the offscreen texture. We'll then use the - // `_customRenderTargetView` to render into the swap chain using the custom (user provided) shader. - _customRenderTargetView = std::move(_renderTargetView); - const D3D11_TEXTURE2D_DESC desc{ .Width = targetSize.x, .Height = targetSize.y, @@ -514,7 +509,7 @@ void BackendD3D::_recreateCustomRenderTargetView(u16x2 targetSize) }; THROW_IF_FAILED(_device->CreateTexture2D(&desc, nullptr, _customOffscreenTexture.addressof())); THROW_IF_FAILED(_device->CreateShaderResourceView(_customOffscreenTexture.get(), nullptr, _customOffscreenTextureView.addressof())); - THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _renderTargetView.addressof())); + THROW_IF_FAILED(_device->CreateRenderTargetView(_customOffscreenTexture.get(), nullptr, _customRenderTargetView.addressof())); } void BackendD3D::_recreateBackgroundColorBitmap(u16x2 cellCount) @@ -559,7 +554,7 @@ void BackendD3D::_recreateConstBuffer(const RenderingPayload& p) const data.cellCount = { static_cast(p.s->cellCount.x), static_cast(p.s->cellCount.y) }; DWrite_GetGammaRatios(_gamma, data.gammaRatios); data.enhancedContrast = p.s->font->antialiasingMode == AntialiasingMode::ClearType ? 
_cleartypeEnhancedContrast : _grayscaleEnhancedContrast; - data.dashedLineLength = p.s->font->underlineWidth * 3.0f; + data.underlineWidth = p.s->font->underline.height; _deviceContext->UpdateSubresource(_psConstantBuffer.get(), 0, nullptr, &data, 0, 0); } } @@ -593,7 +588,7 @@ void BackendD3D::_setupDeviceContextState(const RenderingPayload& p) // OM: Output Merger _deviceContext->OMSetBlendState(_blendState.get(), nullptr, 0xffffffff); - _deviceContext->OMSetRenderTargets(1, _renderTargetView.addressof(), nullptr); + _deviceContext->OMSetRenderTargets(1, _customRenderTargetView ? _customRenderTargetView.addressof() : _renderTargetView.addressof(), nullptr); } #ifndef NDEBUG @@ -791,6 +786,7 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) { static constexpr D2D1_COLOR_F color{ 1, 1, 1, 1 }; + THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _emojiBrush.put())); THROW_IF_FAILED(_d2dRenderTarget->CreateSolidColorBrush(&color, nullptr, _brush.put())); } @@ -815,10 +811,7 @@ void BackendD3D::_resetGlyphAtlas(const RenderingPayload& p) void BackendD3D::_markStateChange(ID3D11BlendState* blendState) { - _instancesStateChanges.emplace_back(StateChange{ - .blendState = blendState, - .offset = _instancesCount, - }); + _instancesStateChanges.emplace_back(blendState, _instancesCount); } BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept @@ -996,8 +989,20 @@ void BackendD3D::_drawText(RenderingPayload& p) for (const auto row : p.rows) { f32 baselineX = 0; - const auto baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; - const auto lineRenditionScale = static_cast(row->lineRendition != LineRendition::SingleWidth); + f32 baselineY = y * p.s->font->cellSize.y + p.s->font->baseline; + f32 scaleX = 1; + f32 scaleY = 1; + + if (row->lineRendition != LineRendition::SingleWidth) + { + scaleX = 2; + + if (row->lineRendition >= LineRendition::DoubleHeightTop) + { + scaleY = 2; + baselineY /= 2; + } + } for (const auto& m : 
row->mappings) { @@ -1017,7 +1022,7 @@ void BackendD3D::_drawText(RenderingPayload& p) { const auto [glyphEntry, inserted] = fontFaceEntry.glyphs.insert(row->glyphIndices[x]); - if (inserted && !_drawGlyph(p, row->glyphAdvances[x], fontFaceEntry, glyphEntry)) + if (inserted && !_drawGlyph(p, fontFaceEntry, glyphEntry)) { // A deadlock in this retry loop is detected in _drawGlyphPrepareRetry. // @@ -1030,12 +1035,8 @@ void BackendD3D::_drawText(RenderingPayload& p) if (glyphEntry.data.GetShadingType() != ShadingType::Default) { - auto l = static_cast(lrintf(baselineX + row->glyphOffsets[x].advanceOffset)); - auto t = static_cast(lrintf(baselineY - row->glyphOffsets[x].ascenderOffset)); - - // A non-standard line rendition will make characters appear twice as wide, which requires us to scale the baseline advance by 2. - // We need to do this before applying the glyph offset however, since the offset is already 2x scaled in case of such glyphs. - l <<= lineRenditionScale; + auto l = static_cast(lrintf((baselineX + row->glyphOffsets[x].advanceOffset) * scaleX)); + auto t = static_cast(lrintf((baselineY - row->glyphOffsets[x].ascenderOffset) * scaleY)); l += glyphEntry.data.offset.x; t += glyphEntry.data.offset.y; @@ -1062,6 +1063,11 @@ void BackendD3D::_drawText(RenderingPayload& p) } } + if (!row->gridLineRanges.empty()) + { + _drawGridlineRow(p, row, y); + } + if (p.invalidatedRows.contains(y)) { dirtyTop = std::min(dirtyTop, row->dirtyTop); @@ -1084,33 +1090,46 @@ void BackendD3D::_drawText(RenderingPayload& p) // translate text like "!=" into a glyph that looks like "≠" (just 2 columns wide and not 1). // Glyphs like that still need to be colored in potentially multiple colors however, so this // function will handle these ligatures by splitting them up into multiple QuadInstances. 
+// +// It works by iteratively splitting the wide glyph into shorter and shorter segments like so +// (whitespaces indicate that the glyph was split up in a leading and trailing half): +// Renderer + Renderer -->|owns| RenderThread + Renderer -.-> RenderEngineBase + %% Mermaid.js has no support for backwards arrow at the moment + RenderEngineBase <-.->|extends| GdiEngine + RenderEngineBase <-.->|extends| DxEngine + Renderer ----> AtlasEngine + AtlasEngine.cpp <--> AtlasEngine.api.cpp + AtlasEngine.cpp <--> AtlasEngine.r.cpp + AtlasEngine.r.cpp --> BackendD2D.cpp + AtlasEngine.r.cpp --> BackendD3D.cpp + BackendD2D.cpp -.- Backend.cpp + BackendD3D.cpp -.- Backend.cpp +``` + +As you can see, breaking the text buffer down into GDI-style primitives just to rebuild them into DirectWrite ones, is pretty wasteful. It's also incredibly bug prone. It would be beneficial if the TextBuffer and rendering settings were given directly to AtlasEngine so it can do its own bidding. + +## BackendD3D + +The primary entrypoint for rendering is `IBackend::Render` and `BackendD3D` implements it via the following functions, by calling them one by one in the order listed here. 
+ +### `_handleSettingsUpdate` + +```mermaid +graph TD + Render --> _handleSettingsUpdate + _handleSettingsUpdate -->|font changes| _updateFontDependents --> _d2dRenderTargetUpdateFontSettings + _handleSettingsUpdate -->|misc changes| _recreateCustomShader + _handleSettingsUpdate --->|misc changes| _recreateCustomRenderTargetView + _handleSettingsUpdate ---->|size changes| _recreateBackgroundColorBitmap + _handleSettingsUpdate -----> _recreateConstBuffer + _handleSettingsUpdate ------> _setupDeviceContextState +``` + +### `_drawBackground` + +```mermaid +graph TD + Render --> _drawBackground + _drawBackground --> _uploadBackgroundBitmap +``` + +### `_drawCursorPart1` / `_drawCursorPart2` + +```mermaid +graph TD + Render --> _drawCursorPart1["_drawCursorPart1\nruns before _drawText\ndraws cursors that are behind the text"] + Render --> _drawCursorPart2["_drawCursorPart2\nruns after _drawText\ndraws inverted cursors"] + _drawCursorPart1 -.->|_cursorRects| _drawCursorPart2 +``` + +### `_drawText` + +```mermaid +graph TD + Render --> _drawText + + _drawText --> foreachRow(("for each row")) + foreachRow --> foreachRow + foreachRow --> foreachFont(("for each font face")) + foreachFont --> foreachFont + foreachFont --> foreachGlyph(("for each glyph")) + foreachGlyph --> foreachGlyph + + foreachGlyph --> _glyphAtlasMap[("font/glyph-pair lookup in\nglyph cache hashmap")] + _glyphAtlasMap --> drawGlyph + drawGlyph --> _appendQuad["_appendQuad\nstages the glyph for later drawing"] + _glyphAtlasMap --> _appendQuad + + subgraph drawGlyph["if glyph is missing"] + _drawGlyph["_drawGlyph\n(defers to _drawSoftFontGlyph for soft fonts)"] + + _drawGlyph -.->|if glyph cache is full| _drawGlyphPrepareRetry + _drawGlyphPrepareRetry --> _flushQuads["_flushQuads\ndraws the current state\ninto the render target"] + _flushQuads --> _recreateInstanceBuffers["_recreateInstanceBuffers\nallocates a GPU buffer\nfor our glyph instances"] + _drawGlyphPrepareRetry --> 
_resetGlyphAtlas["_resetGlyphAtlas\nclears the glyph texture"] + _resetGlyphAtlas --> _resizeGlyphAtlas["_resizeGlyphAtlas\nresizes the glyph texture if it's still small"] + + _drawGlyph -.->|if it's a DECDHL glyph| _splitDoubleHeightGlyph["_splitDoubleHeightGlyph\nDECDHL glyphs are split up into their\ntop/bottom halves to emulate clip rects"] + end + + foreachGlyph -.-> _drawTextOverlapSplit["_drawTextOverlapSplit\nsplits overly wide glyphs up into smaller chunks to support\nforeground color changes within the ligature"] + + foreachRow -.->|if gridlines exist| _drawGridlineRow["_drawGridlineRow\ndraws underlines, etc."] +``` + +### `_drawSelection` + +```mermaid +graph TD + Render --> _drawSelection +``` + +### `_executeCustomShader` + +```mermaid +graph TD + Render --> _executeCustomShader +``` diff --git a/src/renderer/atlas/atlas.vcxproj b/src/renderer/atlas/atlas.vcxproj index 03c6c9e52df..689ec6ff8d7 100644 --- a/src/renderer/atlas/atlas.vcxproj +++ b/src/renderer/atlas/atlas.vcxproj @@ -88,6 +88,9 @@ /O3 /Qstrip_debug /Qstrip_reflect %(AdditionalOptions) + + + From 900b6a9d53dbc6da8ecd37489607eee28879674b Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Tue, 25 Apr 2023 20:08:13 +0200 Subject: [PATCH 34/37] Change the inverted cursor rendering approach --- src/renderer/atlas/AtlasEngine.cpp | 133 ++++++++++++----------- src/renderer/atlas/BackendD2D.cpp | 68 ++++-------- src/renderer/atlas/BackendD3D.cpp | 163 +++++++++++++++-------------- src/renderer/atlas/BackendD3D.h | 31 ++---- src/renderer/atlas/common.h | 10 ++ 5 files changed, 197 insertions(+), 208 deletions(-) diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp index eb6657182a4..a5f8a95cb6a 100644 --- a/src/renderer/atlas/AtlasEngine.cpp +++ b/src/renderer/atlas/AtlasEngine.cpp @@ -87,75 +87,88 @@ try _api.invalidatedRows.start = std::min(_api.invalidatedRows.start, _p.s->cellCount.y); _api.invalidatedRows.end = clamp(_api.invalidatedRows.end, 
_api.invalidatedRows.start, _p.s->cellCount.y); } + + const auto allInvalid = _api.invalidatedRows == range{ 0, _p.s->cellCount.y }; + + // Avoid scrolling if everything's invalid anyways. This isn't here for performance or correctness + // (the code also works without this), but rather because it helps me reason about the way this works. + // For instance it ensures we don't pass a scroll rect to Present1() when effectively nothing is scrolling. + if (allInvalid) { - const auto limit = gsl::narrow_cast(_p.s->cellCount.y & 0x7fff); - _api.scrollOffset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); + _api.scrollOffset = 0; } - - // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". - if (const auto offset = _api.scrollOffset) + else { - const auto nothingInvalid = _api.invalidatedRows.start == _api.invalidatedRows.end; - - if (offset < 0) - { - // scrollOffset/offset = -1 - // +----------+ +----------+ - // | | | xxxxxxxxx| - // | xxxxxxxxx| -> |xxxxxxx | - // |xxxxxxx | | | - // +----------+ +----------+ - const u16 begRow = _p.s->cellCount.y + offset; - _api.invalidatedRows.start = nothingInvalid ? begRow : std::min(_api.invalidatedRows.start, begRow); - _api.invalidatedRows.end = _p.s->cellCount.y; - - const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin()); - std::copy_n(_p.rows.begin(), -offset, dst); - } - else - { - // scrollOffset/offset = 1 - // +----------+ +----------+ - // | xxxxxxxxx| | | - // |xxxxxxx | -> | xxxxxxxxx| - // | | |xxxxxxx | - // +----------+ +----------+ - const u16 endRow = offset; - _api.invalidatedRows.start = 0; - _api.invalidatedRows.end = nothingInvalid ? 
endRow : std::max(_api.invalidatedRows.end, endRow); - - const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin()); - std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst); - } + const auto limit = gsl::narrow_cast(_p.s->cellCount.y & 0x7fff); + const auto offset = gsl::narrow_cast(clamp(_api.scrollOffset, -limit, limit)); - std::swap(_p.rows, _p.rowsScratch); + _api.scrollOffset = offset; - // Scrolling the background bitmap is a lot easier because we can rely on memmove which works - // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. + // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid". + if (offset) { - const auto srcOffset = std::max(0, -offset) * gsl::narrow_cast(_p.colorBitmapRowStride); - const auto dstOffset = std::max(0, offset) * gsl::narrow_cast(_p.colorBitmapRowStride); - const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset); - assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride); - assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride); + const auto nothingInvalid = _api.invalidatedRows.start == _api.invalidatedRows.end; - auto src = _p.colorBitmap.data() + srcOffset; - auto dst = _p.colorBitmap.data() + dstOffset; - const auto bytes = count * sizeof(u32); + if (offset < 0) + { + // scrollOffset/offset = -1 + // +----------+ +----------+ + // | | | xxxxxxxxx| + // | xxxxxxxxx| -> |xxxxxxx | + // |xxxxxxx | | | + // +----------+ +----------+ + const u16 begRow = _p.s->cellCount.y + offset; + _api.invalidatedRows.start = nothingInvalid ? 
begRow : std::min(_api.invalidatedRows.start, begRow); + _api.invalidatedRows.end = _p.s->cellCount.y; + + const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin()); + std::copy_n(_p.rows.begin(), -offset, dst); + } + else + { + // scrollOffset/offset = 1 + // +----------+ +----------+ + // | xxxxxxxxx| | | + // |xxxxxxx | -> | xxxxxxxxx| + // | | |xxxxxxx | + // +----------+ +----------+ + const u16 endRow = offset; + _api.invalidatedRows.start = 0; + _api.invalidatedRows.end = nothingInvalid ? endRow : std::max(_api.invalidatedRows.end, endRow); + + const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin()); + std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst); + } - for (size_t i = 0; i < 2; ++i) + std::swap(_p.rows, _p.rowsScratch); + + // Scrolling the background bitmap is a lot easier because we can rely on memmove which works + // with both forwards and backwards copying. It's a mystery why the STL doesn't have this. { - // Avoid bumping the colorBitmapGeneration unless necessary. This approx. further halves - // the (already small) GPU load. This could easily be replaced with some custom SIMD - // to avoid going over the memory twice, but... that's a story for another day. 
- if (memcmp(dst, src, bytes) != 0) + const auto srcOffset = std::max(0, -offset) * gsl::narrow_cast(_p.colorBitmapRowStride); + const auto dstOffset = std::max(0, offset) * gsl::narrow_cast(_p.colorBitmapRowStride); + const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset); + assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride); + assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride); + + auto src = _p.colorBitmap.data() + srcOffset; + auto dst = _p.colorBitmap.data() + dstOffset; + const auto bytes = count * sizeof(u32); + + for (size_t i = 0; i < 2; ++i) { - memmove(dst, src, bytes); - _p.colorBitmapGenerations[i].bump(); + // Avoid bumping the colorBitmapGeneration unless necessary. This approx. further halves + // the (already small) GPU load. This could easily be replaced with some custom SIMD + // to avoid going over the memory twice, but... that's a story for another day. + if (memcmp(dst, src, bytes) != 0) + { + memmove(dst, src, bytes); + _p.colorBitmapGenerations[i].bump(); + } + + src += _p.colorBitmapDepthStride; + dst += _p.colorBitmapDepthStride; } - - src += _p.colorBitmapDepthStride; - dst += _p.colorBitmapDepthStride; } } } @@ -177,7 +190,7 @@ try _p.cursorRect = {}; _p.scrollOffset = _api.scrollOffset; - if (_api.invalidatedRows.start != _api.invalidatedRows.end) + if (_api.invalidatedRows.non_empty()) { const auto deltaPx = _api.scrollOffset * _p.s->font->cellSize.y; const til::CoordType targetSizeX = _p.s->targetSize.x; @@ -213,7 +226,7 @@ try // I feel a little bit like this is a hack, but I'm not sure how to better express this. // This ensures that we end up calling Present1() without dirty rects if the swap chain is // recreated/resized, because DXGI requires you to then call Present1() without dirty rects. 
- if (_api.invalidatedRows == range{ 0, _p.s->cellCount.y }) + if (allInvalid) { _p.dirtyRectInPx.top = 0; _p.dirtyRectInPx.bottom = targetSizeY; diff --git a/src/renderer/atlas/BackendD2D.cpp b/src/renderer/atlas/BackendD2D.cpp index a57e1358255..f9e14275117 100644 --- a/src/renderer/atlas/BackendD2D.cpp +++ b/src/renderer/atlas/BackendD2D.cpp @@ -484,38 +484,17 @@ void BackendD2D::_drawCursorPart1(const RenderingPayload& p) } const auto cursorColor = p.s->cursor->cursorColor; - if (cursorColor == 0xffffffff) - { - const auto cursorSize = p.cursorRect.size(); - if (cursorSize != _cursorBitmapSize) - { - _resizeCursorBitmap(p, cursorSize); - } - - const auto backgroundBitmapOffset = p.cursorRect.top * p.colorBitmapRowStride; - const auto cellSizeX = static_cast(p.s->font->cellSize.x); - const auto cellSizeY = static_cast(p.s->font->cellSize.y); - const auto offsetX = p.cursorRect.left * cellSizeX; - const auto offsetY = p.cursorRect.top * cellSizeY; - D2D1_RECT_F srcRect{ - .bottom = cursorSize.height * cellSizeY, - }; - D2D1_RECT_F dstRect{ - .top = offsetY, - .bottom = offsetY + srcRect.bottom, + if (cursorColor != 0xffffffff) + { + const D2D1_RECT_F rect{ + static_cast(p.cursorRect.left * p.s->font->cellSize.x), + static_cast(p.cursorRect.top * p.s->font->cellSize.y), + static_cast(p.cursorRect.right * p.s->font->cellSize.x), + static_cast(p.cursorRect.bottom * p.s->font->cellSize.y), }; - - for (til::CoordType x = 0; x < cursorSize.width; ++x) - { - const auto bg = p.backgroundBitmap[backgroundBitmapOffset + x]; - const auto brush = _brushWithColor(bg ^ 0x3f3f3f); - srcRect.left = x * cellSizeX; - srcRect.right = srcRect.left + cellSizeX; - dstRect.left = srcRect.left + offsetX; - dstRect.right = srcRect.right + offsetX; - _renderTarget->FillOpacityMask(_cursorBitmap.get(), brush, &dstRect, &srcRect); - } + const auto brush = _brushWithColor(cursorColor); + _drawCursor(p, _renderTarget.get(), rect, brush); } } @@ -526,26 +505,19 @@ void 
BackendD2D::_drawCursorPart2(const RenderingPayload& p) return; } - const auto cursorColor = p.s->cursor->cursorColor; - const D2D1_POINT_2F target{ - static_cast(p.cursorRect.left * p.s->font->cellSize.x), - static_cast(p.cursorRect.top * p.s->font->cellSize.y), - }; - - if (cursorColor == 0xffffffff) - { - _renderTarget->DrawImage(_cursorBitmap.get(), &target, nullptr, D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR, D2D1_COMPOSITE_MODE_MASK_INVERT); - } - else + if (p.s->cursor->cursorColor == 0xffffffff) { - const D2D1_RECT_F rect{ - target.x, - target.y, - static_cast(p.cursorRect.right * p.s->font->cellSize.x), - static_cast(p.cursorRect.bottom * p.s->font->cellSize.y), + const auto cursorSize = p.cursorRect.size(); + if (cursorSize != _cursorBitmapSize) + { + _resizeCursorBitmap(p, cursorSize); + } + + const D2D1_POINT_2F target{ + static_cast(p.cursorRect.left * p.s->font->cellSize.x), + static_cast(p.cursorRect.top * p.s->font->cellSize.y), }; - const auto brush = _brushWithColor(cursorColor); - _drawCursor(p, _renderTarget.get(), rect, brush); + _renderTarget->DrawImage(_cursorBitmap.get(), &target, nullptr, D2D1_INTERPOLATION_MODE_NEAREST_NEIGHBOR, D2D1_COMPOSITE_MODE_MASK_INVERT); } } diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 2bc7cea4f6b..fa50b8070b1 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -177,24 +177,6 @@ BackendD3D::BackendD3D(const RenderingPayload& p) THROW_IF_FAILED(p.device->CreateBlendState(&desc, _blendState.addressof())); } - { - static constexpr D3D11_BLEND_DESC desc{ - .RenderTarget = { { - .BlendEnable = TRUE, - .SrcBlend = D3D11_BLEND_ONE, - .DestBlend = D3D11_BLEND_ONE, - .BlendOp = D3D11_BLEND_OP_SUBTRACT, - // In order for D3D to be okay with us using dual source blending in the shader, we need to use dual - // source blending in the blend state. Alternatively we could write an extra shader for these cursors. 
- .SrcBlendAlpha = D3D11_BLEND_SRC1_ALPHA, - .DestBlendAlpha = D3D11_BLEND_ZERO, - .BlendOpAlpha = D3D11_BLEND_OP_ADD, - .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, - } }, - }; - THROW_IF_FAILED(p.device->CreateBlendState(&desc, _blendStateInvert.addressof())); - } - #ifndef NDEBUG _sourceDirectory = std::filesystem::path{ __FILE__ }.parent_path(); _sourceCodeWatcher = wil::make_folder_change_reader_nothrow(_sourceDirectory.c_str(), false, wil::FolderChangeEvents::FileName | wil::FolderChangeEvents::LastWriteTime, [this](wil::FolderChangeEvent, PCWSTR path) { @@ -239,9 +221,9 @@ void BackendD3D::Render(RenderingPayload& p) #endif _drawBackground(p); - _drawCursorPart1(p); + _drawCursorBackground(p); _drawText(p); - _drawCursorPart2(p); + //_drawCursorPart2(p); _drawSelection(p); #if ATLAS_DEBUG_SHOW_DIRTY _debugShowDirty(p); @@ -803,11 +785,6 @@ void BackendD3D::_resizeGlyphAtlas(const RenderingPayload& p, const u16 u, const _rectPackerData = Buffer{ u }; } -void BackendD3D::_markStateChange(ID3D11BlendState* blendState) -{ - _instancesStateChanges.emplace_back(blendState, _instancesCount); -} - BackendD3D::QuadInstance& BackendD3D::_getLastQuad() noexcept { assert(_instancesCount != 0); @@ -847,6 +824,11 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) return; } + if (p.s->cursor->cursorColor == 0xffffffff && !_cursorRects.empty()) + { + _drawCursorInvert(); + } + // TODO: Shrink instances buffer if (_instancesCount > _instanceBufferCapacity) { @@ -879,24 +861,7 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) // Instead I found that packing instance data as tightly as possible made the biggest performance difference, // and packing 16 bit integers with ID3D11InputLayout is quite a bit more convenient too. - // This will cause the loop below to emit one final DrawIndexedInstanced() for the remainder of instances. 
- _markStateChange(nullptr); - - size_t previousOffset = 0; - for (const auto& state : _instancesStateChanges) - { - if (const auto count = state.offset - previousOffset) - { - p.deviceContext->DrawIndexedInstanced(6, count, 0, 0, previousOffset); - } - if (state.blendState) - { - p.deviceContext->OMSetBlendState(state.blendState, nullptr, 0xffffffff); - } - previousOffset = state.offset; - } - - _instancesStateChanges.clear(); + p.deviceContext->DrawIndexedInstanced(6, static_cast(_instancesCount), 0, 0, 0); _instancesCount = 0; } @@ -945,7 +910,6 @@ void BackendD3D::_drawBackground(const RenderingPayload& p) .shadingType = ShadingType::Background, .size = p.s->targetSize, }; - _flushQuads(p); } void BackendD3D::_uploadBackgroundBitmap(const RenderingPayload& p) @@ -1256,18 +1220,18 @@ bool BackendD3D::_drawGlyph(const RenderingPayload& p, const BackendD3D::AtlasFo // This calculates the black box of the glyph, or in other words, // it's extents/size relative to its baseline origin (at 0,0). // - // box.top --------++-----######--+ + // bounds.top ------++-----######--+ // (-7) || ############ // ||#### #### // |### ##### - // baseline _____ |### #####| - // origin \ |############# | - // (= 0,0) \||########### | + // baseline ______ |### #####| + // origin \|############# | + // (= 0,0) \|########### | // ++-------###---+ // ## ### | - // box.bottom -----+#########-----+ + // bounds.bottom ---+#########-----+ // (+2) | | - // box.left box.right + // bounds.left bounds.right // (-1) (+14) // @@ -1625,7 +1589,7 @@ void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* ro } } -void BackendD3D::_drawCursorPart1(const RenderingPayload& p) +void BackendD3D::_drawCursorBackground(const RenderingPayload& p) { _cursorRects.clear(); @@ -1654,7 +1618,7 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) static_cast(p.s->font->cellSize.x * (x1 - x0)), p.s->font->cellSize.y, }; - const auto color = cursorColor == 0xffffffff ? 
bg ^ 0x3f3f3f : cursorColor; + const auto color = cursorColor == 0xffffffff ? bg ^ 0xc0c0c0 : cursorColor; auto& c0 = _cursorRects.emplace_back(position, size, color); switch (static_cast(p.s->cursor->cursorType)) @@ -1716,51 +1680,92 @@ void BackendD3D::_drawCursorPart1(const RenderingPayload& p) } } + for (const auto& c : _cursorRects) + { + _appendQuad() = { + .shadingType = ShadingType::SolidFill, + .position = c.position, + .size = c.size, + .color = c.color, + }; + } + if (cursorColor == 0xffffffff) { for (auto& c : _cursorRects) { - _appendQuad() = { - .shadingType = ShadingType::SolidFill, - .position = c.position, - .size = c.size, - .color = c.color, - }; c.color = 0xffffffff; } } } -void BackendD3D::_drawCursorPart2(const RenderingPayload& p) +void BackendD3D::_drawCursorInvert() { - if (_cursorRects.empty()) + // NOTE: _appendQuad() may reallocate the _instances vector. It's important to iterate + // by index, because pointers (or iterators) would get invalidated. It's also important + // to cache the original _instancesCount since it'll get changed with each append. 
+ const auto instancesCount = _instancesCount; + + for (const auto& c : _cursorRects) { - return; - } + const int cursorL = c.position.x; + const int cursorT = c.position.y; + const int cursorR = cursorL + c.size.x; + const int cursorB = cursorT + c.size.y; - const auto color = p.s->cursor->cursorColor; + for (size_t i = 0; i < instancesCount; ++i) + { + const auto& it = _instances[i]; + const auto shadingType = it.shadingType; - if (color == 0xffffffff) - { - _markStateChange(_blendStateInvert.get()); - } + if (shadingType < ShadingType::TextGrayscale || shadingType > ShadingType::TextClearType) + { + continue; + } - for (const auto& c : _cursorRects) - { - _appendQuad() = { - .shadingType = ShadingType::SolidFill, - .position = c.position, - .size = c.size, - .color = c.color, - }; - } + const int instanceL = it.position.x; + const int instanceT = it.position.y; + const int instanceR = instanceL + it.size.x; + const int instanceB = instanceT + it.size.y; - if (color == 0xffffffff) - { - _markStateChange(_blendState.get()); + if (instanceL < cursorR && cursorL < instanceR && instanceT < cursorB && cursorT < instanceB) + { + // The _instances vector is _huge_ (easily up to 100k items) whereas only 1-2 items will actually overlap + // with the cursor. --> Make this loop more compact by putting as much as possible into a function call. 
+ _drawCursorInvertSlowPath(c, it); + } + } } } +void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, const QuadInstance& it) +{ + const int cursorL = c.position.x; + const int cursorT = c.position.y; + const int cursorR = cursorL + c.size.x; + const int cursorB = cursorT + c.size.y; + + const int instanceL = it.position.x; + const int instanceT = it.position.y; + const int instanceR = instanceL + it.size.x; + const int instanceB = instanceT + it.size.y; + + const auto l = std::max(cursorL, instanceL); + const auto t = std::max(cursorT, instanceT); + const auto w = std::min(cursorR, instanceR) - l; + const auto h = std::min(cursorB, instanceB) - t; + const auto u = it.texcoord.x + l - instanceL; + const auto v = it.texcoord.y + t - instanceT; + + _appendQuad() = { + it.shadingType, + { static_cast(l), static_cast(t) }, + { static_cast(w), static_cast(h) }, + { static_cast(u), static_cast(v) }, + it.color ^ 0x00c0c0c0, + }; +} + void BackendD3D::_drawSelection(const RenderingPayload& p) { u16 y = 0; diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index 7dfbbe6bd56..a5e1cd8e700 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -172,6 +172,13 @@ namespace Microsoft::Console::Render::Atlas }; private: + struct CursorRect + { + i16x2 position; + u16x2 size; + u32 color; + }; + ATLAS_ATTR_COLD void _handleSettingsUpdate(const RenderingPayload& p); void _updateFontDependents(const RenderingPayload& p); void _d2dRenderTargetUpdateFontSettings(const RenderingPayload& p) const noexcept; @@ -187,7 +194,6 @@ namespace Microsoft::Console::Render::Atlas void _d2dEndDrawing(); ATLAS_ATTR_COLD void _resetGlyphAtlas(const RenderingPayload& p); ATLAS_ATTR_COLD void _resizeGlyphAtlas(const RenderingPayload& p, u16 u, u16 v); - void _markStateChange(ID3D11BlendState* blendState); QuadInstance& _getLastQuad() noexcept; QuadInstance& _appendQuad(); ATLAS_ATTR_COLD void _bumpInstancesSize(); @@ -202,8 +208,9 @@ 
namespace Microsoft::Console::Render::Atlas void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); - void _drawCursorPart1(const RenderingPayload& p); - void _drawCursorPart2(const RenderingPayload& p); + void _drawCursorBackground(const RenderingPayload& p); + ATLAS_ATTR_COLD void _drawCursorInvert(); + ATLAS_ATTR_COLD void _drawCursorInvertSlowPath(const CursorRect& c, const QuadInstance& it); void _drawSelection(const RenderingPayload& p); void _executeCustomShader(RenderingPayload& p); @@ -212,7 +219,6 @@ namespace Microsoft::Console::Render::Atlas wil::com_ptr _vertexShader; wil::com_ptr _pixelShader; wil::com_ptr _blendState; - wil::com_ptr _blendStateInvert; wil::com_ptr _vsConstantBuffer; wil::com_ptr _psConstantBuffer; wil::com_ptr _vertexBuffer; @@ -222,17 +228,6 @@ namespace Microsoft::Console::Render::Atlas Buffer _instances; size_t _instancesCount = 0; - // This allows us to batch inverted cursors into the same - // _instanceBuffer upload as the rest of all other instances. - struct StateChange - { - ID3D11BlendState* blendState; - size_t offset; - }; - // 3 allows for 1 state change to _blendStateInvert, followed by 1 change back to _blendState, - // and finally 1 entry to signal the past-the-end size, as used by _flushQuads. - til::small_vector _instancesStateChanges; - wil::com_ptr _customRenderTargetView; wil::com_ptr _customOffscreenTexture; wil::com_ptr _customOffscreenTextureView; @@ -276,12 +271,6 @@ namespace Microsoft::Console::Render::Atlas // An empty-box cursor spanning a wide glyph that has different // background colors on each side results in 6 lines being drawn. 
- struct CursorRect - { - i16x2 position; - u16x2 size; - u32 color; - }; til::small_vector _cursorRects; bool _requiresContinuousRedraw = false; diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h index 84bb3263c52..42a02894fa0 100644 --- a/src/renderer/atlas/common.h +++ b/src/renderer/atlas/common.h @@ -108,6 +108,16 @@ namespace Microsoft::Console::Render::Atlas ATLAS_POD_OPS(range) + constexpr bool empty() const noexcept + { + return start >= end; + } + + constexpr bool non_empty() const noexcept + { + return start < end; + } + constexpr bool contains(T v) const noexcept { return v >= start && v < end; From b1590cc5a4a891f6229dc95cc7e734d0e7d9b4de Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 26 Apr 2023 00:51:28 +0200 Subject: [PATCH 35/37] Improve inverted cursor via hole punching --- src/renderer/atlas/BackendD3D.cpp | 115 +++++++++++++++++++------- src/renderer/atlas/BackendD3D.h | 5 +- src/renderer/atlas/shader_common.hlsl | 1 + src/renderer/atlas/shader_ps.hlsl | 1 - src/tools/RenderingTests/main.cpp | 2 + 5 files changed, 89 insertions(+), 35 deletions(-) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index fa50b8070b1..84d2556b21c 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -223,7 +223,6 @@ void BackendD3D::Render(RenderingPayload& p) _drawBackground(p); _drawCursorBackground(p); _drawText(p); - //_drawCursorPart2(p); _drawSelection(p); #if ATLAS_DEBUG_SHOW_DIRTY _debugShowDirty(p); @@ -806,7 +805,8 @@ BackendD3D::QuadInstance& BackendD3D::_appendQuad() void BackendD3D::_bumpInstancesSize() { - const auto newSize = std::max(256, _instances.size() * 2); + auto newSize = std::max(_instancesCount, _instances.size() * 2); + newSize = std::max(size_t{ 256 }, newSize); Expects(newSize > _instances.size()); // Our render loop heavily relies on memcpy() which is up to between 1.5x (Intel) @@ -1023,7 +1023,7 @@ void 
BackendD3D::_drawText(RenderingPayload& p) if (!row->gridLineRanges.empty()) { - _drawGridlineRow(p, row, y); + _drawGridlines(p, y); } if (p.invalidatedRows.contains(y)) @@ -1484,8 +1484,10 @@ void BackendD3D::_splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasF } } -void BackendD3D::_drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y) +void BackendD3D::_drawGridlines(const RenderingPayload& p, u16 y) { + const auto row = p.rows[y]; + const auto horizontalShift = static_cast(row->lineRendition != LineRendition::SingleWidth); const auto verticalShift = static_cast(row->lineRendition >= LineRendition::DoubleHeightTop); @@ -1683,22 +1685,15 @@ void BackendD3D::_drawCursorBackground(const RenderingPayload& p) for (const auto& c : _cursorRects) { _appendQuad() = { - .shadingType = ShadingType::SolidFill, + .shadingType = ShadingType::Cursor, .position = c.position, .size = c.size, .color = c.color, }; } - - if (cursorColor == 0xffffffff) - { - for (auto& c : _cursorRects) - { - c.color = 0xffffffff; - } - } } +// TODO: _drawCursorInvert() only creates new quads and thus draws on top of the existing glyphs, making them too fat void BackendD3D::_drawCursorInvert() { // NOTE: _appendQuad() may reallocate the _instances vector. 
It's important to iterate @@ -1718,28 +1713,29 @@ void BackendD3D::_drawCursorInvert() const auto& it = _instances[i]; const auto shadingType = it.shadingType; - if (shadingType < ShadingType::TextGrayscale || shadingType > ShadingType::TextClearType) + if (shadingType >= ShadingType::TextGrayscale && shadingType <= ShadingType::SolidFill) { - continue; - } + const int instanceL = it.position.x; + const int instanceT = it.position.y; + const int instanceR = instanceL + it.size.x; + const int instanceB = instanceT + it.size.y; - const int instanceL = it.position.x; - const int instanceT = it.position.y; - const int instanceR = instanceL + it.size.x; - const int instanceB = instanceT + it.size.y; - - if (instanceL < cursorR && cursorL < instanceR && instanceT < cursorB && cursorT < instanceB) - { - // The _instances vector is _huge_ (easily up to 100k items) whereas only 1-2 items will actually overlap - // with the cursor. --> Make this loop more compact by putting as much as possible into a function call. - _drawCursorInvertSlowPath(c, it); + if (instanceL < cursorR && cursorL < instanceR && instanceT < cursorB && cursorT < instanceB) + { + // The _instances vector is _huge_ (easily up to 100k items) whereas only 1-2 items will actually overlap + // with the cursor. --> Make this loop more compact by putting as much as possible into a function call. + _drawCursorInvertSlowPath(c, i); + } } } } } -void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, const QuadInstance& it) +void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, size_t position) { + // NOTE: _bumpInstancesSize may reallocate below. Create a copy of `it` beforehand. 
+ const auto it = _instances[position]; + const int cursorL = c.position.x; const int cursorT = c.position.y; const int cursorR = cursorL + c.size.x; @@ -1750,14 +1746,69 @@ void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, const QuadInstan const int instanceR = instanceL + it.size.x; const int instanceB = instanceT + it.size.y; - const auto l = std::max(cursorL, instanceL); - const auto t = std::max(cursorT, instanceT); - const auto w = std::min(cursorR, instanceR) - l; - const auto h = std::min(cursorB, instanceB) - t; + const auto l = std::max(cursorL, instanceL); + const auto t = std::max(cursorT, instanceT); + const auto r = std::min(cursorR, instanceR); + const auto b = std::min(cursorB, instanceB); + + // Cut a hole into the original glyph and split it up. This ensures the original glyph + // doesn't dirty the cursor background with its un-inverted/reversed color. + rect cutouts[4]; + size_t cutoutCount = 0; + if (instanceT < t) + { + cutouts[cutoutCount++] = { instanceL, instanceT, instanceR, t }; + } + if (b < instanceB) + { + cutouts[cutoutCount++] = { instanceL, b, instanceR, instanceB }; + } + if (instanceL < l) + { + cutouts[cutoutCount++] = { instanceL, t, l, b }; + } + if (r < instanceR) + { + cutouts[cutoutCount++] = { r, t, instanceR, b }; + } + + if (cutoutCount > 1) + { + const auto delta = cutoutCount - 1; + + _instancesCount += delta; + if (_instancesCount >= _instances.size()) + { + _bumpInstancesSize(); + } + + // Make place for cutoutCount-many items at position. 
+ const auto src = _instances.data() + position; + memmove(src + delta, src, (_instancesCount - position) * sizeof(QuadInstance)); + } + + for (size_t i = 0; i < cutoutCount; ++i) + { + const auto& cutout = cutouts[i]; + auto& target = _instances[position + i]; + const auto w = cutout.right - cutout.left; + const auto h = cutout.bottom - cutout.top; + const auto u = it.texcoord.x + cutout.left - instanceL; + const auto v = it.texcoord.y + cutout.top - instanceT; + + target = it; + target.position = { static_cast(cutout.left), static_cast(cutout.top) }; + target.size = { static_cast(w), static_cast(h) }; + target.texcoord = { static_cast(u), static_cast(v) }; + } + + const auto w = r - l; + const auto h = b - t; const auto u = it.texcoord.x + l - instanceL; const auto v = it.texcoord.y + t - instanceT; + auto& target = cutoutCount ? _appendQuad() : _instances[position]; - _appendQuad() = { + target = { it.shadingType, { static_cast(l), static_cast(t) }, { static_cast(w), static_cast(h) }, diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index a5e1cd8e700..e54a4ddd54e 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -65,6 +65,7 @@ namespace Microsoft::Console::Render::Atlas DottedLine = 4, DottedLineWide = 5, SolidFill = 6, + Cursor = 7, }; // NOTE: Don't initialize any members in this struct. 
This ensures that no @@ -207,10 +208,10 @@ namespace Microsoft::Console::Render::Atlas bool _drawSoftFontGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGlyphPrepareRetry(const RenderingPayload& p); void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); - void _drawGridlineRow(const RenderingPayload& p, const ShapedRow* row, u16 y); + void _drawGridlines(const RenderingPayload& p, u16 y); void _drawCursorBackground(const RenderingPayload& p); ATLAS_ATTR_COLD void _drawCursorInvert(); - ATLAS_ATTR_COLD void _drawCursorInvertSlowPath(const CursorRect& c, const QuadInstance& it); + ATLAS_ATTR_COLD void _drawCursorInvertSlowPath(const CursorRect& c, size_t position); void _drawSelection(const RenderingPayload& p); void _executeCustomShader(RenderingPayload& p); diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index eb356cc8622..13a229dc00f 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -9,6 +9,7 @@ #define SHADING_TYPE_DOTTED_LINE 4 #define SHADING_TYPE_DOTTED_LINE_WIDE 5 #define SHADING_TYPE_SOLID_FILL 6 +#define SHADING_TYPE_CURSOR 7 // clang-format on struct VSData diff --git a/src/renderer/atlas/shader_ps.hlsl b/src/renderer/atlas/shader_ps.hlsl index 4627a927b13..691b06c9661 100644 --- a/src/renderer/atlas/shader_ps.hlsl +++ b/src/renderer/atlas/shader_ps.hlsl @@ -85,7 +85,6 @@ Output main(PSData data) : SV_Target weights = color.aaaa; break; } - case SHADING_TYPE_SOLID_FILL: default: { color = premultiplyColor(data.color); diff --git a/src/tools/RenderingTests/main.cpp b/src/tools/RenderingTests/main.cpp index 03285d23115..9db85e1e840 100644 --- a/src/tools/RenderingTests/main.cpp +++ b/src/tools/RenderingTests/main.cpp @@ -149,6 +149,8 @@ int main() }; static constexpr VTAttributeTest vtAttributeTests[]{ { L"ANSI escape 
SGR:", 0 }, + { L"bold", 1 }, + { L"faint", 2 }, { L"italic", 3 }, { L"underline", 4 }, { L"reverse", 7 }, From 990f57a985bf8f9952de339c8933b40645de9582 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 26 Apr 2023 13:18:24 +0200 Subject: [PATCH 36/37] Fix hole punching algorithm, Implement semi-reverse cursors --- src/renderer/atlas/BackendD3D.cpp | 232 +++++++++++++++++--------- src/renderer/atlas/BackendD3D.h | 20 ++- src/renderer/atlas/shader_common.hlsl | 2 - 3 files changed, 171 insertions(+), 83 deletions(-) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index 84d2556b21c..a1c4bfbe74c 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -824,9 +824,9 @@ void BackendD3D::_flushQuads(const RenderingPayload& p) return; } - if (p.s->cursor->cursorColor == 0xffffffff && !_cursorRects.empty()) + if (!_cursorRects.empty()) { - _drawCursorInvert(); + _drawCursorForeground(p); } // TODO: Shrink instances buffer @@ -1517,7 +1517,7 @@ void BackendD3D::_drawGridlines(const RenderingPayload& p, u16 y) for (; posX < end; posX += textCellWidth) { _appendQuad() = { - .shadingType = ShadingType::SolidFill, + .shadingType = ShadingType::SolidLine, .position = { static_cast(posX), rowTop }, .size = { width, p.s->font->cellSize.y }, .color = r.color, @@ -1562,16 +1562,16 @@ void BackendD3D::_drawGridlines(const RenderingPayload& p, u16 y) } if (r.lines.test(GridLines::Top)) { - appendHorizontalLine(r, p.s->font->gridTop, ShadingType::SolidFill); + appendHorizontalLine(r, p.s->font->gridTop, ShadingType::SolidLine); } if (r.lines.test(GridLines::Bottom)) { - appendHorizontalLine(r, p.s->font->gridBottom, ShadingType::SolidFill); + appendHorizontalLine(r, p.s->font->gridBottom, ShadingType::SolidLine); } if (r.lines.test(GridLines::Underline)) { - appendHorizontalLine(r, p.s->font->underline, ShadingType::SolidFill); + appendHorizontalLine(r, p.s->font->underline, ShadingType::SolidLine); } if 
(r.lines.test(GridLines::HyperlinkUnderline)) { @@ -1581,12 +1581,12 @@ void BackendD3D::_drawGridlines(const RenderingPayload& p, u16 y) { for (const auto pos : p.s->font->doubleUnderline) { - appendHorizontalLine(r, pos, ShadingType::SolidFill); + appendHorizontalLine(r, pos, ShadingType::SolidLine); } } if (r.lines.test(GridLines::Strikethrough)) { - appendHorizontalLine(r, p.s->font->strikethrough, ShadingType::SolidFill); + appendHorizontalLine(r, p.s->font->strikethrough, ShadingType::SolidLine); } } } @@ -1600,6 +1600,13 @@ void BackendD3D::_drawCursorBackground(const RenderingPayload& p) return; } + _cursorPosition = { + p.s->font->cellSize.x * p.cursorRect.left, + p.s->font->cellSize.y * p.cursorRect.top, + p.s->font->cellSize.x * p.cursorRect.right, + p.s->font->cellSize.y * p.cursorRect.bottom, + }; + const auto cursorColor = p.s->cursor->cursorColor; const auto offset = p.cursorRect.top * p.colorBitmapRowStride; @@ -1620,8 +1627,10 @@ void BackendD3D::_drawCursorBackground(const RenderingPayload& p) static_cast(p.s->font->cellSize.x * (x1 - x0)), p.s->font->cellSize.y, }; - const auto color = cursorColor == 0xffffffff ? bg ^ 0xc0c0c0 : cursorColor; - auto& c0 = _cursorRects.emplace_back(position, size, color); + const auto isInverted = cursorColor == 0xffffffff; + const auto background = isInverted ? bg ^ 0xc0c0c0 : cursorColor; + const auto foreground = isInverted ? 
0 : bg; + auto& c0 = _cursorRects.emplace_back(position, size, background, foreground); switch (static_cast(p.s->cursor->cursorType)) { @@ -1688,19 +1697,69 @@ void BackendD3D::_drawCursorBackground(const RenderingPayload& p) .shadingType = ShadingType::Cursor, .position = c.position, .size = c.size, - .color = c.color, + .color = c.background, }; } } -// TODO: _drawCursorInvert() only creates new quads and thus draws on top of the existing glyphs, making them too fat -void BackendD3D::_drawCursorInvert() +void BackendD3D::_drawCursorForeground(const RenderingPayload& p) { // NOTE: _appendQuad() may reallocate the _instances vector. It's important to iterate // by index, because pointers (or iterators) would get invalidated. It's also important // to cache the original _instancesCount since it'll get changed with each append. - const auto instancesCount = _instancesCount; + auto instancesCount = _instancesCount; + size_t instancesOffset = 0; + + assert(instancesCount != 0); + + // All of the text drawing primitives are drawn as a single block, after drawing + // the background and cursor background and before drawing the selection overlay. + // To avoid having to check the shadingType in the loop below, we'll find the + // start and end of this "block" here in advance. + for (; instancesOffset < instancesCount; ++instancesOffset) + { + const auto shadingType = _instances[instancesOffset].shadingType; + if (shadingType >= ShadingType::TextDrawingFirst && shadingType <= ShadingType::TextDrawingLast) + { + break; + } + } + // We can also skip any instances (= any rows) at the beginning that are clearly not overlapping with + // the cursor. This reduces the CPU cost of this function by roughly half (a few microseconds).
+ for (; instancesOffset < instancesCount; ++instancesOffset) + { + const auto& it = _instances[instancesOffset]; + if ((it.position.y + it.size.y) > _cursorPosition.top) + { + break; + } + } + // Now do the same thing as above, but backwards from the end. + for (; instancesCount > instancesOffset; --instancesCount) + { + const auto shadingType = _instances[instancesCount - 1].shadingType; + if (shadingType >= ShadingType::TextDrawingFirst && shadingType <= ShadingType::TextDrawingLast) + { + break; + } + } + for (; instancesCount > instancesOffset; --instancesCount) + { + const auto& it = _instances[instancesCount - 1]; + if (it.position.y < _cursorPosition.bottom) + { + break; + } + } + + // For cursors with multiple rectangles this really isn't all that fast, because it iterates + // over the instances vector multiple times. But I also don't really care, because the + // double-underline and empty-box cursors are pretty annoying to deal with in any case. + // + // It would definitely help if instead of position & size QuadInstances would use left/top/right/bottom + // with f32, because then computing the intersection would be much faster via SIMD. But that would + // make the struct size larger and cost more power to transmit more data to the GPU. ugh. 
for (const auto& c : _cursorRects) { const int cursorL = c.position.x; @@ -1708,33 +1767,32 @@ void BackendD3D::_drawCursorInvert() const int cursorR = cursorL + c.size.x; const int cursorB = cursorT + c.size.y; - for (size_t i = 0; i < instancesCount; ++i) + for (size_t i = instancesOffset; i < instancesCount; ++i) { const auto& it = _instances[i]; - const auto shadingType = it.shadingType; + const int instanceL = it.position.x; + const int instanceT = it.position.y; + const int instanceR = instanceL + it.size.x; + const int instanceB = instanceT + it.size.y; - if (shadingType >= ShadingType::TextGrayscale && shadingType <= ShadingType::SolidFill) + if (instanceL < cursorR && instanceR > cursorL && instanceT < cursorB && instanceB > cursorT) { - const int instanceL = it.position.x; - const int instanceT = it.position.y; - const int instanceR = instanceL + it.size.x; - const int instanceB = instanceT + it.size.y; - - if (instanceL < cursorR && cursorL < instanceR && instanceT < cursorB && cursorT < instanceB) - { - // The _instances vector is _huge_ (easily up to 100k items) whereas only 1-2 items will actually overlap - // with the cursor. --> Make this loop more compact by putting as much as possible into a function call. - _drawCursorInvertSlowPath(c, i); - } + // The _instances vector is _huge_ (easily up to 50k items) whereas only 1-2 items will actually overlap + // with the cursor. --> Make this loop more compact by putting as much as possible into a function call. + const auto added = _drawCursorForegroundSlowPath(p, c, i); + i += added; + instancesCount += added; } } } } -void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, size_t position) +size_t BackendD3D::_drawCursorForegroundSlowPath(const RenderingPayload& p, const CursorRect& c, size_t offset) { - // NOTE: _bumpInstancesSize may reallocate below. Create a copy of `it` beforehand. - const auto it = _instances[position]; + // We won't die from copying 24 bytes. 
It simplifies the code below especially in + // respect to when/if we overwrite the _instances[offset] slot with a cutout. +#pragma warning(suppress : 26820) // This is a potentially expensive copy operation. Consider using a reference unless a copy is required (p.9). + const auto it = _instances[offset]; const int cursorL = c.position.x; const int cursorT = c.position.y; @@ -1746,75 +1804,95 @@ void BackendD3D::_drawCursorInvertSlowPath(const CursorRect& c, size_t position) const int instanceR = instanceL + it.size.x; const int instanceB = instanceT + it.size.y; - const auto l = std::max(cursorL, instanceL); - const auto t = std::max(cursorT, instanceT); - const auto r = std::min(cursorR, instanceR); - const auto b = std::min(cursorB, instanceB); + const auto intersectionL = std::max(cursorL, instanceL); + const auto intersectionT = std::max(cursorT, instanceT); + const auto intersectionR = std::min(cursorR, instanceR); + const auto intersectionB = std::min(cursorB, instanceB); - // Cut a hole into the original glyph and split it up. This ensures the original glyph - // doesn't dirty the cursor background with its un-inverted/reversed color. + // We should only get called if there's actually an intersection. + assert(intersectionL < intersectionR && intersectionT < intersectionB); + + // We need to ensure that the glyph doesn't "dirty" the cursor background with its un-inverted/un-reversed color. + // If it did, and we'd draw the inverted/reversed glyph on top, it would look smudged. + // As such, this cuts a cursor-sized hole into the original glyph and splits it up. + // + // > Always initialize an object + // I would pay money if this warning was a little smarter. The array can remain uninitialized, + // because it acts like a tiny small_vector, but without the assertions. +#pragma warning(suppress : 26494) // Variable 'cutouts' is uninitialized. Always initialize an object (type.5). 
rect cutouts[4]; size_t cutoutCount = 0; - if (instanceT < t) + + if (instanceT < intersectionT) { - cutouts[cutoutCount++] = { instanceL, instanceT, instanceR, t }; + cutouts[cutoutCount++] = { instanceL, instanceT, instanceR, intersectionT }; } - if (b < instanceB) + if (instanceB > intersectionB) { - cutouts[cutoutCount++] = { instanceL, b, instanceR, instanceB }; + cutouts[cutoutCount++] = { instanceL, intersectionB, instanceR, instanceB }; } - if (instanceL < l) + if (instanceL < intersectionL) { - cutouts[cutoutCount++] = { instanceL, t, l, b }; + cutouts[cutoutCount++] = { instanceL, intersectionT, intersectionL, intersectionB }; } - if (r < instanceR) + if (instanceR > intersectionR) { - cutouts[cutoutCount++] = { r, t, instanceR, b }; + cutouts[cutoutCount++] = { intersectionR, intersectionT, instanceR, intersectionB }; } - if (cutoutCount > 1) + const auto addedInstances = cutoutCount ? cutoutCount - 1 : 0; + + // Make place for cutoutCount-many items at position. + // NOTE: _bumpInstancesSize() reallocates the vector and all references to _instances will now be invalid. + if (addedInstances) { - const auto delta = cutoutCount - 1; + const auto instancesCount = _instancesCount; - _instancesCount += delta; + _instancesCount += addedInstances; if (_instancesCount >= _instances.size()) { _bumpInstancesSize(); } - // Make place for cutoutCount-many items at position. - const auto src = _instances.data() + position; - memmove(src + delta, src, (_instancesCount - position) * sizeof(QuadInstance)); + const auto src = _instances.data() + offset; + const auto dst = src + addedInstances; + const auto count = instancesCount - offset; + assert(src >= _instances.begin() && (src + count) < _instances.end()); + assert(dst >= _instances.begin() && (dst + count) < _instances.end()); + memmove(dst, src, count * sizeof(QuadInstance)); } + // Now that there's space we can write the glyph cutouts back into the instances vector. 
for (size_t i = 0; i < cutoutCount; ++i) { const auto& cutout = cutouts[i]; - auto& target = _instances[position + i]; - const auto w = cutout.right - cutout.left; - const auto h = cutout.bottom - cutout.top; - const auto u = it.texcoord.x + cutout.left - instanceL; - const auto v = it.texcoord.y + cutout.top - instanceT; - - target = it; - target.position = { static_cast(cutout.left), static_cast(cutout.top) }; - target.size = { static_cast(w), static_cast(h) }; - target.texcoord = { static_cast(u), static_cast(v) }; - } - - const auto w = r - l; - const auto h = b - t; - const auto u = it.texcoord.x + l - instanceL; - const auto v = it.texcoord.y + t - instanceT; - auto& target = cutoutCount ? _appendQuad() : _instances[position]; - - target = { - it.shadingType, - { static_cast(l), static_cast(t) }, - { static_cast(w), static_cast(h) }, - { static_cast(u), static_cast(v) }, - it.color ^ 0x00c0c0c0, - }; + auto& target = _instances[offset + i]; + + target.shadingType = it.shadingType; + target.position.x = static_cast(cutout.left); + target.position.y = static_cast(cutout.top); + target.size.x = static_cast(cutout.right - cutout.left); + target.size.y = static_cast(cutout.bottom - cutout.top); + target.texcoord.x = static_cast(it.texcoord.x + cutout.left - instanceL); + target.texcoord.y = static_cast(it.texcoord.y + cutout.top - instanceT); + target.color = it.color; + } + + const auto cursorColor = p.s->cursor->cursorColor; + // If the cursor covers the entire glyph (like, let's say, a full-box cursor with an ASCII character), + // we don't append a new quad, but rather reuse the one that already exists (cutoutCount == 0). + auto& target = cutoutCount ? 
_appendQuad() : _instances[offset]; + + target.shadingType = it.shadingType; + target.position.x = static_cast(intersectionL); + target.position.y = static_cast(intersectionT); + target.size.x = static_cast(intersectionR - intersectionL); + target.size.y = static_cast(intersectionB - intersectionT); + target.texcoord.x = static_cast(it.texcoord.x + intersectionL - instanceL); + target.texcoord.y = static_cast(it.texcoord.y + intersectionT - instanceT); + target.color = cursorColor == 0xffffffff ? it.color ^ 0xc0c0c0 : c.foreground; + + return addedInstances; } void BackendD3D::_drawSelection(const RenderingPayload& p) @@ -1836,7 +1914,7 @@ void BackendD3D::_drawSelection(const RenderingPayload& p) else { _appendQuad() = { - .shadingType = ShadingType::SolidFill, + .shadingType = ShadingType::Selection, .position = { p.s->font->cellSize.x * row->selectionFrom, p.s->font->cellSize.y * y, diff --git a/src/renderer/atlas/BackendD3D.h b/src/renderer/atlas/BackendD3D.h index e54a4ddd54e..c5cf07e6553 100644 --- a/src/renderer/atlas/BackendD3D.h +++ b/src/renderer/atlas/BackendD3D.h @@ -59,13 +59,22 @@ namespace Microsoft::Console::Render::Atlas { Default = 0, Background = 0, + + // This block of values will be used for the TextDrawingFirst/Last range and needs to stay together. + // This is used to quickly check if an instance is related to a "text drawing primitive". TextGrayscale = 1, TextClearType = 2, TextPassthrough = 3, DottedLine = 4, DottedLineWide = 5, - SolidFill = 6, + // All items starting here will be drawn as a solid RGBA color + SolidLine = 6, + Cursor = 7, + Selection = 8, + + TextDrawingFirst = TextGrayscale, + TextDrawingLast = SolidLine, }; // NOTE: Don't initialize any members in this struct.
This ensures that no @@ -177,7 +186,8 @@ namespace Microsoft::Console::Render::Atlas { i16x2 position; u16x2 size; - u32 color; + u32 background; + u32 foreground; }; ATLAS_ATTR_COLD void _handleSettingsUpdate(const RenderingPayload& p); @@ -210,8 +220,8 @@ namespace Microsoft::Console::Render::Atlas void _splitDoubleHeightGlyph(const RenderingPayload& p, const AtlasFontFaceEntryInner& fontFaceEntry, AtlasGlyphEntry& glyphEntry); void _drawGridlines(const RenderingPayload& p, u16 y); void _drawCursorBackground(const RenderingPayload& p); - ATLAS_ATTR_COLD void _drawCursorInvert(); - ATLAS_ATTR_COLD void _drawCursorInvertSlowPath(const CursorRect& c, size_t position); + ATLAS_ATTR_COLD void _drawCursorForeground(const RenderingPayload& p); + ATLAS_ATTR_COLD size_t _drawCursorForegroundSlowPath(const RenderingPayload& p, const CursorRect& c, size_t offset); void _drawSelection(const RenderingPayload& p); void _executeCustomShader(RenderingPayload& p); @@ -273,6 +283,8 @@ namespace Microsoft::Console::Render::Atlas // An empty-box cursor spanning a wide glyph that has different // background colors on each side results in 6 lines being drawn. til::small_vector _cursorRects; + // The bounding rect of _cursorRects in pixels. 
+ til::rect _cursorPosition; bool _requiresContinuousRedraw = false; diff --git a/src/renderer/atlas/shader_common.hlsl b/src/renderer/atlas/shader_common.hlsl index 13a229dc00f..957b17ac6ad 100644 --- a/src/renderer/atlas/shader_common.hlsl +++ b/src/renderer/atlas/shader_common.hlsl @@ -8,8 +8,6 @@ #define SHADING_TYPE_TEXT_PASSTHROUGH 3 #define SHADING_TYPE_DOTTED_LINE 4 #define SHADING_TYPE_DOTTED_LINE_WIDE 5 -#define SHADING_TYPE_SOLID_FILL 6 -#define SHADING_TYPE_CURSOR 7 // clang-format on struct VSData From ab13e16d44e69e57c07e90d2da9a6064f3e90e53 Mon Sep 17 00:00:00 2001 From: Leonard Hecker Date: Wed, 26 Apr 2023 13:24:02 +0200 Subject: [PATCH 37/37] Add an Emoji shortcut --- src/renderer/atlas/BackendD3D.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/renderer/atlas/BackendD3D.cpp b/src/renderer/atlas/BackendD3D.cpp index a1c4bfbe74c..9ecf9beac7e 100644 --- a/src/renderer/atlas/BackendD3D.cpp +++ b/src/renderer/atlas/BackendD3D.cpp @@ -1794,6 +1794,14 @@ size_t BackendD3D::_drawCursorForegroundSlowPath(const RenderingPayload& p, cons #pragma warning(suppress : 26820) // This is a potentially expensive copy operation. Consider using a reference unless a copy is required (p.9). const auto it = _instances[offset]; + // There's one special exception to the rule: Emojis. We currently don't really support inverting + // (or reversing) colored glyphs like that, so we can return early here and avoid cutting them up. + // It'd be too expensive to check for these rare glyph types inside the _drawCursorForeground() loop. + if (it.shadingType == ShadingType::TextPassthrough) + { + return 0; + } + const int cursorL = c.position.x; const int cursorT = c.position.y; const int cursorR = cursorL + c.size.x;