From c17f448d7375595115d3219004cce2156e670bb0 Mon Sep 17 00:00:00 2001 From: "Dustin L. Howett" Date: Mon, 14 Sep 2020 11:51:03 -0700 Subject: [PATCH] Make til::color's COLORREF conversion more optimal (#7619) Clang (10) has no trouble optimizing the COLORREF conversion operator to a simple 32-bit load with mask (!) even though it's a series of bit shifts across multiple struct members. MSVC (19.24) doesn't make the same optimization decision, and it emits three 8-bit loads and some shifting. In any case, the optimization only applies at -O2 (clang) and above. In this commit, we leverage the spec-legality of using unions for type conversions and the overlap of four uint8_ts and a uint32_t to make the conversion very obvious to both compilers. x86_64 msvc | O0 | O1 | O2 ------------|----|----|-------------------- shifts | 12 | 11 | 11 (fully inlined) union | 5 | 1 | 1 (fully inlined) x86_64 clang | O0 | O1 | O2 + O3 -------------|----|----|-------------------- shifts | 14 | 5 | 1 (fully inlined) union | 9 | 3 | 1 (fully inlined) j4james brought up some concerns about til::color's minor wastefulness in https://github.com/microsoft/terminal/pull/7578#discussion_r487355989. This is a clear, simple transformation that saves us a few instructions in a relatively common case, so I'm accepting a micro-optimization even though we don't have data showing this to be a hot spot. --- .github/actions/spell-check/expect/expect.txt | 1 + src/inc/til/color.h | 33 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/.github/actions/spell-check/expect/expect.txt b/.github/actions/spell-check/expect/expect.txt index ddb33c771f3..d755991d8ec 100644 --- a/.github/actions/spell-check/expect/expect.txt +++ b/.github/actions/spell-check/expect/expect.txt @@ -6,6 +6,7 @@ ABANDONFONT ABCDEFGHIJKLMNO ABCG abf +abgr abi ACCESSTOKEN acec diff --git a/src/inc/til/color.h b/src/inc/til/color.h index 122aaafb6ba..347aae822e5 100644 --- a/src/inc/til/color.h +++ b/src/inc/til/color.h @@ -12,7 +12,27 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" #pragma warning(disable : 26472) struct color { - uint8_t r, g, b, a; + // Clang (10) has no trouble optimizing the COLORREF conversion operator, below, to a + // simple 32-bit load with mask (!) even though it's a series of bit shifts across + // multiple struct members. + // CL (19.24) doesn't make the same optimization decision, and it emits three 8-bit loads + // and some shifting. + // In any case, the optimization only applies at -O2 (clang) and above. + // Here, we leverage the spec-legality of using unions for type conversions and the + // overlap of four uint8_ts and a uint32_t to make the conversion very obvious to + // both compilers. + union + { + struct + { +#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ // Clang, GCC + uint8_t a, b, g, r; +#else + uint8_t r, g, b, a; +#endif + }; + uint32_t abgr; + }; constexpr color() noexcept : r{ 0 }, @@ -40,16 +60,13 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" #ifdef _WINDEF_ constexpr color(COLORREF c) : - r{ static_cast(c & 0xFF) }, - g{ static_cast((c & 0xFF00) >> 8) }, - b{ static_cast((c & 0xFF0000) >> 16) }, - a{ 255 } + abgr{ static_cast(c | 0xFF000000u) } { } operator COLORREF() const noexcept { - return static_cast(r) | (static_cast(g) << 8) | (static_cast(b) << 16); + return static_cast(abgr & 0x00FFFFFFu); } #endif @@ -137,7 +154,7 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" constexpr bool operator==(const til::color& other) const { - return r == other.r && g == other.g && b == other.b && a == other.a; + return abgr == other.abgr; } constexpr bool operator!=(const til::color& other) const @@ -171,6 +188,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned" #pragma warning(pop) } +static_assert(sizeof(til::color) == sizeof(uint32_t)); + #ifdef __WEX_COMMON_H__ namespace WEX::TestExecution {