Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure til::u8u16 still works if the string consists of just a lead byte #4685

Merged
1 commit merged into from
Feb 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/inc/til/u8u16convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
if ((*backIter & _Utf8BitMasks::MaskAsciiByte) > _Utf8BitMasks::IsAsciiByte)
{
// Check only up to the last 3 bytes. If no Lead Byte was found, then the byte before must be the Lead Byte and no partials are in the string.
const size_t stopLen{ std::min(in.length(), gsl::narrow_cast<size_t>(4u)) };
for (size_t sequenceLen{ 1u }; sequenceLen < stopLen; ++sequenceLen, --backIter)
const size_t stopLen{ std::min(in.length(), gsl::narrow_cast<size_t>(3u)) };
for (size_t sequenceLen{ 1u }; sequenceLen <= stopLen; ++sequenceLen, --backIter)
{
// If Lead Byte found
if ((*backIter & _Utf8BitMasks::MaskContinuationByte) > _Utf8BitMasks::IsContinuationByte)
Expand Down
38 changes: 31 additions & 7 deletions src/til/ut_til/u8u16convertTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,31 +83,55 @@ void Utf8Utf16ConvertTests::TestU8ToU16Partials()
'\xA4',
'\xBD',
'\x9C',
'\xF0' // CJK UNIFIED IDEOGRAPH-24F5C (lead byte only)
'\xF0', // CJK UNIFIED IDEOGRAPH-24F5C (lead byte + 2 complementary bytes)
'\xA4',
'\xBD'
};

const std::string u8String2{
'\xA4', // CJK UNIFIED IDEOGRAPH-24F5C (complementary bytes)
'\xBD',
'\x9C'
'\x9C' // CJK UNIFIED IDEOGRAPH-24F5C (last complementary byte)
};

const std::wstring u16StringComp{
const std::wstring u16StringComp1{
gsl::narrow_cast<wchar_t>(0xD853), // CJK UNIFIED IDEOGRAPH-24F5C (surrogate pair)
gsl::narrow_cast<wchar_t>(0xDF5C)
};

// GH#4673
const std::string u8String3{
'\xE2' // WHITE SMILING FACE (lead byte)
};

const std::string u8String4{
'\x98', // WHITE SMILING FACE (complementary bytes)
'\xBA'
};

const std::wstring u16StringComp2{
gsl::narrow_cast<wchar_t>(0x263A) // WHITE SMILING FACE
};

til::u8state state{};

std::wstring u16Out1{};
const HRESULT hRes1{ til::u8u16(u8String1, u16Out1, state) };
VERIFY_ARE_EQUAL(S_OK, hRes1);
VERIFY_ARE_EQUAL(u16StringComp, u16Out1);
VERIFY_ARE_EQUAL(u16StringComp1, u16Out1);

std::wstring u16Out2{};
const HRESULT hRes2{ til::u8u16(u8String2, u16Out2, state) };
VERIFY_ARE_EQUAL(S_OK, hRes2);
VERIFY_ARE_EQUAL(u16StringComp, u16Out2);
VERIFY_ARE_EQUAL(u16StringComp1, u16Out2);

std::wstring u16Out3{};
const HRESULT hRes3{ til::u8u16(u8String3, u16Out3, state) };
VERIFY_ARE_EQUAL(S_OK, hRes3);
VERIFY_ARE_EQUAL(std::wstring{}, u16Out3);

std::wstring u16Out4{};
const HRESULT hRes4{ til::u8u16(u8String4, u16Out4, state) };
VERIFY_ARE_EQUAL(S_OK, hRes4);
VERIFY_ARE_EQUAL(u16StringComp2, u16Out4);
}

void Utf8Utf16ConvertTests::TestU16ToU8Partials()
Expand Down