Skip to content

Commit

Permalink
ICU-22953 MF2: Allow unpaired surrogates in text and quoted literals
Browse files Browse the repository at this point in the history
  • Loading branch information
catamorphism committed Nov 6, 2024
1 parent 842899d commit 376da67
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
4 changes: 2 additions & 2 deletions icu4c/source/i18n/messageformat2_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ static bool isContentChar(UChar32 c) {
|| inRange(c, 0x002F, 0x003F) // Omit '@'
|| inRange(c, 0x0041, 0x005B) // Omit '\'
|| inRange(c, 0x005D, 0x007A) // Omit { | }
|| inRange(c, 0x007E, 0xD7FF) // Omit surrogates
|| inRange(c, 0xE000, 0x10FFFF);
|| inRange(c, 0x007E, 0x2FFF) // Omit IDEOGRAPHIC_SPACE
|| inRange(c, 0x3001, 0x10FFFF); // Allowing surrogates is intentional
}

// See `s` in the MessageFormat 2 grammar
Expand Down
24 changes: 22 additions & 2 deletions icu4c/source/test/intltest/messageformat2test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ TestMessageFormat2::runIndexedTest(int32_t index, UBool exec,
TESTCASE_AUTO(testFormatterAPI);
TESTCASE_AUTO(testHighLoneSurrogate);
TESTCASE_AUTO(testLowLoneSurrogate);
TESTCASE_AUTO(testLoneSurrogateInQuotedLiteral);
TESTCASE_AUTO(dataDrivenTests);
TESTCASE_AUTO_END;
}
Expand Down Expand Up @@ -350,7 +351,8 @@ void TestMessageFormat2::testHighLoneSurrogate() {
.setPattern(loneSurrogate, pe, errorCode)
.build(errorCode);
UnicodeString result = msgfmt1.formatToString({}, errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testHighLoneSurrogate");
assertEquals("testHighLoneSurrogate", loneSurrogate, result);
errorCode.errIfFailureAndReset("testHighLoneSurrogate");
}

// ICU-22890: lone surrogates cause an infinite loop
Expand All @@ -364,7 +366,25 @@ void TestMessageFormat2::testLowLoneSurrogate() {
.setPattern(loneSurrogate, pe, errorCode)
.build(errorCode);
UnicodeString result = msgfmt2.formatToString({}, errorCode);
errorCode.expectErrorAndReset(U_MF_SYNTAX_ERROR, "testLowLoneSurrogate");
assertEquals("testLowLoneSurrogate", loneSurrogate, result);
errorCode.errIfFailureAndReset("testLowLoneSurrogate");
}

void TestMessageFormat2::testLoneSurrogateInQuotedLiteral() {
IcuTestErrorCode errorCode(*this, "testLoneSurrogateInQuotedLiteral");
UParseError pe = { 0, 0, {0}, {0} };
// |\udc02|
UnicodeString literal("{|");
literal += 0xdc02;
literal += "|}";
UnicodeString expectedResult({0xdc02, 0});
icu::message2::MessageFormatter msgfmt2 =
icu::message2::MessageFormatter::Builder(errorCode)
.setPattern(literal, pe, errorCode)
.build(errorCode);
UnicodeString result = msgfmt2.formatToString({}, errorCode);
assertEquals("testLoneSurrogateInQuotedLiteral", expectedResult, result);
errorCode.errIfFailureAndReset("testLoneSurrogateInQuotedLiteral");
}

void TestMessageFormat2::dataDrivenTests() {
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/messageformat2test.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ class TestMessageFormat2: public IntlTest {

void testHighLoneSurrogate(void);
void testLowLoneSurrogate(void);
void testLoneSurrogateInQuotedLiteral(void);
}; // class TestMessageFormat2

U_NAMESPACE_BEGIN
Expand Down

0 comments on commit 376da67

Please sign in to comment.