diff --git a/deps/icu-small/README-SMALL-ICU.txt b/deps/icu-small/README-SMALL-ICU.txt index b98a528485653b..2ff8a36061cfad 100644 --- a/deps/icu-small/README-SMALL-ICU.txt +++ b/deps/icu-small/README-SMALL-ICU.txt @@ -1,8 +1,8 @@ Small ICU sources - auto generated by shrink-icu-src.py This directory contains the ICU subset used by --with-intl=small-icu (the default) -It is a strict subset of ICU 62 source files with the following exception(s): -* deps/icu-small/source/data/in/icudt62l.dat : Reduced-size data file +It is a strict subset of ICU 63 source files with the following exception(s): +* deps/icu-small/source/data/in/icudt63l.dat : Reduced-size data file To rebuild this directory, see ../../tools/icu/README.md diff --git a/deps/icu-small/source/common/bmpset.cpp b/deps/icu-small/source/common/bmpset.cpp index 35bc80dce359eb..bc79f5e5a63be1 100644 --- a/deps/icu-small/source/common/bmpset.cpp +++ b/deps/icu-small/source/common/bmpset.cpp @@ -241,13 +241,13 @@ void BMPSet::overrideIllegal() { bmpBlockBits[i]|=bits; } - mask=~(0x10001<<0xd); // Lead byte 0xED. + mask= static_cast(~(0x10001<<0xd)); // Lead byte 0xED. bits=1<<0xd; for(i=32; i<64; ++i) { // Second half of 4k block. bmpBlockBits[i]=(bmpBlockBits[i]&mask)|bits; } } else { - mask=~(0x10001<<0xd); // Lead byte 0xED. + mask= static_cast(~(0x10001<<0xd)); // Lead byte 0xED. for(i=32; i<64; ++i) { // Second half of 4k block. bmpBlockBits[i]&=mask; } diff --git a/deps/icu-small/source/common/bytesinkutil.cpp b/deps/icu-small/source/common/bytesinkutil.cpp index 6af7ddfd597638..c64a845f87538e 100644 --- a/deps/icu-small/source/common/bytesinkutil.cpp +++ b/deps/icu-small/source/common/bytesinkutil.cpp @@ -11,6 +11,7 @@ #include "unicode/utf8.h" #include "unicode/utf16.h" #include "bytesinkutil.h" +#include "charstr.h" #include "cmemory.h" #include "uassert.h" @@ -120,4 +121,41 @@ ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, return TRUE; } +CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { +} + +CharStringByteSink::~CharStringByteSink() = default; + +void +CharStringByteSink::Append(const char* bytes, int32_t n) { + UErrorCode status = U_ZERO_ERROR; + dest_.append(bytes, n, status); + // Any errors are silently ignored. +} + +char* +CharStringByteSink::GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, + int32_t scratch_capacity, + int32_t* result_capacity) { + if (min_capacity < 1 || scratch_capacity < min_capacity) { + *result_capacity = 0; + return nullptr; + } + + UErrorCode status = U_ZERO_ERROR; + char* result = dest_.getAppendBuffer( + min_capacity, + desired_capacity_hint, + *result_capacity, + status); + if (U_SUCCESS(status)) { + return result; + } + + *result_capacity = scratch_capacity; + return scratch; +} + U_NAMESPACE_END diff --git a/deps/icu-small/source/common/bytesinkutil.h b/deps/icu-small/source/common/bytesinkutil.h index 8287ffea4ca713..69e4cbcd263932 100644 --- a/deps/icu-small/source/common/bytesinkutil.h +++ b/deps/icu-small/source/common/bytesinkutil.h @@ -13,6 +13,7 @@ U_NAMESPACE_BEGIN class ByteSink; +class CharString; class Edits; class U_COMMON_API ByteSinkUtil { @@ -58,4 +59,25 @@ class U_COMMON_API ByteSinkUtil { ByteSink &sink, uint32_t options, Edits *edits); }; +class CharStringByteSink : public ByteSink { +public: + CharStringByteSink(CharString* dest); + ~CharStringByteSink() override; + + CharStringByteSink() = delete; + CharStringByteSink(const CharStringByteSink&) = delete; + CharStringByteSink& operator=(const CharStringByteSink&) = delete; + + void Append(const char* bytes, int32_t n) override; + + char* GetAppendBuffer(int32_t min_capacity, + int32_t desired_capacity_hint, + char* scratch, + int32_t scratch_capacity, + int32_t* result_capacity) override; + +private: + CharString& dest_; +}; + U_NAMESPACE_END diff --git a/deps/icu-small/source/common/bytestriebuilder.cpp b/deps/icu-small/source/common/bytestriebuilder.cpp index 581505e0092b15..ec1ab7d8f5080e 100644 --- a/deps/icu-small/source/common/bytestriebuilder.cpp +++ b/deps/icu-small/source/common/bytestriebuilder.cpp @@ -339,7 +339,8 @@ BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) : LinearMatchNode(len, nextNode), s(bytes) { - hash=hash*37+ustr_hashCharsN(bytes, len); + hash=static_cast( + static_cast(hash)*37u + static_cast(ustr_hashCharsN(bytes, len))); } UBool diff --git a/deps/icu-small/source/common/characterproperties.cpp b/deps/icu-small/source/common/characterproperties.cpp new file mode 100644 index 00000000000000..3aff85b3f1193e --- /dev/null +++ b/deps/icu-small/source/common/characterproperties.cpp @@ -0,0 +1,336 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// characterproperties.cpp +// created: 2018sep03 Markus W. Scherer + +#include "unicode/utypes.h" +#include "unicode/localpointer.h" +#include "unicode/uchar.h" +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uniset.h" +#include "unicode/uscript.h" +#include "unicode/uset.h" +#include "cmemory.h" +#include "mutex.h" +#include "normalizer2impl.h" +#include "uassert.h" +#include "ubidi_props.h" +#include "ucase.h" +#include "ucln_cmn.h" +#include "umutex.h" +#include "uprops.h" + +using icu::UInitOnce; +using icu::UnicodeSet; + +namespace { + +UBool U_CALLCONV characterproperties_cleanup(); + +struct Inclusion { + UnicodeSet *fSet; + UInitOnce fInitOnce; +}; +Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() + +UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {}; + +UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {}; + +UMutex cpMutex = U_MUTEX_INITIALIZER; + +//---------------------------------------------------------------- +// Inclusions list +//---------------------------------------------------------------- + +// USetAdder implementation +// Does not use uset.h to reduce code dependencies +void U_CALLCONV +_set_add(USet *set, UChar32 c) { + ((UnicodeSet *)set)->add(c); +} + +void U_CALLCONV +_set_addRange(USet *set, UChar32 start, UChar32 end) { + ((UnicodeSet *)set)->add(start, end); +} + +void U_CALLCONV +_set_addString(USet *set, const UChar *str, int32_t length) { + ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length)); +} + +UBool U_CALLCONV characterproperties_cleanup() { + for (Inclusion &in: gInclusions) { + delete in.fSet; + in.fSet = nullptr; + in.fInitOnce.reset(); + } + for (int32_t i = 0; i < UPRV_LENGTHOF(sets); ++i) { + delete sets[i]; + sets[i] = nullptr; + } + for (int32_t i = 0; i < UPRV_LENGTHOF(maps); ++i) { + ucptrie_close(reinterpret_cast(maps[i])); + maps[i] = nullptr; + } + return TRUE; +} + +} // namespace + +U_NAMESPACE_BEGIN + +/* +Reduce excessive reallocation, and make it easier to detect initialization problems. +Usually you don't see smaller sets than this for Unicode 5.0. +*/ +constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072; + +void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) { + // This function is invoked only via umtx_initOnce(). + // This function is a friend of class UnicodeSet. + + U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT); + if (src == UPROPS_SRC_NONE) { + errorCode = U_INTERNAL_PROGRAM_ERROR; + return; + } + UnicodeSet * &incl = gInclusions[src].fSet; + U_ASSERT(incl == nullptr); + + incl = new UnicodeSet(); + if (incl == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + USetAdder sa = { + (USet *)incl, + _set_add, + _set_addRange, + _set_addString, + nullptr, // don't need remove() + nullptr // don't need removeRange() + }; + + incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode); + switch(src) { + case UPROPS_SRC_CHAR: + uchar_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_PROPSVEC: + upropsvec_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_CHAR_AND_PROPSVEC: + uchar_addPropertyStarts(&sa, &errorCode); + upropsvec_addPropertyStarts(&sa, &errorCode); + break; +#if !UCONFIG_NO_NORMALIZATION + case UPROPS_SRC_CASE_AND_NORM: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + ucase_addPropertyStarts(&sa, &errorCode); + break; + } + case UPROPS_SRC_NFC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFKC_CF: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addPropertyStarts(&sa, errorCode); + } + break; + } + case UPROPS_SRC_NFC_CANON_ITER: { + const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(errorCode); + if(U_SUCCESS(errorCode)) { + impl->addCanonIterPropertyStarts(&sa, errorCode); + } + break; + } +#endif + case UPROPS_SRC_CASE: + ucase_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_BIDI: + ubidi_addPropertyStarts(&sa, &errorCode); + break; + case UPROPS_SRC_INPC: + case UPROPS_SRC_INSC: + case UPROPS_SRC_VO: + uprops_addPropertyStarts((UPropertySource)src, &sa, &errorCode); + break; + default: + errorCode = U_INTERNAL_PROGRAM_ERROR; + break; + } + + if (U_FAILURE(errorCode)) { + delete incl; + incl = nullptr; + return; + } + // Compact for caching + incl->compact(); + ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup); +} + +const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + if (src < 0 || UPROPS_SRC_COUNT <= src) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Inclusion &i = gInclusions[src]; + umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode); + return i.fSet; +} + +const UnicodeSet *CharacterProperties::getInclusionsForProperty( + UProperty prop, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + UPropertySource src = uprops_getSource(prop); + return getInclusionsForSource(src, errorCode); +} + +U_NAMESPACE_END + +namespace { + +UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + icu::LocalPointer set(new UnicodeSet()); + if (set.isNull()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 startHasProperty = -1; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch. + if (u_hasBinaryProperty(c, property)) { + if (startHasProperty < 0) { + // Transition from false to true. + startHasProperty = c; + } + } else if (startHasProperty >= 0) { + // Transition from true to false. + set->add(startHasProperty, c - 1); + startHasProperty = -1; + } + } + } + if (startHasProperty >= 0) { + set->add(startHasProperty, 0x10FFFF); + } + set->freeze(); + return set.orphan(); +} + +UCPMap *makeMap(UProperty property, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return nullptr; } + uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0; + icu::LocalUMutableCPTriePointer mutableTrie( + umutablecptrie_open(nullValue, nullValue, &errorCode)); + const UnicodeSet *inclusions = + icu::CharacterProperties::getInclusionsForProperty(property, errorCode); + if (U_FAILURE(errorCode)) { return nullptr; } + int32_t numRanges = inclusions->getRangeCount(); + UChar32 start = 0; + uint32_t value = nullValue; + + for (int32_t i = 0; i < numRanges; ++i) { + UChar32 rangeEnd = inclusions->getRangeEnd(i); + for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) { + // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch. + uint32_t nextValue = u_getIntPropertyValue(c, property); + if (value != nextValue) { + if (value != nullValue) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode); + } + start = c; + value = nextValue; + } + } + } + if (value != 0) { + umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode); + } + + UCPTrieType type; + if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) { + type = UCPTRIE_TYPE_FAST; + } else { + type = UCPTRIE_TYPE_SMALL; + } + UCPTrieValueWidth valueWidth; + // TODO: UCharacterProperty.IntProperty + int32_t max = u_getIntPropertyMaxValue(property); + if (max <= 0xff) { + valueWidth = UCPTRIE_VALUE_BITS_8; + } else if (max <= 0xffff) { + valueWidth = UCPTRIE_VALUE_BITS_16; + } else { + valueWidth = UCPTRIE_VALUE_BITS_32; + } + return reinterpret_cast( + umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode)); +} + +} // namespace + +U_NAMESPACE_USE + +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < 0 || UCHAR_BINARY_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UnicodeSet *set = sets[property]; + if (set == nullptr) { + sets[property] = set = makeSet(property, *pErrorCode); + } + if (U_FAILURE(*pErrorCode)) { return nullptr; } + return set->toUSet(); +} + +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return nullptr; } + if (property < UCHAR_INT_START || UCHAR_INT_LIMIT <= property) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + Mutex m(&cpMutex); + UCPMap *map = maps[property - UCHAR_INT_START]; + if (map == nullptr) { + maps[property - UCHAR_INT_START] = map = makeMap(property, *pErrorCode); + } + return map; +} diff --git a/deps/icu-small/source/common/charstr.cpp b/deps/icu-small/source/common/charstr.cpp index 353f1d52542fa2..852cc539457760 100644 --- a/deps/icu-small/source/common/charstr.cpp +++ b/deps/icu-small/source/common/charstr.cpp @@ -79,7 +79,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error return *this; } if(sLength<0) { - sLength=uprv_strlen(s); + sLength= static_cast(uprv_strlen(s)); } if(sLength>0) { if(s==(buffer.getAlias()+len)) { @@ -126,15 +126,21 @@ char *CharString::getAppendBuffer(int32_t minCapacity, } CharString &CharString::appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode) { + return appendInvariantChars(s.getBuffer(), s.length(), errorCode); +} + +CharString &CharString::appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return *this; } - if (!uprv_isInvariantUnicodeString(s)) { + if (!uprv_isInvariantUString(uchars, ucharsLen)) { errorCode = U_INVARIANT_CONVERSION_ERROR; return *this; } - if(ensureCapacity(len+s.length()+1, 0, errorCode)) { - len+=s.extract(0, 0x7fffffff, buffer.getAlias()+len, buffer.getCapacity()-len, US_INV); + if(ensureCapacity(len+ucharsLen+1, 0, errorCode)) { + u_UCharsToChars(uchars, buffer.getAlias()+len, ucharsLen); + len += ucharsLen; + buffer[len] = 0; } return *this; } diff --git a/deps/icu-small/source/common/charstr.h b/deps/icu-small/source/common/charstr.h index 86f69c383a0b37..1a97e01988f991 100644 --- a/deps/icu-small/source/common/charstr.h +++ b/deps/icu-small/source/common/charstr.h @@ -123,6 +123,7 @@ class U_COMMON_API CharString : public UMemory { UErrorCode &errorCode); CharString &appendInvariantChars(const UnicodeString &s, UErrorCode &errorCode); + CharString &appendInvariantChars(const UChar* uchars, int32_t ucharsLen, UErrorCode& errorCode); /** * Appends a filename/path part, e.g., a directory name. diff --git a/deps/icu-small/source/common/cmemory.h b/deps/icu-small/source/common/cmemory.h index f98e13efc0ffbe..a6dd209d80b2f3 100644 --- a/deps/icu-small/source/common/cmemory.h +++ b/deps/icu-small/source/common/cmemory.h @@ -172,7 +172,7 @@ class LocalMemory : public LocalPointerBase { * @return *this */ LocalMemory &moveFrom(LocalMemory &src) U_NOEXCEPT { - delete[] LocalPointerBase::ptr; + uprv_free(LocalPointerBase::ptr); LocalPointerBase::ptr=src.ptr; src.ptr=NULL; return *this; @@ -279,6 +279,10 @@ inline T *LocalMemory::allocateInsteadAndCopy(int32_t newCapacity, int32_t le * * Unlike LocalMemory and LocalArray, this class never adopts * (takes ownership of) another array. + * + * WARNING: MaybeStackArray only works with primitive (plain-old data) types. + * It does NOT know how to call a destructor! If you work with classes with + * destructors, consider LocalArray in localpointer.h. */ template class MaybeStackArray { diff --git a/deps/icu-small/source/common/dictbe.cpp b/deps/icu-small/source/common/dictbe.cpp index 419d062ef25d44..0e4d0850fac912 100644 --- a/deps/icu-small/source/common/dictbe.cpp +++ b/deps/icu-small/source/common/dictbe.cpp @@ -325,9 +325,9 @@ ThaiBreakEngine::divideUpDictionaryRange( UText *text, // two characters after uc were not 0x0E4C THANTHAKHAT before // checking the dictionary. That is just a performance filter, // but it's not clear it's faster than checking the trie. - int32_t candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + int32_t num_candidates = words[(wordsFound + 1) % THAI_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); utext_setNativeIndex(text, current + cuWordLength + chars); - if (candidates > 0) { + if (num_candidates > 0) { break; } } @@ -555,9 +555,9 @@ LaoBreakEngine::divideUpDictionaryRange( UText *text, if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { // Maybe. See if it's in the dictionary. // TODO: this looks iffy; compare with old code. - int32_t candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + int32_t num_candidates = words[(wordsFound + 1) % LAO_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); utext_setNativeIndex(text, current + cuWordLength + chars); - if (candidates > 0) { + if (num_candidates > 0) { break; } } @@ -748,9 +748,9 @@ BurmeseBreakEngine::divideUpDictionaryRange( UText *text, if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { // Maybe. See if it's in the dictionary. // TODO: this looks iffy; compare with old code. - int32_t candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + int32_t num_candidates = words[(wordsFound + 1) % BURMESE_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); utext_setNativeIndex(text, current + cuWordLength + chars); - if (candidates > 0) { + if (num_candidates > 0) { break; } } @@ -953,9 +953,9 @@ KhmerBreakEngine::divideUpDictionaryRange( UText *text, uc = utext_current32(text); if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) { // Maybe. See if it's in the dictionary. - int32_t candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); + int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd); utext_setNativeIndex(text, current+cuWordLength+chars); - if (candidates > 0) { + if (num_candidates > 0) { break; } } diff --git a/deps/icu-small/source/common/edits.cpp b/deps/icu-small/source/common/edits.cpp index 3b3611fcf80cfe..00a8d601a1cc80 100644 --- a/deps/icu-small/source/common/edits.cpp +++ b/deps/icu-small/source/common/edits.cpp @@ -276,7 +276,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error // ab deletions meet bc insertions at the same intermediate-string index. // Some users expect the bc insertions to come first, so we fetch from bc first. if (bc_bLength == 0) { - if (bcHasNext && (bcHasNext = bcIter.next(errorCode))) { + if (bcHasNext && (bcHasNext = bcIter.next(errorCode)) != 0) { bc_bLength = bcIter.oldLength(); cLength = bcIter.newLength(); if (bc_bLength == 0) { @@ -293,7 +293,7 @@ Edits &Edits::mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &error // else see if the other iterator is done, too. } if (ab_bLength == 0) { - if (abHasNext && (abHasNext = abIter.next(errorCode))) { + if (abHasNext && (abHasNext = abIter.next(errorCode)) != 0) { aLength = abIter.oldLength(); ab_bLength = abIter.newLength(); if (ab_bLength == 0) { diff --git a/deps/icu-small/source/common/loadednormalizer2impl.cpp b/deps/icu-small/source/common/loadednormalizer2impl.cpp index 6fb9b816dc6591..82cb325b723311 100644 --- a/deps/icu-small/source/common/loadednormalizer2impl.cpp +++ b/deps/icu-small/source/common/loadednormalizer2impl.cpp @@ -18,6 +18,7 @@ #include "unicode/udata.h" #include "unicode/localpointer.h" #include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" #include "unicode/unistr.h" #include "unicode/unorm.h" #include "cstring.h" @@ -42,12 +43,12 @@ class LoadedNormalizer2Impl : public Normalizer2Impl { isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); UDataMemory *memory; - UTrie2 *ownedTrie; + UCPTrie *ownedTrie; }; LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { udata_close(memory); - utrie2_close(ownedTrie); + ucptrie_close(ownedTrie); } UBool U_CALLCONV @@ -62,7 +63,7 @@ LoadedNormalizer2Impl::isAcceptable(void * /*context*/, pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[3]==0x32 && - pInfo->formatVersion[0]==3 + pInfo->formatVersion[0]==4 ) { // Normalizer2Impl *me=(Normalizer2Impl *)context; // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); @@ -91,9 +92,9 @@ LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCod int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; - ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, - inBytes+offset, nextOffset-offset, NULL, - &errorCode); + ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16, + inBytes+offset, nextOffset-offset, NULL, + &errorCode); if(U_FAILURE(errorCode)) { return; } @@ -131,15 +132,26 @@ U_CDECL_BEGIN static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); U_CDECL_END -static Norm2AllModes *nfkcSingleton; -static Norm2AllModes *nfkc_cfSingleton; -static UHashtable *cache=NULL; +#if !NORM2_HARDCODE_NFC_DATA +static Norm2AllModes *nfcSingleton; +static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; +#endif +static Norm2AllModes *nfkcSingleton; static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; + +static Norm2AllModes *nfkc_cfSingleton; static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; +static UHashtable *cache=NULL; + // UInitOnce singleton initialization function static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { +#if !NORM2_HARDCODE_NFC_DATA + if (uprv_strcmp(what, "nfc") == 0) { + nfcSingleton = Norm2AllModes::createInstance(NULL, "nfc", errorCode); + } else +#endif if (uprv_strcmp(what, "nfkc") == 0) { nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); } else if (uprv_strcmp(what, "nfkc_cf") == 0) { @@ -157,19 +169,36 @@ static void U_CALLCONV deleteNorm2AllModes(void *allModes) { } static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { +#if !NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + delete nfkcSingleton; nfkcSingleton = NULL; + nfkcInitOnce.reset(); + delete nfkc_cfSingleton; nfkc_cfSingleton = NULL; + nfkc_cfInitOnce.reset(); + uhash_close(cache); cache=NULL; - nfkcInitOnce.reset(); - nfkc_cfInitOnce.reset(); return TRUE; } U_CDECL_END +#if !NORM2_HARDCODE_NFC_DATA +const Norm2AllModes * +Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode); + return nfcSingleton; +} +#endif + const Norm2AllModes * Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } @@ -184,6 +213,36 @@ Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { return nfkc_cfSingleton; } +#if !NORM2_HARDCODE_NFC_DATA +const Normalizer2 * +Normalizer2::getNFCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->comp : NULL; +} + +const Normalizer2 * +Normalizer2::getNFDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->decomp : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcd : NULL; +} + +const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? &allModes->fcc : NULL; +} + +const Normalizer2Impl * +Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { + const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); + return allModes!=NULL ? allModes->impl : NULL; +} +#endif + const Normalizer2 * Normalizer2::getNFKCInstance(UErrorCode &errorCode) { const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); @@ -247,7 +306,7 @@ Normalizer2::getInstance(const char *packageName, } void *temp=uhash_get(cache, name); if(temp==NULL) { - int32_t keyLength=uprv_strlen(name)+1; + int32_t keyLength= static_cast(uprv_strlen(name)+1); char *nameCopy=(char *)uprv_malloc(keyLength); if(nameCopy==NULL) { errorCode=U_MEMORY_ALLOCATION_ERROR; diff --git a/deps/icu-small/source/common/locdspnm.cpp b/deps/icu-small/source/common/locdspnm.cpp index 6ceb6cfc8bc653..2d9389e910a2ab 100644 --- a/deps/icu-small/source/common/locdspnm.cpp +++ b/deps/icu-small/source/common/locdspnm.cpp @@ -45,9 +45,9 @@ static int32_t ncat(char *buffer, uint32_t buflen, ...) { } va_start(args, buflen); - while ((str = va_arg(args, char *))) { + while ((str = va_arg(args, char *)) != 0) { char c; - while (p != e && (c = *str++)) { + while (p != e && (c = *str++) != 0) { *p++ = c; } } @@ -98,7 +98,7 @@ ICUDataTable::ICUDataTable(const char* path, const Locale& locale) : path(NULL), locale(Locale::getRoot()) { if (path) { - int32_t len = uprv_strlen(path); + int32_t len = static_cast(uprv_strlen(path)); this->path = (const char*) uprv_malloc(len + 1); if (this->path) { uprv_strcpy((char *)this->path, path); @@ -560,21 +560,21 @@ LocaleDisplayNamesImpl::adjustForUsageAndContext(CapContextUsage usage, } UnicodeString& -LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale, +LocaleDisplayNamesImpl::localeDisplayName(const Locale& loc, UnicodeString& result) const { - if (locale.isBogus()) { + if (loc.isBogus()) { result.setToBogus(); return result; } UnicodeString resultName; - const char* lang = locale.getLanguage(); + const char* lang = loc.getLanguage(); if (uprv_strlen(lang) == 0) { lang = "root"; } - const char* script = locale.getScript(); - const char* country = locale.getCountry(); - const char* variant = locale.getVariant(); + const char* script = loc.getScript(); + const char* country = loc.getCountry(); + const char* variant = loc.getVariant(); UBool hasScript = uprv_strlen(script) > 0; UBool hasCountry = uprv_strlen(country) > 0; @@ -630,14 +630,14 @@ LocaleDisplayNamesImpl::localeDisplayName(const Locale& locale, resultRemainder.findAndReplace(formatOpenParen, formatReplaceOpenParen); resultRemainder.findAndReplace(formatCloseParen, formatReplaceCloseParen); - LocalPointer e(locale.createKeywords(status)); + LocalPointer e(loc.createKeywords(status)); if (e.isValid() && U_SUCCESS(status)) { UnicodeString temp2; char value[ULOC_KEYWORD_AND_VALUES_CAPACITY]; // sigh, no ULOC_VALUE_CAPACITY const char* key; while ((key = e->next((int32_t *)0, status)) != NULL) { value[0] = 0; - locale.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); + loc.getKeywordValue(key, value, ULOC_KEYWORD_AND_VALUES_CAPACITY, status); if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { return result; } diff --git a/deps/icu-small/source/common/locid.cpp b/deps/icu-small/source/common/locid.cpp index 36508acaf5ca70..e0dcc8a88ec0ec 100644 --- a/deps/icu-small/source/common/locid.cpp +++ b/deps/icu-small/source/common/locid.cpp @@ -31,9 +31,12 @@ ****************************************************************************** */ +#include +#include "unicode/bytestream.h" #include "unicode/locid.h" #include "unicode/strenum.h" +#include "unicode/stringpiece.h" #include "unicode/uloc.h" #include "putilimp.h" #include "mutex.h" @@ -43,9 +46,11 @@ #include "cstring.h" #include "uassert.h" #include "uhash.h" +#include "ulocimp.h" #include "ucln_cmn.h" #include "ustr_imp.h" #include "charstr.h" +#include "bytesinkutil.h" U_CDECL_BEGIN static UBool U_CALLCONV locale_cleanup(void); @@ -424,49 +429,70 @@ Locale::Locale(const Locale &other) *this = other; } -Locale &Locale::operator=(const Locale &other) -{ +Locale::Locale(Locale&& other) U_NOEXCEPT + : UObject(other), fullName(fullNameBuffer), baseName(fullName) { + *this = std::move(other); +} + +Locale& Locale::operator=(const Locale& other) { if (this == &other) { return *this; } - /* Free our current storage */ - if (baseName != fullName) { - uprv_free(baseName); + setToBogus(); + + if (other.fullName == other.fullNameBuffer) { + uprv_strcpy(fullNameBuffer, other.fullNameBuffer); + } else if (other.fullName == nullptr) { + fullName = nullptr; + } else { + fullName = uprv_strdup(other.fullName); + if (fullName == nullptr) return *this; } - baseName = NULL; - if(fullName != fullNameBuffer) { - uprv_free(fullName); - fullName = fullNameBuffer; + + if (other.baseName == other.fullName) { + baseName = fullName; + } else if (other.baseName != nullptr) { + baseName = uprv_strdup(other.baseName); + if (baseName == nullptr) return *this; } - /* Allocate the full name if necessary */ - if(other.fullName != other.fullNameBuffer) { - fullName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(other.fullName)+1)); - if (fullName == NULL) { - return *this; - } + uprv_strcpy(language, other.language); + uprv_strcpy(script, other.script); + uprv_strcpy(country, other.country); + + variantBegin = other.variantBegin; + fIsBogus = other.fIsBogus; + + return *this; +} + +Locale& Locale::operator=(Locale&& other) U_NOEXCEPT { + if (baseName != fullName) uprv_free(baseName); + if (fullName != fullNameBuffer) uprv_free(fullName); + + if (other.fullName == other.fullNameBuffer) { + uprv_strcpy(fullNameBuffer, other.fullNameBuffer); + fullName = fullNameBuffer; + } else { + fullName = other.fullName; } - /* Copy the full name */ - uprv_strcpy(fullName, other.fullName); - /* Copy the baseName if it differs from fullName. */ if (other.baseName == other.fullName) { baseName = fullName; } else { - if (other.baseName) { - baseName = uprv_strdup(other.baseName); - } + baseName = other.baseName; } - /* Copy the language and country fields */ uprv_strcpy(language, other.language); uprv_strcpy(script, other.script); uprv_strcpy(country, other.country); - /* The variantBegin is an offset, just copy it */ variantBegin = other.variantBegin; fIsBogus = other.fIsBogus; + + other.baseName = other.fullName = other.fullNameBuffer; + return *this; } @@ -545,7 +571,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) /* after uloc_getName/canonicalize() we know that only '_' are separators */ separator = field[0] = fullName; fieldIdx = 1; - while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) && fieldIdx < UPRV_LENGTHOF(field)-1) { + while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != 0 && fieldIdx < UPRV_LENGTHOF(field)-1) { field[fieldIdx] = separator + 1; fieldLen[fieldIdx-1] = (int32_t)(separator - field[fieldIdx-1]); fieldIdx++; @@ -652,7 +678,7 @@ Locale::initBaseName(UErrorCode &status) { int32_t Locale::hashCode() const { - return ustr_hashCharsN(fullName, uprv_strlen(fullName)); + return ustr_hashCharsN(fullName, static_cast(uprv_strlen(fullName))); } void @@ -704,6 +730,276 @@ Locale::setDefault( const Locale& newLocale, locale_set_default_internal(localeID, status); } +void +Locale::addLikelySubtags(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + // The maximized locale ID string is often longer, but there is no good + // heuristic to estimate just how much longer. Leave that to CharString. + CharString maximizedLocaleID; + int32_t maximizedLocaleIDCapacity = static_cast(uprv_strlen(fullName)); + + char* buffer; + int32_t reslen; + + for (;;) { + buffer = maximizedLocaleID.getAppendBuffer( + /*minCapacity=*/maximizedLocaleIDCapacity, + /*desiredCapacityHint=*/maximizedLocaleIDCapacity, + maximizedLocaleIDCapacity, + status); + + if (U_FAILURE(status)) { + return; + } + + reslen = uloc_addLikelySubtags( + fullName, + buffer, + maximizedLocaleIDCapacity, + &status); + + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + maximizedLocaleIDCapacity = reslen; + status = U_ZERO_ERROR; + } + + if (U_FAILURE(status)) { + return; + } + + maximizedLocaleID.append(buffer, reslen, status); + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; // Terminators provided by CharString. + } + + if (U_FAILURE(status)) { + return; + } + + init(maximizedLocaleID.data(), /*canonicalize=*/FALSE); + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +void +Locale::minimizeSubtags(UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } + + // Except for a few edge cases (like the empty string, that is minimized to + // "en__POSIX"), minimized locale ID strings will be either the same length + // or shorter than their input. + CharString minimizedLocaleID; + int32_t minimizedLocaleIDCapacity = static_cast(uprv_strlen(fullName)); + + char* buffer; + int32_t reslen; + + for (;;) { + buffer = minimizedLocaleID.getAppendBuffer( + /*minCapacity=*/minimizedLocaleIDCapacity, + /*desiredCapacityHint=*/minimizedLocaleIDCapacity, + minimizedLocaleIDCapacity, + status); + + if (U_FAILURE(status)) { + return; + } + + reslen = uloc_minimizeSubtags( + fullName, + buffer, + minimizedLocaleIDCapacity, + &status); + + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + // Because of the internal minimal buffer size of CharString, I can't + // think of any input data for which this could possibly ever happen. + // Maybe it would be better replaced with an assertion instead? + minimizedLocaleIDCapacity = reslen; + status = U_ZERO_ERROR; + } + + if (U_FAILURE(status)) { + return; + } + + minimizedLocaleID.append(buffer, reslen, status); + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; // Terminators provided by CharString. + } + + if (U_FAILURE(status)) { + return; + } + + init(minimizedLocaleID.data(), /*canonicalize=*/FALSE); + if (isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +Locale U_EXPORT2 +Locale::forLanguageTag(StringPiece tag, UErrorCode& status) +{ + Locale result(Locale::eBOGUS); + + if (U_FAILURE(status)) { + return result; + } + + // If a BCP-47 language tag is passed as the language parameter to the + // normal Locale constructor, it will actually fall back to invoking + // uloc_forLanguageTag() to parse it if it somehow is able to detect that + // the string actually is BCP-47. This works well for things like strings + // using BCP-47 extensions, but it does not at all work for things like + // BCP-47 grandfathered tags (eg. "en-GB-oed") which are possible to also + // interpret as ICU locale IDs and because of that won't trigger the BCP-47 + // parsing. Therefore the code here explicitly calls uloc_forLanguageTag() + // and then Locale::init(), instead of just calling the normal constructor. + + // All simple language tags will have the exact same length as ICU locale + // ID strings as they have as BCP-47 strings (like "en_US" for "en-US"). + CharString localeID; + int32_t resultCapacity = tag.size(); + + char* buffer; + int32_t parsedLength, reslen; + + for (;;) { + buffer = localeID.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + status); + + if (U_FAILURE(status)) { + return result; + } + + reslen = ulocimp_forLanguageTag( + tag.data(), + tag.length(), + buffer, + resultCapacity, + &parsedLength, + &status); + + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + // For all BCP-47 language tags that use extensions, the corresponding + // ICU locale ID will be longer but uloc_forLanguageTag() does compute + // the exact length needed so this memory reallocation will be done at + // most once. + resultCapacity = reslen; + status = U_ZERO_ERROR; + } + + if (U_FAILURE(status)) { + return result; + } + + if (parsedLength != tag.size()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return result; + } + + localeID.append(buffer, reslen, status); + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; // Terminators provided by CharString. + } + + if (U_FAILURE(status)) { + return result; + } + + result.init(localeID.data(), /*canonicalize=*/FALSE); + if (result.isBogus()) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + return result; +} + +void +Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const +{ + if (U_FAILURE(status)) { + return; + } + + if (fIsBogus) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // All simple language tags will have the exact same length as BCP-47 + // strings as they have as ICU locale IDs (like "en-US" for "en_US"). + LocalMemory scratch; + int32_t scratch_capacity = static_cast(uprv_strlen(fullName)); + + if (scratch_capacity == 0) { + scratch_capacity = 3; // "und" + } + + char* buffer; + int32_t result_capacity, reslen; + + for (;;) { + if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + buffer = sink.GetAppendBuffer( + /*min_capacity=*/scratch_capacity, + /*desired_capacity_hint=*/scratch_capacity, + scratch.getAlias(), + scratch_capacity, + &result_capacity); + + reslen = uloc_toLanguageTag( + fullName, + buffer, + result_capacity, + /*strict=*/FALSE, + &status); + + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + // For some very few edge cases a language tag will be longer as a + // BCP-47 string than it is as an ICU locale ID. Most notoriously "C" + // expands to the BCP-47 tag "en-US-u-va-posix", 16 times longer, and + // it'll take several calls to uloc_toLanguageTag() to figure that out. + // https://unicode-org.atlassian.net/browse/ICU-20132 + scratch_capacity = reslen; + status = U_ZERO_ERROR; + } + + if (U_FAILURE(status)) { + return; + } + + sink.Append(buffer, reslen); + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; // Terminators not used. + } +} + Locale U_EXPORT2 Locale::createFromName (const char *name) { @@ -1010,20 +1306,84 @@ KeywordEnumeration::~KeywordEnumeration() { uprv_free(keywords); } +// A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in +// the next() method for each keyword before returning it. +class UnicodeKeywordEnumeration : public KeywordEnumeration { +public: + using KeywordEnumeration::KeywordEnumeration; + virtual ~UnicodeKeywordEnumeration(); + + virtual const char* next(int32_t* resultLength, UErrorCode& status) { + const char* legacy_key = KeywordEnumeration::next(nullptr, status); + if (U_SUCCESS(status) && legacy_key != nullptr) { + const char* key = uloc_toUnicodeLocaleKey(legacy_key); + if (key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } else { + if (resultLength != nullptr) { + *resultLength = static_cast(uprv_strlen(key)); + } + return key; + } + } + if (resultLength != nullptr) *resultLength = 0; + return nullptr; + } +}; + +// Out-of-line virtual destructor to serve as the "key function". +UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default; + StringEnumeration * Locale::createKeywords(UErrorCode &status) const { char keywords[256]; - int32_t keywordCapacity = 256; + int32_t keywordCapacity = sizeof keywords; StringEnumeration *result = NULL; + if (U_FAILURE(status)) { + return result; + } + const char* variantStart = uprv_strchr(fullName, '@'); const char* assignment = uprv_strchr(fullName, '='); if(variantStart) { if(assignment > variantStart) { int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); - if(keyLen) { + if(U_SUCCESS(status) && keyLen) { result = new KeywordEnumeration(keywords, keyLen, 0, status); + if (!result) { + status = U_MEMORY_ALLOCATION_ERROR; + } + } + } else { + status = U_INVALID_FORMAT_ERROR; + } + } + return result; +} + +StringEnumeration * +Locale::createUnicodeKeywords(UErrorCode &status) const +{ + char keywords[256]; + int32_t keywordCapacity = sizeof keywords; + StringEnumeration *result = NULL; + + if (U_FAILURE(status)) { + return result; + } + + const char* variantStart = uprv_strchr(fullName, '@'); + const char* assignment = uprv_strchr(fullName, '='); + if(variantStart) { + if(assignment > variantStart) { + int32_t keyLen = locale_getKeywords(variantStart+1, '@', keywords, keywordCapacity, NULL, 0, NULL, FALSE, &status); + if(U_SUCCESS(status) && keyLen) { + result = new UnicodeKeywordEnumeration(keywords, keyLen, 0, status); + if (!result) { + status = U_MEMORY_ALLOCATION_ERROR; + } } } else { status = U_INVALID_FORMAT_ERROR; @@ -1038,6 +1398,105 @@ Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, U return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status); } +void +Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + + if (fIsBogus) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + if (U_FAILURE(status)) { + return; + } + + LocalMemory scratch; + int32_t scratch_capacity = 16; // Arbitrarily chosen default size. + + char* buffer; + int32_t result_capacity, reslen; + + for (;;) { + if (scratch.allocateInsteadAndReset(scratch_capacity) == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + + buffer = sink.GetAppendBuffer( + /*min_capacity=*/scratch_capacity, + /*desired_capacity_hint=*/scratch_capacity, + scratch.getAlias(), + scratch_capacity, + &result_capacity); + + reslen = uloc_getKeywordValue( + fullName, + keywordName_nul.data(), + buffer, + result_capacity, + &status); + + if (status != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + scratch_capacity = reslen; + status = U_ZERO_ERROR; + } + + if (U_FAILURE(status)) { + return; + } + + sink.Append(buffer, reslen); + if (status == U_STRING_NOT_TERMINATED_WARNING) { + status = U_ZERO_ERROR; // Terminators not used. + } +} + +void +Locale::getUnicodeKeywordValue(StringPiece keywordName, + ByteSink& sink, + UErrorCode& status) const { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + if (U_FAILURE(status)) { + return; + } + + const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); + + if (legacy_key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + CharString legacy_value; + { + CharStringByteSink sink(&legacy_value); + getKeywordValue(legacy_key, sink, status); + } + + if (U_FAILURE(status)) { + return; + } + + const char* unicode_value = uloc_toUnicodeLocaleType( + keywordName_nul.data(), legacy_value.data()); + + if (unicode_value == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + sink.Append(unicode_value, static_cast(uprv_strlen(unicode_value))); +} + void Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status) { @@ -1048,6 +1507,46 @@ Locale::setKeywordValue(const char* keywordName, const char* keywordValue, UErro } } +void +Locale::setKeywordValue(StringPiece keywordName, + StringPiece keywordValue, + UErrorCode& status) { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + const CharString keywordValue_nul(keywordValue, status); + setKeywordValue(keywordName_nul.data(), keywordValue_nul.data(), status); +} + +void +Locale::setUnicodeKeywordValue(StringPiece keywordName, + StringPiece keywordValue, + UErrorCode& status) { + // TODO: Remove the need for a const char* to a NUL terminated buffer. + const CharString keywordName_nul(keywordName, status); + const CharString keywordValue_nul(keywordValue, status); + + if (U_FAILURE(status)) { + return; + } + + const char* legacy_key = uloc_toLegacyKey(keywordName_nul.data()); + + if (legacy_key == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + const char* legacy_value = + uloc_toLegacyType(keywordName_nul.data(), keywordValue_nul.data()); + + if (legacy_value == nullptr) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + setKeywordValue(legacy_key, legacy_value, status); +} + const char * Locale::getBaseName() const { return baseName; diff --git a/deps/icu-small/source/common/mutex.h b/deps/icu-small/source/common/mutex.h index 04c22b4a3767b1..5223397bbcc10a 100644 --- a/deps/icu-small/source/common/mutex.h +++ b/deps/icu-small/source/common/mutex.h @@ -35,7 +35,7 @@ U_NAMESPACE_BEGIN // For example: // -// UMutex myMutex; +// UMutex myMutex = U_MUTEX_INITIALIZER; // // void Function(int arg1, int arg2) // { diff --git a/deps/icu-small/source/common/norm2_nfc_data.h b/deps/icu-small/source/common/norm2_nfc_data.h index c8bf440ae10819..82a68097385285 100644 --- a/deps/icu-small/source/common/norm2_nfc_data.h +++ b/deps/icu-small/source/common/norm2_nfc_data.h @@ -11,258 +11,197 @@ #ifdef INCLUDED_FROM_NORMALIZER2_CPP -static const UVersionInfo norm2_nfc_data_formatVersion={3,0,0,0}; +static const UVersionInfo norm2_nfc_data_formatVersion={4,0,0,0}; static const UVersionInfo norm2_nfc_data_dataVersion={0xb,0,0,0}; static const int32_t norm2_nfc_data_indexes[Normalizer2Impl::IX_COUNT]={ -0x50,0x4e50,0x8aa8,0x8ba8,0x8ba8,0x8ba8,0x8ba8,0x8ba8,0xc0,0x300,0xadc,0x29d0,0x3c56,0xfc00,0x1282,0x3b8c, +0x50,0x4ab0,0x8708,0x8808,0x8808,0x8808,0x8808,0x8808,0xc0,0x300,0xadc,0x29d0,0x3c56,0xfc00,0x1282,0x3b8c, 0x3c24,0x3c56,0x300,0 }; -static const uint16_t norm2_nfc_data_trieIndex[9976]={ -0x2aa,0x2b2,0x2ba,0x2c2,0x2d0,0x2d8,0x2e0,0x2e8,0x2f0,0x2f8,0x300,0x308,0x310,0x318,0x31e,0x326, -0x32e,0x336,0x2c9,0x2d1,0x33b,0x343,0x2c9,0x2d1,0x34b,0x353,0x35b,0x363,0x36b,0x373,0x37b,0x383, -0x38b,0x393,0x39b,0x3a3,0x3ab,0x3b3,0x3bb,0x3c3,0x2c9,0x2d1,0x2c9,0x2d1,0x3ca,0x3d2,0x3da,0x3e2, -0x3e6,0x3ee,0x3f4,0x3fc,0x2c9,0x2d1,0x404,0x40c,0x410,0x418,0x420,0x428,0x2c9,0x2d1,0x426,0x42e, -0x436,0x43d,0x441,0x2c9,0x2c9,0x2c9,0x448,0x450,0x2c9,0x458,0x460,0x2c9,0x2c9,0x468,0x470,0x478, -0x2c9,0x480,0x488,0x2c9,0x2c9,0x490,0x498,0x2c9,0x2c9,0x468,0x49f,0x2c9,0x4a7,0x4ad,0x4b5,0x2c9, -0x2c9,0x2c9,0x4bc,0x2c9,0x2c9,0x4c2,0x4ca,0x2c9,0x2c9,0x4d0,0x4d8,0x2c9,0x2c9,0x2c9,0x4de,0x2c9, -0x2c9,0x4e6,0x4ed,0x2c9,0x2c9,0x4f0,0x4f7,0x2c9,0x4fa,0x501,0x509,0x511,0x519,0x521,0x528,0x2c9, -0x2c9,0x52f,0x2c9,0x2c9,0x536,0x2c9,0x2c9,0x2c9,0x96d,0x2c9,0x2c9,0x975,0x2c9,0x97b,0x983,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x53a,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x542,0x542,0x2c9,0x2c9,0x2c9,0x2c9,0x548,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x550,0x2c9,0x2c9,0x2c9,0x553,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x55a,0x2c9,0x2c9,0x562,0x2c9,0x56a,0x2c9,0x2c9,0x572,0x577,0x57f,0x585,0x2c9,0x58b,0x2c9,0x592, -0x2c9,0x597,0x2c9,0x2c9,0x2c9,0x2c9,0x59d,0x5a5,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x5ad,0x5b2, -0x5ba,0x5c2,0x5ca,0x5d2,0x5da,0x5e2,0x5ea,0x5f2,0x5fa,0x602,0x60a,0x612,0x61a,0x622,0x62a,0x632, -0x63a,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x63e,0x646,0x2c9,0x64d,0x2c9,0x2c9,0x651,0x658,0x65d,0x2c9, -0x665,0x66d,0x675,0x67d,0x685,0x68d,0x2c9,0x695,0x2c9,0x69b,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x69e,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x6a6,0x2c9,0x2c9,0x2c9,0x6ab,0x2c9,0x2c9,0x2c9,0x6b3, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x6bb,0x6c2,0x6ca,0x6d2,0x6da,0x6e2,0x6ea,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x6f2,0x6fa,0x2c9,0x2c9,0x702,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x709,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x710,0x718,0x2c9,0x71e,0x722,0x2c9,0x2c9,0x598,0x72a,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x72e,0x736,0x739,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x498, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, -0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, -0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c, -0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e, -0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990, -0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b, -0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d, -0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f, -0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f, -0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x99f,0x98b,0x98c,0x98d,0x98e,0x98f,0x990,0x997,0x2c9,0x2c9, -0x9a7,0x9ae,0x2aa,0x9b5,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa, -0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa,0x2aa, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x741,0x749,0x751,0x759,0x761,0x769,0x771,0x779, -0x781,0x789,0x791,0x799,0x7a1,0x7a9,0x7b1,0x2c9,0x7b8,0x7c0,0x7c8,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x7d0,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0xb28,0xb28,0xb40,0xb80,0xbc0,0xc00,0xc40,0xc78,0xcb8,0xb24,0xcec,0xb24,0xd2c,0xd6c,0xdac,0xdec, -0xe2c,0xe6c,0xeac,0xeec,0xb24,0xb24,0xf28,0xf68,0xf98,0xfd0,0xb24,0x1010,0x1040,0x1080,0xb24,0x1098, -0x880,0x8b0,0x8ee,0x92d,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x95a,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x976,0x188,0x188,0x9ac,0x188,0x9ec,0xa26,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188, -0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0x188,0xa66, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7d4, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7dc,0x2c9,0x2c9,0x2c9,0x7df,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x7e6,0x7ea,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7f2,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x7f9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x800,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x709,0x6ab,0x805,0x80d,0x2c9,0x2c9,0x815,0x81c,0x2c9,0x598,0x2c9,0x2c9,0x824,0x2c9,0x2c9,0x827, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x82d,0x2c9,0x830,0x838,0x83f,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x847,0x2c9,0x2c9,0x84f,0x857,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x85c,0x864,0x2c9,0x2c9,0x6ab, -0x2c9,0x2c9,0x2c9,0x867,0x2c9,0x2c9,0x2c9,0x86d,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x870,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x542,0x86e, -0x2c9,0x877,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x6ab,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x87f,0x2c9,0x882,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x888,0x2c9,0x88e,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x894,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x89c,0x8a4,0x8ac,0x8b2,0x8ba,0x2c9,0x2c9,0x2c9,0x8c2,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8ca,0x8d2,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8d6,0x2c9,0x2c9,0x2c9, -0x8dd,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x8e5,0x8ed,0x8f5,0x8fd,0x905,0x90d,0x915,0x91d,0x925,0x92d, -0x935,0x93d,0x945,0x94d,0x955,0x95d,0x965,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9, -0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2c9,0x2a9,0x2a9,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,4,8,0xc,1,1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0, -0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4,0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298, -0x2b0,0x2b8,0x2dc,1,1,1,1,1,1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4, -0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c,0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584, -0x5a0,0x5a8,0x5d0,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0x5e8,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x1284,0x128a,0xade,0x1290,0xaf4,0xafe,0x5f4,0xb08,0x1296,0x129c,0xb12,0x12a2,0x12a8,0x12ae,0x12b4,0xb28, -1,0x12ba,0x12c0,0x12c6,0xb32,0xb48,0xb5a,1,0x5fc,0x12cc,0x12d2,0x12d8,0xb64,0x12de,1,1, -0x12e4,0x12ea,0xb7a,0x12f0,0xb90,0xb9a,0x600,0xba4,0x12f6,0x12fc,0xbae,0x1302,0x1308,0x130e,0x1314,0xbc4, -1,0x131a,0x1320,0x1326,0xbce,0xbe4,0xbf6,1,0x608,0x132c,0x1332,0x1338,0xc00,0x133e,1,0x1344, -0x134a,0x1350,0xc16,0xc2c,0x1357,0x135d,0x1362,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398, -1,1,0xc42,0xc50,0x139e,0x13a4,0x13aa,0x13b0,0x13b7,0x13bd,0x13c2,0x13c8,0x13ce,0x13d4,0x13da,0x13e0, -0x13e6,0x13ec,0x13f3,0x13f9,0x13fe,0x1404,1,1,0x140a,0x1410,0x1416,0x141c,0x1422,0x1428,0x142f,0x1435, -0x143a,1,1,1,0x1441,0x1447,0x144d,0x1453,1,0x1458,0x145e,0x1465,0x146b,0x1470,0x1476,1, -1,1,1,0x147c,0x1482,0x1489,0x148f,0x1494,0x149a,1,1,1,0xc5e,0xc6c,0x14a0,0x14a6, -0x14ac,0x14b2,1,1,0x14b8,0x14be,0x14c5,0x14cb,0x14d0,0x14d6,0xc7a,0xc84,0x14dc,0x14e2,0x14e9,0x14ef, -0xc8e,0xc98,0x14f5,0x14fb,0x1500,0x1506,1,1,0xca2,0xcac,0xcb6,0xcc0,0x150c,0x1512,0x1518,0x151e, -0x1524,0x152a,0x1531,0x1537,0x153c,0x1542,0x1548,0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x60c, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xcca,0xce4,1,1,1,1,1,1,1,1,1,1,1,1,1,0xcfe, -0xd18,1,1,1,1,1,1,0x610,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x157e,0x1584,0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15b0,0x15ba,0x15c4, -0x15ce,0x15d8,0x15e2,0x15ec,0x15f6,1,0x1600,0x160a,0x1614,0x161e,0x1627,0x162d,1,1,0x1632,0x1638, -0x163e,0x1644,0xd32,0xd3c,0x164d,0x1657,0x165f,0x1665,0x166b,1,1,1,0x1670,0x1676,1,1, -0x167c,0x1682,0x168a,0x1694,0x169d,0x16a3,0x16a9,0x16af,0x16b4,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de, -0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702,0x1708,0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e, -0x1744,0x174a,0x1750,0x1756,1,1,0x175c,0x1762,1,1,1,1,1,1,0xd46,0xd50, -0xd5a,0xd64,0x176a,0x1774,0x177e,0x1788,0xd6e,0xd78,0x1792,0x179c,0x17a4,0x17aa,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x614,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc, -0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc, -0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8, -0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8, -0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0x3c26,0x3c2c,0xfdcc,0x3c32, -0x3c38,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8, -0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc,0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -0x29d1,1,1,1,1,1,1,1,1,1,0x29d5,1,1,1,1,1, -1,0x17b1,0x17b7,0x29d9,0x17bd,0x17c3,0x17c9,1,0x17cf,1,0x17d5,0x17db,0x17e3,0x618,1,1, -1,0x634,1,0x644,1,0x658,1,1,1,1,1,0x674,1,0x684,1,1, -1,0x688,1,1,1,0x6a0,0x17eb,0x17f1,0xd82,0x17f7,0xd8c,0x17fd,0x1805,0x6b4,1,1, -1,0x6d4,1,0x6e4,1,0x6fc,1,1,1,1,1,0x71c,1,0x72c,1,1, -1,0x734,1,1,1,0x754,0xd96,0xda8,0x180d,0x1813,0xdba,1,1,1,0x76c,0x1819, -0x181f,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x1825,0x182b,1,0x1831, -1,1,0x774,0x1837,1,1,1,1,0x183d,0x1843,0x1849,1,0x778,1,1,0x780, -1,0x784,0x790,0x798,0x79c,0x184f,0x7ac,1,1,1,0x7b0,1,1,1,1,0x7b4, -1,1,1,0x7c4,1,1,1,0x7c8,1,0x7cc,1,1,0x7d0,1,1,0x7d8, -1,0x7dc,0x7e8,0x7f0,0x7f4,0x1855,0x804,1,1,1,0x808,1,1,1,1,0x80c, -1,1,1,0x81c,1,1,1,0x820,1,0x824,1,1,0x185b,0x1861,1,0x1867, -1,1,0x828,0x186d,1,1,1,1,0x1873,0x1879,0x187f,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x82c,0x830,0x1885,0x188b,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x1891,0x1897,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x189d,0x18a3,0x18a9,0x18af, -1,1,0x18b5,0x18bb,0x834,0x838,0x18c1,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,1,1,0x18e5,0x18eb, -0x18f1,0x18f7,0x18fd,0x1903,0x83c,0x840,0x1909,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933,0x1939,0x193f, -0x1945,0x194b,1,1,0x1951,0x1957,1,1,1,1,1,1,1,1,1,1, +static const uint16_t norm2_nfc_data_trieIndex[1690]={ +0,0x40,0x7b,0xbb,0xfb,0x13a,0x17a,0x1b2,0x1f2,0x226,0x254,0x226,0x294,0x2d4,0x313,0x353, +0x393,0x3d2,0x40f,0x44e,0x226,0x226,0x488,0x4c8,0x4f8,0x530,0x226,0x570,0x59f,0x5de,0x226,0x5f3, +0x631,0x65f,0x226,0x68c,0x6cc,0x709,0x729,0x768,0x7a7,0x7e4,0x803,0x840,0x729,0x879,0x8a7,0x8e6, +0x226,0x920,0x937,0x977,0x98e,0x9cd,0x226,0xa03,0xa23,0xa5e,0xa6a,0xaa4,0xacc,0xb09,0xb49,0xb83, +0xb9e,0x226,0xbd9,0x226,0xc19,0xc38,0xc6e,0xcab,0x226,0x226,0x226,0x226,0x226,0xcce,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xcfa,0x226,0x226,0xd2f, +0x226,0x226,0xd4d,0x226,0xd77,0x226,0x226,0x226,0xdb3,0xdd3,0xe13,0x226,0xe51,0xe91,0xec5,0xef1, +0x808,0x226,0x226,0xf25,0x226,0x226,0x226,0xf65,0xfa5,0xfe5,0x1025,0x1065,0x10a5,0x10e5,0x1125,0x1165, +0x11a5,0x226,0x226,0x11d5,0x1206,0x226,0x1236,0x1269,0x12a6,0x12e5,0x1325,0x135b,0x1389,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x13b4,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0xcbc,0x226,0x13d1,0x226,0x1411,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x1451,0x148b,0x14c9,0x1509,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1548,0x1586,0x15a6,0x226,0x226,0x226,0x226, +0x15e0,0x226,0x226,0x161c,0x164e,0x167c,0x80c,0x168f,0x226,0x226,0x169f,0x16df,0x226,0x226,0x226,0x13e3, +0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, +0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, +0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, +0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f, +0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f, +0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723, +0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733, +0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727, +0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737, +0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b, +0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x1733,0x171f,0x1727,0x172f,0x1737,0x1723,0x172b,0x176b,0x226, +0x17ab,0x17e6,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1826,0x1866,0x18a6,0x18e6,0x1926,0x1966,0x19a6,0x19e6,0x1a09,0x1a49,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a69,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x61f,0x62e,0x644,0x663,0x678,0x678,0x678,0x67c,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x54f,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x40c, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1a9c,0x226,0x226,0x1aac,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0xdc5,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1abc,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ac6,0x54f, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7eb,0x226,0x226,0x9ba,0x226,0x1ad6, +0x1ae3,0x1aef,0x226,0x226,0x226,0x226,0x414,0x226,0x1afa,0x1b0a,0x226,0x226,0x226,0x7e0,0x226,0x226, +0x226,0x226,0x1b1a,0x226,0x226,0x226,0x1b25,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x1b2c,0x226,0x226,0x226,0x226,0x1b37,0x1b46,0x8f6,0x1b54,0x412,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x1b62,0x798,0x226,0x226,0x226,0x226,0x226,0x1b72,0x1b81,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x8d6,0x1b89,0x1b99,0x226,0x226,0x226,0x9ba, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba3,0x226,0x226,0x226,0x226,0x226,0x226,0x7e6,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1ba0,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x7ed,0x7ea,0x226,0x226,0x226,0x226,0x7e8, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x9ba,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0xbd3,0x226,0x226,0x226,0x226,0x7ea,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb3,0x226,0x226,0x226, +0xebe,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1bb8,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x1bc7,0x1bd7,0x1be5,0x1bf2,0x226,0x1bfe,0x1c0c,0x1c1c,0x226,0x226,0x226,0x226, +0xce9,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c2c,0x1c34,0x1c42,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c52,0x226,0x226,0x226, +0x226,0x226,0x226,0x1c5e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x1c6e, +0x1c7e,0x1c8e,0x1c9e,0x1cae,0x1cbe,0x1cce,0x1cde,0x1cee,0x1cfe,0x1d0e,0x1d1e,0x1d2e,0x1d3e,0x1d4e,0x1d5e,0x1d6e, +0x1d7e,0x1d8e,0x1d9e,0x1dae,0x1dbe,0x1dce,0x1dde,0x1dee,0x1dfe,0x1e0e,0x1e1e,0x1e2e,0x1e3e,0x1e4e,0x1e5e,0x1e6e, +0x1e7e,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226, +0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x226,0x408, +0x428,0xc4,0xc4,0xc4,0x448,0x457,0x46a,0x486,0x4a3,0x4bf,0x4dc,0x4f9,0x516,0x533,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0x54d,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0x564,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x56f,0x58c,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0x5ac,0xc4,0xc4,0xc4,0x5bf,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4, +0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0xc4,0x5df,0x5ff +}; + +static const uint16_t norm2_nfc_data_trieData[7822]={ +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,4,8,0xc,1, +1,0x10,0x50,0x5c,0x70,0x88,0xcc,0xd0,0xec,0x108,0x144,0x148,0x15c,0x174,0x180,0x1a4, +0x1e4,1,0x1ec,0x20c,0x228,0x244,0x290,0x298,0x2b0,0x2b8,0x2dc,1,1,1,1,1, +1,0x2f4,0x334,0x340,0x354,0x36c,0x3b0,0x3b4,0x3d0,0x3f0,0x428,0x430,0x444,0x45c,0x468,0x48c, +0x4cc,1,0x4d4,0x4f4,0x510,0x530,0x57c,0x584,0x5a0,0x5a8,0x5d0,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x5e8,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x1284,0x128a,0xade,0x1290,0xaf4, +0xafe,0x5f4,0xb08,0x1296,0x129c,0xb12,0x12a2,0x12a8,0x12ae,0x12b4,0xb28,1,0x12ba,0x12c0,0x12c6,0xb32, +0xb48,0xb5a,1,0x5fc,0x12cc,0x12d2,0x12d8,0xb64,0x12de,1,1,0x12e4,0x12ea,0xb7a,0x12f0,0xb90, +0xb9a,0x600,0xba4,0x12f6,0x12fc,0xbae,0x1302,0x1308,0x130e,0x1314,0xbc4,1,0x131a,0x1320,0x1326,0xbce, +0xbe4,0xbf6,1,0x608,0x132c,0x1332,0x1338,0xc00,0x133e,1,0x1344,0x134a,0x1350,0xc16,0xc2c,0x1357, +0x135d,0x1362,0x1368,0x136e,0x1374,0x137a,0x1380,0x1386,0x138c,0x1392,0x1398,1,1,0xc42,0xc50,0x139e, +0x13a4,0x13aa,0x13b0,0x13b7,0x13bd,0x13c2,0x13c8,0x13ce,0x13d4,0x13da,0x13e0,0x13e6,0x13ec,0x13f3,0x13f9,0x13fe, +0x1404,1,1,0x140a,0x1410,0x1416,0x141c,0x1422,0x1428,0x142f,0x1435,0x143a,1,1,1,0x1441, +0x1447,0x144d,0x1453,1,0x1458,0x145e,0x1465,0x146b,0x1470,0x1476,1,1,1,0x147c,0x1482,0x1489, +0x148f,0x1494,0x149a,1,1,1,0xc5e,0xc6c,0x14a0,0x14a6,0x14ac,0x14b2,1,1,0x14b8,0x14be, +0x14c5,0x14cb,0x14d0,0x14d6,0xc7a,0xc84,0x14dc,0x14e2,0x14e9,0x14ef,0xc8e,0xc98,0x14f5,0x14fb,0x1500,0x1506, +1,1,0xca2,0xcac,0xcb6,0xcc0,0x150c,0x1512,0x1518,0x151e,0x1524,0x152a,0x1531,0x1537,0x153c,0x1542, +0x1548,0x154e,0x1554,0x155a,0x1560,0x1566,0x156c,0x1572,0x1578,0x60c,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xcca,0xce4,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xcfe,0xd18,1,1,1,1,1, +1,0x610,1,1,1,1,1,1,1,1,1,1,1,1,1,0x157e, +0x1584,0x158a,0x1590,0x1596,0x159c,0x15a2,0x15a8,0x15b0,0x15ba,0x15c4,0x15ce,0x15d8,0x15e2,0x15ec,0x15f6,1, +0x1600,0x160a,0x1614,0x161e,0x1627,0x162d,1,1,0x1632,0x1638,0x163e,0x1644,0xd32,0xd3c,0x164d,0x1657, +0x165f,0x1665,0x166b,1,1,1,0x1670,0x1676,1,1,0x167c,0x1682,0x168a,0x1694,0x169d,0x16a3, +0x16a9,0x16af,0x16b4,0x16ba,0x16c0,0x16c6,0x16cc,0x16d2,0x16d8,0x16de,0x16e4,0x16ea,0x16f0,0x16f6,0x16fc,0x1702, +0x1708,0x170e,0x1714,0x171a,0x1720,0x1726,0x172c,0x1732,0x1738,0x173e,0x1744,0x174a,0x1750,0x1756,1,1, +0x175c,0x1762,1,1,1,1,1,1,0xd46,0xd50,0xd5a,0xd64,0x176a,0x1774,0x177e,0x1788, +0xd6e,0xd78,0x1792,0x179c,0x17a4,0x17aa,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x614,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc,0xfdcc, +0xfdcc,0xffcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xffcc,0xfdcc,0xfdcc,0xffd0,0xffb8,0xffb8,0xffb8,0xffb8,0xffd0,0xfdb0, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xff94,0xff94,0xfdb8,0xfdb8,0xfdb8,0xfdb8,0xfd94,0xfd94,0xffb8,0xffb8,0xffb8, +0xffb8,0xfdb8,0xfdb8,0xffb8,0xfdb8,0xfdb8,0xffb8,0xffb8,0xfe02,0xfe02,0xfe02,0xfe02,0xfc02,0xffb8,0xffb8,0xffb8, +0xffb8,0xffcc,0xffcc,0xffcc,0x3c26,0x3c2c,0xfdcc,0x3c32,0x3c38,0xfde0,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, +0xffcc,0xffb8,0xffb8,1,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffd0,0xffb8,0xffb8,0xffcc, +0xffd2,0xffd4,0xffd4,0xffd2,0xffd4,0xffd4,0xffd2,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,0x29d1,1,1,1,1,1,1,1, +1,1,0x29d5,1,1,1,1,1,0x17b1,0x17b7,0x29d9,0x17bd,0x17c3,0x17c9,1,0x17cf, +1,0x17d5,0x17db,0x17e3,0x618,1,1,1,0x634,1,0x644,1,0x658,1,1,1, +1,1,0x674,1,0x684,1,1,1,0x688,1,1,1,0x6a0,0x17eb,0x17f1,0xd82, +0x17f7,0xd8c,0x17fd,0x1805,0x6b4,1,1,1,0x6d4,1,0x6e4,1,0x6fc,1,1,1, +1,1,0x71c,1,0x72c,1,1,1,0x734,1,1,1,0x754,0xd96,0xda8,0x180d, +0x1813,0xdba,1,1,1,0x76c,0x1819,0x181f,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x1825,0x182b,1,0x1831,1,1,0x774,0x1837,1,1,1,1,0x183d, +0x1843,0x1849,1,0x778,1,1,0x780,1,0x784,0x790,0x798,0x79c,0x184f,0x7ac,1,1, +1,0x7b0,1,1,1,1,0x7b4,1,1,1,0x7c4,1,1,1,0x7c8,1, +0x7cc,1,1,0x7d0,1,1,0x7d8,1,0x7dc,0x7e8,0x7f0,0x7f4,0x1855,0x804,1,1, +1,0x808,1,1,1,0x80c,1,1,1,0x81c,1,1,1,0x820,1,0x824, +1,1,0x185b,0x1861,1,0x1867,1,1,0x828,0x186d,1,1,1,1,0x1873,0x1879, +0x187f,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x82c,0x830,0x1885,0x188b,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x1891, +0x1897,1,1,1,1,1,1,1,1,1,1,1,1,1,0x189d,0x18a3, +0x18a9,0x18af,1,1,0x18b5,0x18bb,0x834,0x838,0x18c1,0x18c7,0x18cd,0x18d3,0x18d9,0x18df,1,1, +0x18e5,0x18eb,0x18f1,0x18f7,0x18fd,0x1903,0x83c,0x840,0x1909,0x190f,0x1915,0x191b,0x1921,0x1927,0x192d,0x1933, +0x1939,0x193f,0x1945,0x194b,1,1,0x1951,0x1957,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, 0xffcc,0xffcc,0xffbc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, 0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffbc,0xffc8,0xffcc,0xfe14,0xfe16,0xfe18,0xfe1a,0xfe1c,0xfe1e,0xfe20,0xfe22, @@ -281,369 +220,415 @@ static const uint16_t norm2_nfc_data_trieIndex[9976]={ 1,1,0x85c,0x1987,1,0x860,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, 0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0xfe48,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, +0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc, +0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffcc,0xffb8,0xffb8,0xffcc,0xffb8,0xffcc, -0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1, +0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,1,1,1,1, -1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc, -0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8, -0xfe36,0xfe38,0xfe3a,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -1,1,1,1,1,1,1,1,0x864,0x198d,1,1,1,1,1,1, -0x868,0x1993,1,0x86c,0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, -1,0xffcc,0xffb8,0xffcc,0xffcc,1,1,1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffb8, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xfe36,0xfe38,0xfe3a,0xffcc, +0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1,0xfc00,1, -1,1,1,1,1,1,1,0x870,1,1,1,0x199f,0x19a5,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,0x2a0c,0x2a12,1,0x2a18, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,1, +1,1,1,1,0x864,0x198d,1,1,1,1,1,1,0x868,0x1993,1,0x86c, +0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc,1,1, +1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x2a1e,1,1,0x2a24,1,1,1,1,1,0xfe0e,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, -1,1,1,1,1,1,1,1,1,0x2a2a,0x2a30,0x2a36,1,1,0x2a3c,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0x870, +1,1,1,0x199f,0x19a5,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00, +1,1,1,1,0x2a0c,0x2a12,1,0x2a18,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x2a1e,1,1,0x2a24,1,1, +1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,0x2a2a,0x2a30,0x2a36, +1,1,0x2a3c,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e, 1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x878,0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1, -1,1,0xfc00,0xfc00,1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x884,1, -0x19bd,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfc00,1,1,1,1,1,1,1,0x888,0x890,1,1,0x19c3,0x19c9, -0x19cf,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x878,0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1,1,1,0xfc00, +0xfc00,1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x884,1,0x19bd,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x888,0x890,1,1, +0x19c3,0x19c9,0x19cf,0xfe12,1,1,1,1,1,1,1,1,1,0xfc00,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0x894,1,0x19d5,1,1,1,1,0xfe12,1,1, 1,1,1,1,1,0xfea8,0xfcb6,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xfe0e,1,1,0x898,0x19db,1,0xfc00,1,1,1,0x89c,0x19e1, -0x19e7,1,0xdc4,0x19ef,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1, -1,1,1,1,1,1,0x8a8,0x8b0,1,1,0x19f7,0x19fd,0x1a03,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,1,0xfc12,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,0x8b4,0x1a09,1,0xdce,0x1a11,0x1a19,0xfc00,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfece,0xfece,0xfe12,1,1,1,1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xfeec,0xfeec,1,1,1,1,1,1, -1,1,1,1,0xfef4,0xfef4,0xfef4,0xfef4,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xffb8,1,0xffb8,1,0xffb0,1,1, -1,1,1,1,1,1,1,0x2a4f,1,1,1,1,1,1,1,1, -1,0x2a55,1,1,1,1,0x2a5b,1,1,1,1,0x2a61,1,1,1,1, -0x2a67,1,1,1,1,1,1,1,1,1,1,1,1,0x2a6d,1,1, -1,1,1,1,1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72,1,0x2a78,1,0xff04,0xff04, -0xff04,0xff04,1,1,0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,1,0x2a7f,1,1,1,1,1,1,1,1, -1,0x2a85,1,1,1,1,0x2a8b,1,1,1,1,0x2a91,1,1,1,1, -0x2a97,1,1,1,1,1,1,1,1,1,1,1,1,0x2a9d,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0xfe0e,1,1,0x898,0x19db,1,0xfc00,1,1,1,0x89c,0x19e1,0x19e7, +1,0xdc4,0x19ef,1,0xfe12,1,1,1,1,1,1,1,0xfc00,0xfc00,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,0xfc00,1,1,1, +1,1,1,0x8a8,0x8b0,1,1,0x19f7,0x19fd,0x1a03,0xfe12,1,1,1,1,1, +1,1,1,1,0xfc00,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc12,1,1, +1,1,0xfc00,1,1,1,1,1,1,1,1,1,0x8b4,0x1a09,1,0xdce, +0x1a11,0x1a19,0xfc00,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xfece,0xfece,0xfe12,1,1, +1,1,1,1,1,1,0xfed6,0xfed6,0xfed6,0xfed6,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0xfeec,0xfeec,1,1,1,1,1,1,1,1,0xfef4,0xfef4,0xfef4,0xfef4, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0xffb8,1,0xffb8,1,0xffb0,1,1,1,1,1,1,0x2a4f,1,1,1, +1,1,1,1,1,1,0x2a55,1,1,1,1,0x2a5b,1,1,1,1, +0x2a61,1,1,1,1,0x2a67,1,1,1,1,1,1,1,1,1,1, +1,1,0x2a6d,1,1,1,1,1,1,1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72, +1,0x2a78,1,0xff04,0xff04,0xff04,0xff04,1,1,0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,0x2a7f,1,1,1, +1,1,1,1,1,1,0x2a85,1,1,1,1,0x2a8b,1,1,1,1, +0x2a91,1,1,1,1,0x2a97,1,1,1,1,1,1,1,1,1,1, +1,1,0x2a9d,1,1,1,1,1,1,0xffb8,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,0x8c0,0x1a1f,1,1,1,1,1,1,1,0xfc00,1,1,1, +1,1,1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,1, 1,1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x8c0,0x1a1f,1,1,1,1,1,1,1,0xfc00,1,1,1,1,1, -1,1,1,0xfe0e,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffb8,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1, +1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1, -1,1,1,1,1,1,1,1,1,0xffc8,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffb8,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffb8,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8, -0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,0xffb8,1,1,1,1,1,1,1,0x8c4,0x1a25,0x8c8, -0x1a2b,0x8cc,0x1a31,0x8d0,0x1a37,0x8d4,0x1a3d,1,1,0x8d8,0x1a43,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a49,0x8e0,0x1a4f,0x8e4,0x8e8,0x1a55,0x1a5b,0x8ec,0x1a61, -0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, -0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe0e,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,0xfe12,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1, -1,0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1,1, -1,1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc, -0xffcc,0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8, -0xffc8,0xffb8,1,0xffcc,0xffd2,0xffb8,0xffcc,0xffb8,0x1a66,0x1a6c,0x1a72,0x1a78,0x1a7f,0x1a85,0x1a8b,0x1a91, -0x1a99,0x1aa3,0x1aaa,0x1ab0,0x1ab6,0x1abc,0x1ac2,0x1ac8,0x1acf,0x1ad5,0x1ada,0x1ae0,0x1ae8,0x1af2,0x1afc,0x1b06, -0x1b0e,0x1b14,0x1b1a,0x1b20,0x1b29,0x1b33,0x1b3b,0x1b41,0x1b46,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70, -0x1b77,0x1b7d,0x1b82,0x1b88,0x1b8e,0x1b94,0x1b9c,0x1ba6,0x1bae,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0xdd8,0xde2, -0x1bd4,0x1bde,0x1be6,0x1bec,0x1bf2,0x1bf8,0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c17,0x1c1d,0x1c22,0x1c28,0x1c2e,0x1c34, -0x1c3a,0x1c40,0x1c46,0x1c4c,0x1c54,0x1c5e,0x1c68,0x1c72,0x1c7c,0x1c86,0x1c90,0x1c9a,0x1ca3,0x1ca9,0x1caf,0x1cb5, -0x1cba,0x1cc0,0xdec,0xdf6,0x1cc8,0x1cd2,0x1cda,0x1ce0,0x1ce6,0x1cec,0xe00,0xe0a,0x1cf4,0x1cfe,0x1d08,0x1d12, -0x1d1c,0x1d26,0x1d2e,0x1d34,0x1d3a,0x1d40,0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c, -0x1d84,0x1d8e,0x1d98,0x1da2,0x1daa,0x1db0,0x1db7,0x1dbd,0x1dc2,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec, -0x1df3,0x1df9,0x1dff,0x1e05,0x1e0b,0x1e11,0x1e16,0x1e1c,0x1e22,0x1e28,0x1e2f,0x1e35,0x1e3b,0x1e41,0x1e46,0x1e4c, -0x1e52,0x1e58,1,0x1e5f,1,1,1,1,0xe14,0xe22,0x1e64,0x1e6a,0x1e72,0x1e7c,0x1e86,0x1e90, -0x1e9a,0x1ea4,0x1eae,0x1eb8,0x1ec2,0x1ecc,0x1ed6,0x1ee0,0x1eea,0x1ef4,0x1efe,0x1f08,0x1f12,0x1f1c,0x1f26,0x1f30, -0xe30,0xe3a,0x1f38,0x1f3e,0x1f44,0x1f4a,0x1f52,0x1f5c,0x1f66,0x1f70,0x1f7a,0x1f84,0x1f8e,0x1f98,0x1fa2,0x1fac, -0x1fb4,0x1fba,0x1fc0,0x1fc6,0xe44,0xe4e,0x1fcc,0x1fd2,0x1fda,0x1fe4,0x1fee,0x1ff8,0x2002,0x200c,0x2016,0x2020, -0x202a,0x2034,0x203e,0x2048,0x2052,0x205c,0x2066,0x2070,0x207a,0x2084,0x208e,0x2098,0x20a0,0x20a6,0x20ac,0x20b2, -0x20ba,0x20c4,0x20ce,0x20d8,0x20e2,0x20ec,0x20f6,0x2100,0x210a,0x2114,0x211c,0x2122,0x2129,0x212f,0x2134,0x213a, -0x2140,0x2146,1,1,1,1,1,1,0xe58,0xe6e,0xe86,0xe94,0xea2,0xeb0,0xebe,0xecc, -0xed8,0xeee,0xf06,0xf14,0xf22,0xf30,0xf3e,0xf4c,0xf58,0xf66,0x214f,0x2159,0x2163,0x216d,1,1, -0xf74,0xf82,0x2177,0x2181,0x218b,0x2195,1,1,0xf90,0xfa6,0xfbe,0xfcc,0xfda,0xfe8,0xff6,0x1004, -0x1010,0x1026,0x103e,0x104c,0x105a,0x1068,0x1076,0x1084,0x1090,0x10a2,0x219f,0x21a9,0x21b3,0x21bd,0x21c7,0x21d1, -0x10b4,0x10c6,0x21db,0x21e5,0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1, -0x10f4,0x1102,0x223f,0x2249,0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299, -1,0x1134,1,0x22a3,1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba, -0x11c6,0x11dc,0x11f4,0x1202,0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6, -0x22cb,0x3bae,0x22d1,0x3bb6,0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341, -0x234c,0x2356,0x2365,0x2375,0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429, -0x2434,0x243e,0x244d,0x245d,0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511, -0x251c,0x2526,0x2535,0x2545,0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7, -0x25bf,0x25c5,0x25cb,0x3bc6,0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9, -0x2601,0x3bce,0x2607,0x3bd6,0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643, -0x264b,0x2651,0x2657,0x3bea,1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699, -0x26a1,0x26a7,0x26ad,0x3bfe,0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db, -0x26e3,0x3c0e,0x26e9,0x3c16,0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc, -0xfe02,0xfe02,0xfe02,0xffcc,0xffcc,1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc, -0xffb8,0xffcc,0xfe02,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0x2aae,1,1,1,0x2ab2,0x3c1e, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1,0x26f5,0x26fb, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2701,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0x2707,0x270d,0x2713,0x914,1,0x918,1,0x91c,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x920,0x2719,1,1,1,0x924,0x271f,1,0x928, -0x2725,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x92c,0x272b,0x930,0x2731,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0x934,1,1,1,1,0x2737,1,0x938,0x273d,0x93c,1,0x2743,0x940,0x2749,1,1, -1,0x944,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x274f,0x948,0x2755,1,0x94c,0x950,1,1,1,1,1,1, -1,0x275b,0x2761,0x2767,0x276d,0x2773,0x954,0x958,0x2779,0x277f,0x95c,0x960,0x2785,0x278b,0x964,0x968, -0x96c,0x970,1,1,0x2791,0x2797,0x974,0x978,0x279d,0x27a3,0x97c,0x980,0x27a9,0x27af,1,1, -1,1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1,0x998, -0x27b5,0x27bb,0x27c1,0x27c7,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1,1, -1,1,1,1,0x27cd,0x27d3,0x27d9,0x27df,1,1,1,1,1,1,0x27e5,0x27eb, -0x27f1,0x27f7,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0x2ab7,0x2abb,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x2abf,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, -1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0,0xffc0,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x9ac,1, -1,1,1,0x9b0,0x27fd,0x9b4,0x2803,0x9b8,0x2809,0x9bc,0x280f,0x9c0,0x2815,0x9c4,0x281b,0x9c8, -0x2821,0x9cc,0x2827,0x9d0,0x282d,0x9d4,0x2833,0x9d8,0x2839,0x9dc,0x283f,1,0x9e0,0x2845,0x9e4,0x284b, -0x9e8,0x2851,1,1,1,1,1,0x9ec,0x2857,0x285d,0x9f4,0x2863,0x2869,0x9fc,0x286f,0x2875, -0xa04,0x287b,0x2881,0xa0c,0x2887,0x288d,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x2893,1,1,1, -1,0xfc10,0xfc10,1,1,0xa14,0x2899,1,1,1,1,1,1,1,0xa18,1, -1,1,1,0xa1c,0x289f,0xa20,0x28a5,0xa24,0x28ab,0xa28,0x28b1,0xa2c,0x28b7,0xa30,0x28bd,0xa34, -0x28c3,0xa38,0x28c9,0xa3c,0x28cf,0xa40,0x28d5,0xa44,0x28db,0xa48,0x28e1,1,0xa4c,0x28e7,0xa50,0x28ed, -0xa54,0x28f3,1,1,1,1,1,0xa58,0x28f9,0x28ff,0xa60,0x2905,0x290b,0xa68,0x2911,0x2917, -0xa70,0x291d,0x2923,0xa78,0x2929,0x292f,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x2935,1,1,0x293b, -0x2941,0x2947,0x294d,1,1,0xa90,0x2953,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, +1,1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, +0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1, -1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,1,0xffcc,0xffcc,0xffb8,1,1,0xffcc, -0xffcc,1,1,1,1,1,0xffcc,0xffcc,1,0xffcc,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, -1,1,1,1,0x2ac5,0x2ac9,0x2acd,0x2ad1,0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae1,0x2ae5,0x2ae9,0x2aed, -0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25,0x2b29,0x2b2d, -0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65,0x2b69,0x2b6d, -0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5,0x2ba9,0x2bad, -0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5,0x2be9,0x2bed, -0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25,0x2c29,0x2c2d, -0x2b11,0x2c31,0x2c35,0x2c39,0x2c3d,0x2c41,0x2c45,0x2c49,0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61,0x2c65,0x2c69, -0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1,0x2ca5,0x2ca9, -0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1,0x2ce5,0x2ce9, -0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21,0x2d25,0x2d29, -0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2c79,0x2d41,0x2d45,0x2d49,0x2d4d,0x2d51,0x2d55,0x2d59,0x2d5d,0x2c39,0x2d61, -0x2d65,0x2d69,0x2d6d,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99,0x2d9d,0x2da1, -0x2da5,0x2da9,0x2dad,0x2b11,0x2db1,0x2db5,0x2db9,0x2dbd,0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5,0x2dd9,0x2ddd, -0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15,0x2e19,0x2c41, -0x2e1d,0x2e21,0x2e25,0x2e29,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51,0x2e55,0x2e59, -0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91,0x2e95,0x2e99, -0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1,0x2ed5,0x2ed9, -0x2edd,0x2ee1,1,1,0x2ee5,1,0x2ee9,1,1,0x2eed,0x2ef1,0x2ef5,0x2ef9,0x2efd,0x2f01,0x2f05, -0x2f09,0x2f0d,0x2f11,1,0x2f15,1,0x2f19,1,1,0x2f1d,0x2f21,1,1,1,0x2f25,0x2f29, -0x2f2d,0x2f31,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61,0x2f65,0x2f69, -0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1,0x2fa5,0x2fa9, -0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,0x2fd1,0x2fd5,0x2d15,0x2fd9,0x2fdd,0x2fe1,0x2fe5, -0x2fe9,0x2fed,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x3001,0x3005,0x3009,0x300d,0x2f1d,0x3011,0x3015,0x3019,0x301d, -0x3021,0x3027,1,1,0x302b,0x302f,0x3033,0x3037,0x303b,0x303f,0x3043,0x3047,0x2f55,0x304b,0x304f,0x3053, -0x2ee5,0x3057,0x305b,0x305f,0x3063,0x3067,0x306b,0x306f,0x3073,0x3077,0x307b,0x307f,0x3083,0x2f79,0x3087,0x2f7d, -0x308b,0x308f,0x3093,0x3097,0x309b,0x2ee9,0x2b65,0x309f,0x30a3,0x30a7,0x2c7d,0x2dd9,0x30ab,0x30af,0x2f99,0x30b3, -0x2f9d,0x30b7,0x30bb,0x30bf,0x2ef1,0x30c3,0x30c7,0x30cb,0x30cf,0x30d3,0x2ef5,0x30d7,0x30db,0x30df,0x30e3,0x30e7, -0x30eb,0x2fd5,0x30ef,0x30f3,0x2d15,0x30f7,0x2fe5,0x30fb,0x30ff,0x3103,0x3107,0x310b,0x2ff9,0x310f,0x2f19,0x3113, -0x2ffd,0x2c31,0x3117,0x3001,0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133,0x3015,0x3137, -0x3019,0x313b,0x2ae1,0x313f,0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0x3176,0xfe34,0x317c, -1,1,1,1,1,1,1,1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8, -0x31ae,0x31b4,0x31ba,0x31c0,0x31c6,0x31cc,0x31d2,1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1, -0x31fc,0x3202,1,0x3208,0x320e,1,0x3214,0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xffb8,1,1, -0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1,1,0xffcc,0xffb8,1, +1,0xfe12,1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc,0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8, -0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xa94,0x2959,0xa9a, -0x2963,1,1,1,1,1,1,1,1,0xaa0,1,1,1,1,1,0x296d, -1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,0xfc0e,1, -1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffc8,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,0x2977,0x2981, -1,0xaa6,0xaac,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1, -0xfe12,1,1,1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe12,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,0xfe0e,0xfe0e,1,0xfc00,1, -1,1,1,1,1,1,1,0xab2,1,1,1,0x298b,0x2995,0xfe12,1,1, -1,1,1,1,1,1,1,0xfc00,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12,1, -1,1,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xffcc,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xfc00,1,1,1, -1,1,1,1,1,0xabe,0xfc00,0x299f,0x29a9,0xfc00,0x29b3,1,1,1,0xfe12,0xfe0e, +0xffbc,0xffcc,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc00, -1,1,1,1,1,1,1,1,0xad0,0xad6,0x29bd,0x29c7,1,1,1,0xfe12, -0xfe0e,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +1,1,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,0xfe0e,1, -0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc, +0xffb8,1,1,1,1,1,0x8c4,0x1a25,0x8c8,0x1a2b,0x8cc,0x1a31,0x8d0,0x1a37,0x8d4,0x1a3d, +1,1,0x8d8,0x1a43,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe0e,0xfc00,1,1,1,1,0x8dc,0x1a49,0x8e0,0x1a4f,0x8e4, +0x8e8,0x1a55,0x1a5b,0x8ec,0x1a61,0xfe12,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffb8,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe02,1, +1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1,1, +1,1,1,0xfe12,0xfe12,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc, +0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1, +1,1,0xffb8,1,1,1,1,1,1,0xffcc,1,1,1,0xffcc,0xffcc,1, +1,1,1,1,1,0xffcc,0xffcc,0xffb8,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8, +0xffcc,0xffcc,0xffd4,0xffac,0xffb8,0xff94,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffd0,0xffc8,0xffc8,0xffb8,1, +0xffcc,0xffd2,0xffb8,0xffcc,0xffb8,0x1a66,0x1a6c,0x1a72,0x1a78,0x1a7f,0x1a85,0x1a8b,0x1a91,0x1a99,0x1aa3,0x1aaa, +0x1ab0,0x1ab6,0x1abc,0x1ac2,0x1ac8,0x1acf,0x1ad5,0x1ada,0x1ae0,0x1ae8,0x1af2,0x1afc,0x1b06,0x1b0e,0x1b14,0x1b1a, +0x1b20,0x1b29,0x1b33,0x1b3b,0x1b41,0x1b46,0x1b4c,0x1b52,0x1b58,0x1b5e,0x1b64,0x1b6a,0x1b70,0x1b77,0x1b7d,0x1b82, +0x1b88,0x1b8e,0x1b94,0x1b9c,0x1ba6,0x1bae,0x1bb4,0x1bba,0x1bc0,0x1bc6,0x1bcc,0xdd8,0xde2,0x1bd4,0x1bde,0x1be6, +0x1bec,0x1bf2,0x1bf8,0x1bfe,0x1c04,0x1c0a,0x1c10,0x1c17,0x1c1d,0x1c22,0x1c28,0x1c2e,0x1c34,0x1c3a,0x1c40,0x1c46, +0x1c4c,0x1c54,0x1c5e,0x1c68,0x1c72,0x1c7c,0x1c86,0x1c90,0x1c9a,0x1ca3,0x1ca9,0x1caf,0x1cb5,0x1cba,0x1cc0,0xdec, +0xdf6,0x1cc8,0x1cd2,0x1cda,0x1ce0,0x1ce6,0x1cec,0xe00,0xe0a,0x1cf4,0x1cfe,0x1d08,0x1d12,0x1d1c,0x1d26,0x1d2e, +0x1d34,0x1d3a,0x1d40,0x1d46,0x1d4c,0x1d52,0x1d58,0x1d5e,0x1d64,0x1d6a,0x1d70,0x1d76,0x1d7c,0x1d84,0x1d8e,0x1d98, +0x1da2,0x1daa,0x1db0,0x1db7,0x1dbd,0x1dc2,0x1dc8,0x1dce,0x1dd4,0x1dda,0x1de0,0x1de6,0x1dec,0x1df3,0x1df9,0x1dff, +0x1e05,0x1e0b,0x1e11,0x1e16,0x1e1c,0x1e22,0x1e28,0x1e2f,0x1e35,0x1e3b,0x1e41,0x1e46,0x1e4c,0x1e52,0x1e58,1, +0x1e5f,1,1,1,1,0xe14,0xe22,0x1e64,0x1e6a,0x1e72,0x1e7c,0x1e86,0x1e90,0x1e9a,0x1ea4,0x1eae, +0x1eb8,0x1ec2,0x1ecc,0x1ed6,0x1ee0,0x1eea,0x1ef4,0x1efe,0x1f08,0x1f12,0x1f1c,0x1f26,0x1f30,0xe30,0xe3a,0x1f38, +0x1f3e,0x1f44,0x1f4a,0x1f52,0x1f5c,0x1f66,0x1f70,0x1f7a,0x1f84,0x1f8e,0x1f98,0x1fa2,0x1fac,0x1fb4,0x1fba,0x1fc0, +0x1fc6,0xe44,0xe4e,0x1fcc,0x1fd2,0x1fda,0x1fe4,0x1fee,0x1ff8,0x2002,0x200c,0x2016,0x2020,0x202a,0x2034,0x203e, +0x2048,0x2052,0x205c,0x2066,0x2070,0x207a,0x2084,0x208e,0x2098,0x20a0,0x20a6,0x20ac,0x20b2,0x20ba,0x20c4,0x20ce, +0x20d8,0x20e2,0x20ec,0x20f6,0x2100,0x210a,0x2114,0x211c,0x2122,0x2129,0x212f,0x2134,0x213a,0x2140,0x2146,1, +1,1,1,1,1,0xe58,0xe6e,0xe86,0xe94,0xea2,0xeb0,0xebe,0xecc,0xed8,0xeee,0xf06, +0xf14,0xf22,0xf30,0xf3e,0xf4c,0xf58,0xf66,0x214f,0x2159,0x2163,0x216d,1,1,0xf74,0xf82,0x2177, +0x2181,0x218b,0x2195,1,1,0xf90,0xfa6,0xfbe,0xfcc,0xfda,0xfe8,0xff6,0x1004,0x1010,0x1026,0x103e, +0x104c,0x105a,0x1068,0x1076,0x1084,0x1090,0x10a2,0x219f,0x21a9,0x21b3,0x21bd,0x21c7,0x21d1,0x10b4,0x10c6,0x21db, +0x21e5,0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1,0x10f4,0x1102,0x223f, +0x2249,0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299,1,0x1134,1, +0x22a3,1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba,0x11c6,0x11dc,0x11f4, +0x1202,0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6,0x22cb,0x3bae,0x22d1, +0x3bb6,0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341,0x234c,0x2356,0x2365, +0x2375,0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429,0x2434,0x243e,0x244d, +0x245d,0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511,0x251c,0x2526,0x2535, +0x2545,0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb, +0x3bc6,0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bce,0x2607, +0x3bd6,0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643,0x264b,0x2651,0x2657, +0x3bea,1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad, +0x3bfe,0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0e,0x26e9, +0x3c16,0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02, +0xffcc,0xffcc,1,1,1,1,0xffcc,1,1,1,0xfe02,0xfe02,0xffcc,0xffb8,0xffcc,0xfe02, +0xfe02,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0x2aae,1,1,1, +0x2ab2,0x3c1e,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x908,1,0x90c,1,0x910,1,1,1,1,1, +0x26f5,0x26fb,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x2701,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x2707,0x270d,0x2713,0x914,1,0x918,1,0x91c,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x920,0x2719,1,1,1,0x924,0x271f, +1,0x928,0x2725,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x92c,0x272b,0x930,0x2731,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x934,1,1,1,0x2737,1,0x938,0x273d,0x93c,1,0x2743,0x940,0x2749,1, +1,1,0x944,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x274f,0x948,0x2755,1,0x94c,0x950,1,1,1,1,1, +1,1,0x275b,0x2761,0x2767,0x276d,0x2773,0x954,0x958,0x2779,0x277f,0x95c,0x960,0x2785,0x278b,0x964, +0x968,0x96c,0x970,1,1,0x2791,0x2797,0x974,0x978,0x279d,0x27a3,0x97c,0x980,0x27a9,0x27af,1, +1,1,1,1,1,1,0x984,0x988,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0x98c,1,1,1,1,1,0x990,0x994,1, +0x998,0x27b5,0x27bb,0x27c1,0x27c7,1,1,0x99c,0x9a0,0x9a4,0x9a8,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x27cd,0x27d3,0x27d9,0x27df,1, +1,1,1,1,1,0x27e5,0x27eb,0x27f1,0x27f7,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,0x2ab7,0x2abb,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +0x2abf,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x324a,0x3254, -0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0, -0xffb0,0xffb0,0xffb0,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8, -0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, -1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,0x333c,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,1,1,1, +0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1, +1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, -0xffcc,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc, -0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1, -1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,0x334b,0x334f,0x3353,0x3357,0x335d,0x2f3d,0x3361,0x3365,0x3369,0x336d,0x2f41,0x3371, -0x3375,0x3379,0x2f45,0x337f,0x3383,0x3387,0x338b,0x3391,0x3395,0x3399,0x339d,0x33a3,0x33a7,0x33ab,0x33af,0x302f, -0x33b3,0x33b9,0x33bd,0x33c1,0x33c5,0x33c9,0x33cd,0x33d1,0x33d5,0x3043,0x2f49,0x2f4d,0x3047,0x33d9,0x33dd,0x2c49, -0x33e1,0x2f51,0x33e5,0x33e9,0x33ed,0x33f1,0x33f1,0x33f1,0x33f5,0x33fb,0x33ff,0x3403,0x3407,0x340d,0x3411,0x3415, -0x3419,0x341d,0x3421,0x3425,0x3429,0x342d,0x3431,0x3435,0x3439,0x343d,0x343d,0x304f,0x3441,0x3445,0x3449,0x344d, -0x2f59,0x3451,0x3455,0x3459,0x2ead,0x345d,0x3461,0x3465,0x3469,0x346d,0x3471,0x3475,0x3479,0x347d,0x3483,0x3487, -0x348b,0x348f,0x3493,0x3497,0x349b,0x34a1,0x34a7,0x34ab,0x34af,0x34b3,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34c7, -0x34cb,0x34d1,0x34d5,0x2c39,0x34d9,0x34dd,0x34e3,0x34e7,0x34eb,0x34ef,0x34f3,0x34f7,0x2f6d,0x34fb,0x34ff,0x3503, -0x3509,0x350d,0x3513,0x3517,0x351b,0x351f,0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3545,0x3549, -0x354d,0x3551,0x2b61,0x3555,0x355b,0x355f,0x355f,0x3565,0x3569,0x3569,0x356d,0x3571,0x3577,0x357d,0x3581,0x3585, -0x3589,0x358d,0x3591,0x3595,0x3599,0x359d,0x35a1,0x2f71,0x35a5,0x35ab,0x35af,0x35b3,0x307f,0x35b3,0x35b7,0x2f79, -0x35bb,0x35bf,0x35c3,0x35c7,0x2f7d,0x2af5,0x35cb,0x35cf,0x35d3,0x35d7,0x35db,0x35df,0x35e3,0x35e9,0x35ed,0x35f1, -0x35f5,0x35f9,0x35fd,0x3603,0x3607,0x360b,0x360f,0x3613,0x3617,0x361b,0x361f,0x3623,0x2f81,0x3627,0x362b,0x3631, -0x3635,0x3639,0x363d,0x2f89,0x3641,0x3645,0x3649,0x364d,0x3651,0x3655,0x3659,0x365d,0x2b65,0x309f,0x3661,0x3665, -0x3669,0x366d,0x3673,0x3677,0x367b,0x367f,0x2f8d,0x3683,0x3689,0x368d,0x3691,0x3151,0x3695,0x3699,0x369d,0x36a1, -0x36a5,0x36ab,0x36af,0x36b3,0x36b7,0x36bd,0x36c1,0x36c5,0x36c9,0x2c7d,0x36cd,0x36d1,0x36d7,0x36dd,0x36e3,0x36e7, -0x36ed,0x36f1,0x36f5,0x36f9,0x36fd,0x2f91,0x2dd9,0x3701,0x3705,0x3709,0x370d,0x3713,0x3717,0x371b,0x371f,0x30af, -0x3723,0x3727,0x372d,0x3731,0x3735,0x373b,0x3741,0x3745,0x30b3,0x3749,0x374d,0x3751,0x3755,0x3759,0x375d,0x3761, -0x3767,0x376b,0x3771,0x3775,0x377b,0x30bb,0x377f,0x3783,0x3789,0x378d,0x3791,0x3797,0x379d,0x37a1,0x37a5,0x37a9, -0x37ad,0x37ad,0x37b1,0x37b5,0x30c3,0x37b9,0x37bd,0x37c1,0x37c5,0x37c9,0x37cf,0x37d3,0x2c45,0x37d9,0x37df,0x37e3, -0x37e9,0x37ef,0x37f5,0x37f9,0x30db,0x37fd,0x3803,0x3809,0x380f,0x3815,0x3819,0x3819,0x30df,0x3159,0x381d,0x3821, -0x3825,0x3829,0x382f,0x2bad,0x30e7,0x3833,0x3837,0x2fbd,0x383d,0x3843,0x2f05,0x3849,0x384d,0x2fcd,0x3851,0x3855, -0x3859,0x385f,0x385f,0x3865,0x3869,0x386d,0x3873,0x3877,0x387b,0x387f,0x3885,0x3889,0x388d,0x3891,0x3895,0x3899, -0x389f,0x38a3,0x38a7,0x38ab,0x38af,0x38b3,0x38b7,0x38bd,0x38c3,0x38c7,0x38cd,0x38d1,0x38d7,0x38db,0x2fe5,0x38df, -0x38e5,0x38eb,0x38ef,0x38f5,0x38f9,0x38ff,0x3903,0x3907,0x390b,0x390f,0x3913,0x3917,0x391d,0x3923,0x3929,0x3565, -0x392f,0x3933,0x3937,0x393b,0x393f,0x3943,0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x2c8d,0x3961,0x3965,0x3969, -0x396d,0x3971,0x3975,0x2ff1,0x3979,0x397d,0x3981,0x3985,0x3989,0x398f,0x3995,0x399b,0x399f,0x39a3,0x39a7,0x39ab, -0x39b1,0x39b5,0x39bb,0x39bf,0x39c3,0x39c9,0x39cf,0x39d3,0x2b99,0x39d7,0x39db,0x39df,0x39e3,0x39e7,0x39eb,0x3103, -0x39ef,0x39f3,0x39f7,0x39fb,0x39ff,0x3a03,0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a19,0x3a1d,0x3a21,0x3a25,0x3a29,0x3a2d, -0x3a33,0x3a39,0x3a3d,0x3a41,0x3117,0x311b,0x3a45,0x3a49,0x3a4f,0x3a53,0x3a57,0x3a5b,0x3a5f,0x3a65,0x3a6b,0x3a6f, -0x3a73,0x3a77,0x3a7d,0x311f,0x3a81,0x3a87,0x3a8d,0x3a91,0x3a95,0x3a99,0x3a9f,0x3aa3,0x3aa7,0x3aab,0x3aaf,0x3ab3, -0x3ab7,0x3abb,0x3ac1,0x3ac5,0x3ac9,0x3acd,0x3ad3,0x3ad7,0x3adb,0x3adf,0x3ae3,0x3ae9,0x3aef,0x3af3,0x3af7,0x3afb, -0x3b01,0x3b05,0x3137,0x3137,0x3b0b,0x3b0f,0x3b15,0x3b19,0x3b1d,0x3b21,0x3b25,0x3b29,0x3b2d,0x3b31,0x313b,0x3b37, -0x3b3b,0x3b3f,0x3b43,0x3b47,0x3b4b,0x3b51,0x3b55,0x3b5b,0x3b61,0x3b67,0x3b6b,0x3b6f,0x3b73,0x3b77,0x3b7b,0x3b7f, -0x3b83,0x3b87,1,1,2,2,2,2,2,2,2,2,2,2,2,2, -2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1,1,1,1,1,1, -1,1,1,1,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00, -0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,0xfe00,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0xadc,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0xffb4,0xffc8,0xffd0,0xffbc,0xffc0, +0xffc0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,0x9ac,1,1,1,1,0x9b0,0x27fd,0x9b4,0x2803,0x9b8,0x2809,0x9bc,0x280f,0x9c0,0x2815, +0x9c4,0x281b,0x9c8,0x2821,0x9cc,0x2827,0x9d0,0x282d,0x9d4,0x2833,0x9d8,0x2839,0x9dc,0x283f,1,0x9e0, +0x2845,0x9e4,0x284b,0x9e8,0x2851,1,1,1,1,1,0x9ec,0x2857,0x285d,0x9f4,0x2863,0x2869, +0x9fc,0x286f,0x2875,0xa04,0x287b,0x2881,0xa0c,0x2887,0x288d,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,0x2893,1,1, +1,1,0xfc10,0xfc10,1,1,0xa14,0x2899,1,1,1,1,1,1,1,0xa18, +1,1,1,1,0xa1c,0x289f,0xa20,0x28a5,0xa24,0x28ab,0xa28,0x28b1,0xa2c,0x28b7,0xa30,0x28bd, +0xa34,0x28c3,0xa38,0x28c9,0xa3c,0x28cf,0xa40,0x28d5,0xa44,0x28db,0xa48,0x28e1,1,0xa4c,0x28e7,0xa50, +0x28ed,0xa54,0x28f3,1,1,1,1,1,0xa58,0x28f9,0x28ff,0xa60,0x2905,0x290b,0xa68,0x2911, +0x2917,0xa70,0x291d,0x2923,0xa78,0x2929,0x292f,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,0xa80,0xa84,0xa88,0xa8c,1,0x2935,1,1, +0x293b,0x2941,0x2947,0x294d,1,1,0xa90,0x2953,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0xffcc,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,0x1283,0x1283,0x1283,0x1283, -0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, -0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x3c54,1,0x3c54,0x3c54, -0x3c54,0x3c54,0x3c54,0x3c54,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x3c54,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,0x3c54,1,1,1,1,0x3c54,1,1,1, -0x3c54,1,0x3c54,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,0x3b87,1,1,1,1,1 +0xfe12,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffb8,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfe12, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xffcc, +1,0xffcc,0xffcc,0xffb8,1,1,0xffcc,0xffcc,1,1,1,1,1,0xffcc,0xffcc,1, +0xffcc,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0xfe12,1,1,1,1,1,1,1,1,1,0xadc, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0xadc,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283, +0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,0x1283,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x3c56,1,0x3c56,0x3c56,0x3c56, +0x3c56,0x3c56,0x3c56,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x3c56,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,0x3c56,1,1,1,1,0x3c56, +1,1,1,0x3c56,1,0x3c56,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x3b87,1,0x2ac5,0x2ac9,0x2acd,0x2ad1,0x2ad5,0x2ad9,0x2add,0x2ae1,0x2ae1,0x2ae5, +0x2ae9,0x2aed,0x2af1,0x2af5,0x2af9,0x2afd,0x2b01,0x2b05,0x2b09,0x2b0d,0x2b11,0x2b15,0x2b19,0x2b1d,0x2b21,0x2b25, +0x2b29,0x2b2d,0x2b31,0x2b35,0x2b39,0x2b3d,0x2b41,0x2b45,0x2b49,0x2b4d,0x2b51,0x2b55,0x2b59,0x2b5d,0x2b61,0x2b65, +0x2b69,0x2b6d,0x2b71,0x2b75,0x2b79,0x2b7d,0x2b81,0x2b85,0x2b89,0x2b8d,0x2b91,0x2b95,0x2b99,0x2b9d,0x2ba1,0x2ba5, +0x2ba9,0x2bad,0x2bb1,0x2bb5,0x2bb9,0x2bbd,0x2bc1,0x2bc5,0x2bc9,0x2bcd,0x2bd1,0x2bd5,0x2bd9,0x2bdd,0x2be1,0x2be5, +0x2be9,0x2bed,0x2bf1,0x2bf5,0x2bf9,0x2bfd,0x2c01,0x2c05,0x2c09,0x2c0d,0x2c11,0x2c15,0x2c19,0x2c1d,0x2c21,0x2c25, +0x2c29,0x2c2d,0x2b11,0x2c31,0x2c35,0x2c39,0x2c3d,0x2c41,0x2c45,0x2c49,0x2c4d,0x2c51,0x2c55,0x2c59,0x2c5d,0x2c61, +0x2c65,0x2c69,0x2c6d,0x2c71,0x2c75,0x2c79,0x2c7d,0x2c81,0x2c85,0x2c89,0x2c8d,0x2c91,0x2c95,0x2c99,0x2c9d,0x2ca1, +0x2ca5,0x2ca9,0x2cad,0x2cb1,0x2cb5,0x2cb9,0x2cbd,0x2cc1,0x2cc5,0x2cc9,0x2ccd,0x2cd1,0x2cd5,0x2cd9,0x2cdd,0x2ce1, +0x2ce5,0x2ce9,0x2ced,0x2cf1,0x2cf5,0x2cf9,0x2cfd,0x2d01,0x2d05,0x2d09,0x2d0d,0x2d11,0x2d15,0x2d19,0x2d1d,0x2d21, +0x2d25,0x2d29,0x2d2d,0x2d31,0x2d35,0x2d39,0x2d3d,0x2c79,0x2d41,0x2d45,0x2d49,0x2d4d,0x2d51,0x2d55,0x2d59,0x2d5d, +0x2c39,0x2d61,0x2d65,0x2d69,0x2d6d,0x2d71,0x2d75,0x2d79,0x2d7d,0x2d81,0x2d85,0x2d89,0x2d8d,0x2d91,0x2d95,0x2d99, +0x2d9d,0x2da1,0x2da5,0x2da9,0x2dad,0x2b11,0x2db1,0x2db5,0x2db9,0x2dbd,0x2dc1,0x2dc5,0x2dc9,0x2dcd,0x2dd1,0x2dd5, +0x2dd9,0x2ddd,0x2de1,0x2de5,0x2de9,0x2ded,0x2df1,0x2df5,0x2df9,0x2dfd,0x2e01,0x2e05,0x2e09,0x2e0d,0x2e11,0x2e15, +0x2e19,0x2c41,0x2e1d,0x2e21,0x2e25,0x2e29,0x2e2d,0x2e31,0x2e35,0x2e39,0x2e3d,0x2e41,0x2e45,0x2e49,0x2e4d,0x2e51, +0x2e55,0x2e59,0x2e5d,0x2e61,0x2e65,0x2e69,0x2e6d,0x2e71,0x2e75,0x2e79,0x2e7d,0x2e81,0x2e85,0x2e89,0x2e8d,0x2e91, +0x2e95,0x2e99,0x2e9d,0x2ea1,0x2ea5,0x2ea9,0x2ead,0x2eb1,0x2eb5,0x2eb9,0x2ebd,0x2ec1,0x2ec5,0x2ec9,0x2ecd,0x2ed1, +0x2ed5,0x2ed9,0x2edd,0x2ee1,1,1,0x2ee5,1,0x2ee9,1,1,0x2eed,0x2ef1,0x2ef5,0x2ef9,0x2efd, +0x2f01,0x2f05,0x2f09,0x2f0d,0x2f11,1,0x2f15,1,0x2f19,1,1,0x2f1d,0x2f21,1,1,1, +0x2f25,0x2f29,0x2f2d,0x2f31,0x2f35,0x2f39,0x2f3d,0x2f41,0x2f45,0x2f49,0x2f4d,0x2f51,0x2f55,0x2f59,0x2f5d,0x2f61, +0x2f65,0x2f69,0x2f6d,0x2f71,0x2f75,0x2f79,0x2f7d,0x2f81,0x2f85,0x2f89,0x2f8d,0x2f91,0x2f95,0x2f99,0x2f9d,0x2fa1, +0x2fa5,0x2fa9,0x2fad,0x2fb1,0x2fb5,0x2fb9,0x2fbd,0x2fc1,0x2fc5,0x2fc9,0x2fcd,0x2fd1,0x2fd5,0x2d15,0x2fd9,0x2fdd, +0x2fe1,0x2fe5,0x2fe9,0x2fed,0x2fed,0x2ff1,0x2ff5,0x2ff9,0x2ffd,0x3001,0x3005,0x3009,0x300d,0x2f1d,0x3011,0x3015, +0x3019,0x301d,0x3021,0x3027,1,1,0x302b,0x302f,0x3033,0x3037,0x303b,0x303f,0x3043,0x3047,0x2f55,0x304b, +0x304f,0x3053,0x2ee5,0x3057,0x305b,0x305f,0x3063,0x3067,0x306b,0x306f,0x3073,0x3077,0x307b,0x307f,0x3083,0x2f79, +0x3087,0x2f7d,0x308b,0x308f,0x3093,0x3097,0x309b,0x2ee9,0x2b65,0x309f,0x30a3,0x30a7,0x2c7d,0x2dd9,0x30ab,0x30af, +0x2f99,0x30b3,0x2f9d,0x30b7,0x30bb,0x30bf,0x2ef1,0x30c3,0x30c7,0x30cb,0x30cf,0x30d3,0x2ef5,0x30d7,0x30db,0x30df, +0x30e3,0x30e7,0x30eb,0x2fd5,0x30ef,0x30f3,0x2d15,0x30f7,0x2fe5,0x30fb,0x30ff,0x3103,0x3107,0x310b,0x2ff9,0x310f, +0x2f19,0x3113,0x2ffd,0x2c31,0x3117,0x3001,0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133, +0x3015,0x3137,0x3019,0x313b,0x2ae1,0x313f,0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0x3176,0xfe34,0x317c,1,1,1,1,1,1,1, +1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8,0x31ae,0x31b4,0x31ba,0x31c0,0x31c6,0x31cc,0x31d2, +1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1,0x31fc,0x3202,1,0x3208,0x320e,1,0x3214, +0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xffb8,1,0xffcc,1,1,1,1, +1,1,1,1,0xffcc,0xfe02,0xffb8,1,1,1,1,0xfe12,1,1,1,1, +0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,0xffb8,0xffb8,0xffcc,0xffcc, +0xffcc,0xffb8,0xffcc,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,0xa94, +0x2959,0xa9a,0x2963,1,1,1,1,1,0xaa0,1,1,1,1,1,0x296d,1, +1,1,1,1,1,1,1,1,0xfe12,0xfc0e,1,1,1,1,1,1, +1,0xfc00,1,1,1,1,1,1,0x2977,0x2981,1,0xaa6,0xaac,0xfe12,0xfe12,1, +1,1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1, +1,1,1,1,0xfe0e,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1, +1,1,1,1,1,0xfe0e,0xfe12,1,1,1,1,1,1,1,1,1, +1,1,0xfe0e,0xfe0e,1,0xfc00,1,1,1,1,1,1,1,0xab2,1,1, +1,0x298b,0x2995,0xfe12,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,1,1,1,0xfe12,1,1,1,0xfe0e,1,1,1,1,1,1,1, +1,1,0xfc00,1,1,1,1,1,1,1,1,0xabe,0xfc00,0x299f,0x29a9,0xfc00, +0x29b3,1,1,0xfe12,0xfe0e,1,1,1,1,1,1,1,1,1,1,1, +1,0xad0,0xad6,0x29bd,0x29c7,1,1,1,0xfe12,0xfe0e,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0xfe12,0xfe0e,1,1,1,1,1, +1,1,1,0xfe02,0xfe02,0xfe02,0xfe02,0xfe02,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0xfe02,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,0x324a,0x3254,0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02,0xfe02, +0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,1,1,1,1,1,1,1,1, +0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,1,1, +1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1,1, +1,1,1,1,1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324,0x333c,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, +0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,0xffcc, +0xffcc,0xffcc,0xffcc,0xffcc,1,0xffcc,0xffcc,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,1,1,1, +1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,1,1,1,1,1, +1,1,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xffcc,0xfe0e,1,1,1,1,1,0x334b,0x334f, +0x3353,0x3357,0x335d,0x2f3d,0x3361,0x3365,0x3369,0x336d,0x2f41,0x3371,0x3375,0x3379,0x2f45,0x337f,0x3383,0x3387, +0x338b,0x3391,0x3395,0x3399,0x339d,0x33a3,0x33a7,0x33ab,0x33af,0x302f,0x33b3,0x33b9,0x33bd,0x33c1,0x33c5,0x33c9, +0x33cd,0x33d1,0x33d5,0x3043,0x2f49,0x2f4d,0x3047,0x33d9,0x33dd,0x2c49,0x33e1,0x2f51,0x33e5,0x33e9,0x33ed,0x33f1, +0x33f1,0x33f1,0x33f5,0x33fb,0x33ff,0x3403,0x3407,0x340d,0x3411,0x3415,0x3419,0x341d,0x3421,0x3425,0x3429,0x342d, +0x3431,0x3435,0x3439,0x343d,0x343d,0x304f,0x3441,0x3445,0x3449,0x344d,0x2f59,0x3451,0x3455,0x3459,0x2ead,0x345d, +0x3461,0x3465,0x3469,0x346d,0x3471,0x3475,0x3479,0x347d,0x3483,0x3487,0x348b,0x348f,0x3493,0x3497,0x349b,0x34a1, +0x34a7,0x34ab,0x34af,0x34b3,0x34b7,0x34bb,0x34bf,0x34c3,0x34c7,0x34c7,0x34cb,0x34d1,0x34d5,0x2c39,0x34d9,0x34dd, +0x34e3,0x34e7,0x34eb,0x34ef,0x34f3,0x34f7,0x2f6d,0x34fb,0x34ff,0x3503,0x3509,0x350d,0x3513,0x3517,0x351b,0x351f, +0x3523,0x3527,0x352b,0x352f,0x3533,0x3537,0x353b,0x353f,0x3545,0x3549,0x354d,0x3551,0x2b61,0x3555,0x355b,0x355f, +0x355f,0x3565,0x3569,0x3569,0x356d,0x3571,0x3577,0x357d,0x3581,0x3585,0x3589,0x358d,0x3591,0x3595,0x3599,0x359d, +0x35a1,0x2f71,0x35a5,0x35ab,0x35af,0x35b3,0x307f,0x35b3,0x35b7,0x2f79,0x35bb,0x35bf,0x35c3,0x35c7,0x2f7d,0x2af5, +0x35cb,0x35cf,0x35d3,0x35d7,0x35db,0x35df,0x35e3,0x35e9,0x35ed,0x35f1,0x35f5,0x35f9,0x35fd,0x3603,0x3607,0x360b, +0x360f,0x3613,0x3617,0x361b,0x361f,0x3623,0x2f81,0x3627,0x362b,0x3631,0x3635,0x3639,0x363d,0x2f89,0x3641,0x3645, +0x3649,0x364d,0x3651,0x3655,0x3659,0x365d,0x2b65,0x309f,0x3661,0x3665,0x3669,0x366d,0x3673,0x3677,0x367b,0x367f, +0x2f8d,0x3683,0x3689,0x368d,0x3691,0x3151,0x3695,0x3699,0x369d,0x36a1,0x36a5,0x36ab,0x36af,0x36b3,0x36b7,0x36bd, +0x36c1,0x36c5,0x36c9,0x2c7d,0x36cd,0x36d1,0x36d7,0x36dd,0x36e3,0x36e7,0x36ed,0x36f1,0x36f5,0x36f9,0x36fd,0x2f91, +0x2dd9,0x3701,0x3705,0x3709,0x370d,0x3713,0x3717,0x371b,0x371f,0x30af,0x3723,0x3727,0x372d,0x3731,0x3735,0x373b, +0x3741,0x3745,0x30b3,0x3749,0x374d,0x3751,0x3755,0x3759,0x375d,0x3761,0x3767,0x376b,0x3771,0x3775,0x377b,0x30bb, +0x377f,0x3783,0x3789,0x378d,0x3791,0x3797,0x379d,0x37a1,0x37a5,0x37a9,0x37ad,0x37ad,0x37b1,0x37b5,0x30c3,0x37b9, +0x37bd,0x37c1,0x37c5,0x37c9,0x37cf,0x37d3,0x2c45,0x37d9,0x37df,0x37e3,0x37e9,0x37ef,0x37f5,0x37f9,0x30db,0x37fd, +0x3803,0x3809,0x380f,0x3815,0x3819,0x3819,0x30df,0x3159,0x381d,0x3821,0x3825,0x3829,0x382f,0x2bad,0x30e7,0x3833, +0x3837,0x2fbd,0x383d,0x3843,0x2f05,0x3849,0x384d,0x2fcd,0x3851,0x3855,0x3859,0x385f,0x385f,0x3865,0x3869,0x386d, +0x3873,0x3877,0x387b,0x387f,0x3885,0x3889,0x388d,0x3891,0x3895,0x3899,0x389f,0x38a3,0x38a7,0x38ab,0x38af,0x38b3, +0x38b7,0x38bd,0x38c3,0x38c7,0x38cd,0x38d1,0x38d7,0x38db,0x2fe5,0x38df,0x38e5,0x38eb,0x38ef,0x38f5,0x38f9,0x38ff, +0x3903,0x3907,0x390b,0x390f,0x3913,0x3917,0x391d,0x3923,0x3929,0x3565,0x392f,0x3933,0x3937,0x393b,0x393f,0x3943, +0x3947,0x394b,0x394f,0x3953,0x3957,0x395b,0x2c8d,0x3961,0x3965,0x3969,0x396d,0x3971,0x3975,0x2ff1,0x3979,0x397d, +0x3981,0x3985,0x3989,0x398f,0x3995,0x399b,0x399f,0x39a3,0x39a7,0x39ab,0x39b1,0x39b5,0x39bb,0x39bf,0x39c3,0x39c9, +0x39cf,0x39d3,0x2b99,0x39d7,0x39db,0x39df,0x39e3,0x39e7,0x39eb,0x3103,0x39ef,0x39f3,0x39f7,0x39fb,0x39ff,0x3a03, +0x3a07,0x3a0b,0x3a0f,0x3a13,0x3a19,0x3a1d,0x3a21,0x3a25,0x3a29,0x3a2d,0x3a33,0x3a39,0x3a3d,0x3a41,0x3117,0x311b, +0x3a45,0x3a49,0x3a4f,0x3a53,0x3a57,0x3a5b,0x3a5f,0x3a65,0x3a6b,0x3a6f,0x3a73,0x3a77,0x3a7d,0x311f,0x3a81,0x3a87, +0x3a8d,0x3a91,0x3a95,0x3a99,0x3a9f,0x3aa3,0x3aa7,0x3aab,0x3aaf,0x3ab3,0x3ab7,0x3abb,0x3ac1,0x3ac5,0x3ac9,0x3acd, +0x3ad3,0x3ad7,0x3adb,0x3adf,0x3ae3,0x3ae9,0x3aef,0x3af3,0x3af7,0x3afb,0x3b01,0x3b05,0x3137,0x3137,0x3b0b,0x3b0f, +0x3b15,0x3b19,0x3b1d,0x3b21,0x3b25,0x3b29,0x3b2d,0x3b31,0x313b,0x3b37,0x3b3b,0x3b3f,0x3b43,0x3b47,0x3b4b,0x3b51, +0x3b55,0x3b5b,0x3b61,0x3b67,0x3b6b,0x3b6f,0x3b73,0x3b77,0x3b7b,0x3b7f,0x3b83,0x3b87,1,1 +}; + +static const UCPTrie norm2_nfc_data_trie={ + norm2_nfc_data_trieIndex, + { norm2_nfc_data_trieData }, + 1690, 7822, + 0x2fc00, 0x30, + 0, 0, + 0, 0, + 0xc4, 0x226, + 0x1, }; static const uint16_t norm2_nfc_data_extraData[7724]={ @@ -1151,19 +1136,4 @@ static const uint8_t norm2_nfc_data_smallFCD[256]={ 0,0,0,0,0,0,0,0,0,0,0,7,0,0,2,0 }; -static const UTrie2 norm2_nfc_data_trie={ - norm2_nfc_data_trieIndex, - norm2_nfc_data_trieIndex+2728, - NULL, - 2728, - 7248, - 0x188, - 0xb24, - 0x1, - 0x1, - 0x30000, - 0x26f4, - NULL, 0, FALSE, FALSE, 0, NULL -}; - #endif // INCLUDED_FROM_NORMALIZER2_CPP diff --git a/deps/icu-small/source/common/normalizer2.cpp b/deps/icu-small/source/common/normalizer2.cpp index 133770cbc4d9aa..ca5d3aba1a1874 100644 --- a/deps/icu-small/source/common/normalizer2.cpp +++ b/deps/icu-small/source/common/normalizer2.cpp @@ -34,9 +34,11 @@ using icu::Normalizer2Impl; +#if NORM2_HARDCODE_NFC_DATA // NFC/NFD data machine-generated by gennorm2 --csource #define INCLUDED_FROM_NORMALIZER2_CPP #include "norm2_nfc_data.h" +#endif U_NAMESPACE_BEGIN @@ -176,6 +178,36 @@ FCDNormalizer2::~FCDNormalizer2() {} // instance cache ---------------------------------------------------------- *** +U_CDECL_BEGIN +static UBool U_CALLCONV uprv_normalizer2_cleanup(); +U_CDECL_END + +static Normalizer2 *noopSingleton; +static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; + +static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { + return; + } + noopSingleton=new NoopNormalizer2; + if(noopSingleton==NULL) { + errorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); +} + +const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { + if(U_FAILURE(errorCode)) { return NULL; } + umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); + return noopSingleton; +} + +const Normalizer2Impl * +Normalizer2Factory::getImpl(const Normalizer2 *norm2) { + return &((Normalizer2WithImpl *)norm2)->impl; +} + Norm2AllModes::~Norm2AllModes() { delete impl; } @@ -195,6 +227,7 @@ Norm2AllModes::createInstance(Normalizer2Impl *impl, UErrorCode &errorCode) { return allModes; } +#if NORM2_HARDCODE_NFC_DATA Norm2AllModes * Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { @@ -210,48 +243,15 @@ Norm2AllModes::createNFCInstance(UErrorCode &errorCode) { return createInstance(impl, errorCode); } -U_CDECL_BEGIN -static UBool U_CALLCONV uprv_normalizer2_cleanup(); -U_CDECL_END - static Norm2AllModes *nfcSingleton; -static Normalizer2 *noopSingleton; static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER; -static icu::UInitOnce noopInitOnce = U_INITONCE_INITIALIZER; -// UInitOnce singleton initialization functions static void U_CALLCONV initNFCSingleton(UErrorCode &errorCode) { nfcSingleton=Norm2AllModes::createNFCInstance(errorCode); ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); } -static void U_CALLCONV initNoopSingleton(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - noopSingleton=new NoopNormalizer2; - if(noopSingleton==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - ucln_common_registerCleanup(UCLN_COMMON_NORMALIZER2, uprv_normalizer2_cleanup); -} - -U_CDECL_BEGIN - -static UBool U_CALLCONV uprv_normalizer2_cleanup() { - delete nfcSingleton; - nfcSingleton = NULL; - delete noopSingleton; - noopSingleton = NULL; - nfcInitOnce.reset(); - noopInitOnce.reset(); - return TRUE; -} - -U_CDECL_END - const Norm2AllModes * Norm2AllModes::getNFCInstance(UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return NULL; } @@ -281,23 +281,29 @@ const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) { return allModes!=NULL ? &allModes->fcc : NULL; } -const Normalizer2 *Normalizer2Factory::getNoopInstance(UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { return NULL; } - umtx_initOnce(noopInitOnce, &initNoopSingleton, errorCode); - return noopSingleton; -} - const Normalizer2Impl * Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) { const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode); return allModes!=NULL ? allModes->impl : NULL; } +#endif // NORM2_HARDCODE_NFC_DATA -const Normalizer2Impl * -Normalizer2Factory::getImpl(const Normalizer2 *norm2) { - return &((Normalizer2WithImpl *)norm2)->impl; +U_CDECL_BEGIN + +static UBool U_CALLCONV uprv_normalizer2_cleanup() { + delete noopSingleton; + noopSingleton = NULL; + noopInitOnce.reset(); +#if NORM2_HARDCODE_NFC_DATA + delete nfcSingleton; + nfcSingleton = NULL; + nfcInitOnce.reset(); +#endif + return TRUE; } +U_CDECL_END + U_NAMESPACE_END // C API ------------------------------------------------------------------- *** diff --git a/deps/icu-small/source/common/normalizer2impl.cpp b/deps/icu-small/source/common/normalizer2impl.cpp index 15b4a528934779..e7ae646c41ae6d 100644 --- a/deps/icu-small/source/common/normalizer2impl.cpp +++ b/deps/icu-small/source/common/normalizer2impl.cpp @@ -16,6 +16,8 @@ * created by: Markus W. Scherer */ +// #define UCPTRIE_DEBUG + #include "unicode/utypes.h" #if !UCONFIG_NO_NORMALIZATION @@ -24,7 +26,9 @@ #include "unicode/edits.h" #include "unicode/normalizer2.h" #include "unicode/stringoptions.h" +#include "unicode/ucptrie.h" #include "unicode/udata.h" +#include "unicode/umutablecptrie.h" #include "unicode/ustring.h" #include "unicode/utf16.h" #include "unicode/utf8.h" @@ -34,8 +38,8 @@ #include "normalizer2impl.h" #include "putilimp.h" #include "uassert.h" +#include "ucptrie_impl.h" #include "uset_imp.h" -#include "utrie2.h" #include "uvector.h" U_NAMESPACE_BEGIN @@ -62,7 +66,7 @@ inline uint8_t leadByteForCP(UChar32 c) { * Returns the code point from one single well-formed UTF-8 byte sequence * between cpStart and cpLimit. * - * UTrie2 UTF-8 macros do not assemble whole code points (for efficiency). + * Trie UTF-8 macros do not assemble whole code points (for efficiency). * When we do need the code point, we call this function. * We should not need it for normalization-inert data (norm16==0). * Illegal sequences yield the error value norm16==0 just like real normalization-inert code points. @@ -122,7 +126,7 @@ int32_t getJamoTMinusBase(const uint8_t *src, const uint8_t *limit) { } } else if (src[1] == 0x87) { uint8_t t = src[2]; - if ((int8_t)t <= (int8_t)0x82) { + if ((int8_t)t <= (int8_t)0x82u) { return t - (0xa7 - 0x40); } } @@ -253,7 +257,7 @@ UBool ReorderingBuffer::appendSupplementary(UChar32 c, uint8_t cc, UErrorCode &e return TRUE; } -UBool ReorderingBuffer::append(const UChar *s, int32_t length, +UBool ReorderingBuffer::append(const UChar *s, int32_t length, UBool isNFD, uint8_t leadCC, uint8_t trailCC, UErrorCode &errorCode) { if(length==0) { @@ -280,8 +284,11 @@ UBool ReorderingBuffer::append(const UChar *s, int32_t length, while(i>DELTA_SHIFT)-MAX_DELTA-1; + minDecompNoCP = static_cast(inIndexes[IX_MIN_DECOMP_NO_CP]); + minCompNoMaybeCP = static_cast(inIndexes[IX_MIN_COMP_NO_MAYBE_CP]); + minLcccCP = static_cast(inIndexes[IX_MIN_LCCC_CP]); + + minYesNo = static_cast(inIndexes[IX_MIN_YES_NO]); + minYesNoMappingsOnly = static_cast(inIndexes[IX_MIN_YES_NO_MAPPINGS_ONLY]); + minNoNo = static_cast(inIndexes[IX_MIN_NO_NO]); + minNoNoCompBoundaryBefore = static_cast(inIndexes[IX_MIN_NO_NO_COMP_BOUNDARY_BEFORE]); + minNoNoCompNoMaybeCC = static_cast(inIndexes[IX_MIN_NO_NO_COMP_NO_MAYBE_CC]); + minNoNoEmpty = static_cast(inIndexes[IX_MIN_NO_NO_EMPTY]); + limitNoNo = static_cast(inIndexes[IX_LIMIT_NO_NO]); + minMaybeYes = static_cast(inIndexes[IX_MIN_MAYBE_YES]); + U_ASSERT((minMaybeYes & 7) == 0); // 8-aligned for noNoDelta bit fields + centerNoNoDelta = (minMaybeYes >> DELTA_SHIFT) - MAX_DELTA - 1; normTrie=inTrie; @@ -445,75 +453,8 @@ Normalizer2Impl::init(const int32_t *inIndexes, const UTrie2 *inTrie, smallFCD=inSmallFCD; } -class LcccContext { -public: - LcccContext(const Normalizer2Impl &ni, UnicodeSet &s) : impl(ni), set(s) {} - - void handleRange(UChar32 start, UChar32 end, uint16_t norm16) { - if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && - norm16 != Normalizer2Impl::JAMO_VT) { - set.add(start, end); - } else if (impl.minNoNoCompNoMaybeCC <= norm16 && norm16 < impl.limitNoNo) { - uint16_t fcd16=impl.getFCD16(start); - if(fcd16>0xff) { set.add(start, end); } - } - } - -private: - const Normalizer2Impl &impl; - UnicodeSet &set; -}; - -namespace { - -struct PropertyStartsContext { - PropertyStartsContext(const Normalizer2Impl &ni, const USetAdder *adder) - : impl(ni), sa(adder) {} - - const Normalizer2Impl &impl; - const USetAdder *sa; -}; - -} // namespace - U_CDECL_BEGIN -static UBool U_CALLCONV -enumLcccRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - ((LcccContext *)context)->handleRange(start, end, (uint16_t)value); - return TRUE; -} - -static UBool U_CALLCONV -enumNorm16PropertyStartsRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { - /* add the start code point to the USet */ - const PropertyStartsContext *ctx=(const PropertyStartsContext *)context; - const USetAdder *sa=ctx->sa; - sa->add(sa->set, start); - if (start != end && ctx->impl.isAlgorithmicNoNo((uint16_t)value) && - (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { - // Range of code points with same-norm16-value algorithmic decompositions. - // They might have different non-zero FCD16 values. - uint16_t prevFCD16=ctx->impl.getFCD16(start); - while(++start<=end) { - uint16_t fcd16=ctx->impl.getFCD16(start); - if(fcd16!=prevFCD16) { - sa->add(sa->set, start); - prevFCD16=fcd16; - } - } - } - return TRUE; -} - -static UBool U_CALLCONV -enumPropertyStartsRange(const void *context, UChar32 start, UChar32 /*end*/, uint32_t /*value*/) { - /* add the start code point to the USet */ - const USetAdder *sa=(const USetAdder *)context; - sa->add(sa->set, start); - return TRUE; -} - static uint32_t U_CALLCONV segmentStarterMapper(const void * /*context*/, uint32_t value) { return value&CANON_NOT_SEGMENT_STARTER; @@ -523,15 +464,44 @@ U_CDECL_END void Normalizer2Impl::addLcccChars(UnicodeSet &set) const { - LcccContext context(*this, set); - utrie2_enum(normTrie, NULL, enumLcccRange, &context); + UChar32 start = 0, end; + uint32_t norm16; + while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &norm16)) >= 0) { + if (norm16 > Normalizer2Impl::MIN_NORMAL_MAYBE_YES && + norm16 != Normalizer2Impl::JAMO_VT) { + set.add(start, end); + } else if (minNoNoCompNoMaybeCC <= norm16 && norm16 < limitNoNo) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 > 0xff) { set.add(start, end); } + } + start = end + 1; + } } void Normalizer2Impl::addPropertyStarts(const USetAdder *sa, UErrorCode & /*errorCode*/) const { - /* add the start code point of each same-value range of each trie */ - PropertyStartsContext context(*this, sa); - utrie2_enum(normTrie, NULL, enumNorm16PropertyStartsRange, &context); + // Add the start code point of each same-value range of the trie. + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(normTrie, start, UCPMAP_RANGE_FIXED_LEAD_SURROGATES, INERT, + nullptr, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + if (start != end && isAlgorithmicNoNo((uint16_t)value) && + (value & Normalizer2Impl::DELTA_TCCC_MASK) > Normalizer2Impl::DELTA_TCCC_1) { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. + uint16_t prevFCD16 = getFCD16(start); + while (++start <= end) { + uint16_t fcd16 = getFCD16(start); + if (fcd16 != prevFCD16) { + sa->add(sa->set, start); + prevFCD16 = fcd16; + } + } + } + start = end + 1; + } /* add Hangul LV syllables and LV+1 because of skippables */ for(UChar c=Hangul::HANGUL_BASE; ctrie, segmentStarterMapper, enumPropertyStartsRange, sa); + // Add the start code point of each same-value range of the canonical iterator data trie. + if (!ensureCanonIterData(errorCode)) { return; } + // Currently only used for the SEGMENT_STARTER property. + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(fCanonIterData->trie, start, UCPMAP_RANGE_NORMAL, 0, + segmentStarterMapper, nullptr, &value)) >= 0) { + sa->add(sa->set, start); + start = end + 1; } } @@ -633,27 +608,23 @@ Normalizer2Impl::decompose(const UChar *src, const UChar *limit, // count code units below the minimum or with irrelevant data for the quick check for(prevSrc=src; src!=limit;) { if( (c=*src)= limitNoNo) { @@ -789,7 +760,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, } c = codePointFromValidUTF8(prevSrc, src); c = mapAlgorithmic(c, norm16); - norm16 = getNorm16(c); + norm16 = getRawNorm16(c); } else if (stopAtCompBoundary && norm16 < minNoNoCompNoMaybeCC) { return prevSrc; } @@ -828,7 +799,7 @@ Normalizer2Impl::decomposeShort(const uint8_t *src, const uint8_t *limit, } else { leadCC = 0; } - if (!buffer.append((const char16_t *)mapping+1, length, leadCC, trailCC, errorCode)) { + if (!buffer.append((const char16_t *)mapping+1, length, TRUE, leadCC, trailCC, errorCode)) { return nullptr; } } @@ -854,7 +825,7 @@ Normalizer2Impl::getDecomposition(UChar32 c, UChar buffer[4], int32_t &length) c length=0; U16_APPEND_UNSAFE(buffer, length, c); // The mapping might decompose further. - norm16 = getNorm16(c); + norm16 = getRawNorm16(c); } if (norm16 < minYesNo) { return decomp; @@ -926,19 +897,30 @@ void Normalizer2Impl::decomposeAndAppend(const UChar *src, const UChar *limit, return; } // Just merge the strings at the boundary. - ForwardUTrie2StringIterator iter(normTrie, src, limit); - uint8_t firstCC, prevCC, cc; - firstCC=prevCC=cc=getCC(iter.next16()); - while(cc!=0) { - prevCC=cc; - cc=getCC(iter.next16()); - }; + bool isFirst = true; + uint8_t firstCC = 0, prevCC = 0, cc; + const UChar *p = src; + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if ((cc = getCC(norm16)) == 0) { + p = codePointStart; + break; + } + if (isFirst) { + firstCC = cc; + isFirst = false; + } + prevCC = cc; + } if(limit==NULL) { // appendZeroCC() needs limit!=NULL - limit=u_strchr(iter.codePointStart, 0); + limit=u_strchr(p, 0); } - if (buffer.append(src, (int32_t)(iter.codePointStart-src), firstCC, prevCC, errorCode)) { - buffer.appendZeroCC(iter.codePointStart, limit, errorCode); + if (buffer.append(src, (int32_t)(p - src), FALSE, firstCC, prevCC, errorCode)) { + buffer.appendZeroCC(p, limit, errorCode); } } @@ -1085,7 +1067,7 @@ void Normalizer2Impl::addComposites(const uint16_t *list, UnicodeSet &set) const } UChar32 composite=compositeAndFwd>>1; if((compositeAndFwd&1)!=0) { - addComposites(getCompositionsListForComposite(getNorm16(composite)), set); + addComposites(getCompositionsListForComposite(getRawNorm16(composite)), set); } set.add(composite); } while((firstUnit&COMP_1_LAST_TUPLE)==0); @@ -1124,7 +1106,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart prevCC=0; for(;;) { - UTRIE2_U16_NEXT16(normTrie, p, limit, c, norm16); + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); cc=getCCFromYesOrMaybe(norm16); if( // this character combines backward and isMaybe(norm16) && @@ -1229,7 +1211,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart // Is the composite a starter that combines forward? if(compositeAndFwd&1) { compositionsList= - getCompositionsListForComposite(getNorm16(composite)); + getCompositionsListForComposite(getRawNorm16(composite)); } else { compositionsList=NULL; } @@ -1268,7 +1250,7 @@ void Normalizer2Impl::recompose(ReorderingBuffer &buffer, int32_t recomposeStart UChar32 Normalizer2Impl::composePair(UChar32 a, UChar32 b) const { - uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16=0 + uint16_t norm16=getNorm16(a); // maps an out-of-range 'a' to inert norm16 const uint16_t *list; if(isInert(norm16)) { return U_SENTINEL; @@ -1359,29 +1341,23 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit, return TRUE; } if( (c=*src)= MIN_YES_YES_WITH_CC) { cc = getCCFromNormalYesOrMaybe(n16); if (prevCC > cc) { @@ -1559,7 +1535,7 @@ Normalizer2Impl::compose(const UChar *src, const UChar *limit, // decompose and recompose. if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { const UChar *p = prevSrc; - UTRIE2_U16_PREV16(normTrie, prevBoundary, p, c, norm16); + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, prevBoundary, p, c, norm16); if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { prevSrc = p; } @@ -1626,29 +1602,23 @@ Normalizer2Impl::composeQuickCheck(const UChar *src, const UChar *limit, return src; } if( (c=*src)= MIN_YES_YES_WITH_CC) { cc = getCCFromNormalYesOrMaybe(n16); if (prevCC > cc) { @@ -1975,7 +1945,7 @@ Normalizer2Impl::composeUTF8(uint32_t options, UBool onlyContiguous, // decompose and recompose. if (prevBoundary != prevSrc && !norm16HasCompBoundaryBefore(norm16)) { const uint8_t *p = prevSrc; - UTRIE2_U8_PREV16(normTrie, prevBoundary, p, norm16); + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, prevBoundary, p, norm16); if (!norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { prevSrc = p; } @@ -2023,7 +1993,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const UChar *src, const UChar *limi } UChar32 c; uint16_t norm16; - UTRIE2_U16_NEXT16(normTrie, src, limit, c, norm16); + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, src, limit, c, norm16); return norm16HasCompBoundaryBefore(norm16); } @@ -2032,7 +2002,7 @@ UBool Normalizer2Impl::hasCompBoundaryBefore(const uint8_t *src, const uint8_t * return TRUE; } uint16_t norm16; - UTRIE2_U8_NEXT16(normTrie, src, limit, norm16); + UCPTRIE_FAST_U8_NEXT(normTrie, UCPTRIE_16, src, limit, norm16); return norm16HasCompBoundaryBefore(norm16); } @@ -2043,7 +2013,7 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const UChar *start, const UChar *p, } UChar32 c; uint16_t norm16; - UTRIE2_U16_PREV16(normTrie, start, p, c, norm16); + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); return norm16HasCompBoundaryAfter(norm16, onlyContiguous); } @@ -2053,36 +2023,42 @@ UBool Normalizer2Impl::hasCompBoundaryAfter(const uint8_t *start, const uint8_t return TRUE; } uint16_t norm16; - UTRIE2_U8_PREV16(normTrie, start, p, norm16); + UCPTRIE_FAST_U8_PREV(normTrie, UCPTRIE_16, start, p, norm16); return norm16HasCompBoundaryAfter(norm16, onlyContiguous); } const UChar *Normalizer2Impl::findPreviousCompBoundary(const UChar *start, const UChar *p, UBool onlyContiguous) const { - BackwardUTrie2StringIterator iter(normTrie, start, p); - for(;;) { - uint16_t norm16=iter.previous16(); + while (p != start) { + const UChar *codePointLimit = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_PREV(normTrie, UCPTRIE_16, start, p, c, norm16); if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return iter.codePointLimit; + return codePointLimit; } - if (hasCompBoundaryBefore(iter.codePoint, norm16)) { - return iter.codePointStart; + if (hasCompBoundaryBefore(c, norm16)) { + return p; } } + return p; } const UChar *Normalizer2Impl::findNextCompBoundary(const UChar *p, const UChar *limit, UBool onlyContiguous) const { - ForwardUTrie2StringIterator iter(normTrie, p, limit); - for(;;) { - uint16_t norm16=iter.next16(); - if (hasCompBoundaryBefore(iter.codePoint, norm16)) { - return iter.codePointStart; + while (p != limit) { + const UChar *codePointStart = p; + UChar32 c; + uint16_t norm16; + UCPTRIE_FAST_U16_NEXT(normTrie, UCPTRIE_16, p, limit, c, norm16); + if (hasCompBoundaryBefore(c, norm16)) { + return codePointStart; } if (norm16HasCompBoundaryAfter(norm16, onlyContiguous)) { - return iter.codePointLimit; + return p; } } + return p; } uint8_t Normalizer2Impl::getPreviousTrailCC(const UChar *start, const UChar *p) const { @@ -2130,7 +2106,7 @@ uint16_t Normalizer2Impl::getFCD16FromNormData(UChar32 c) const { } // Maps to an isCompYesAndZeroCC. c=mapAlgorithmic(c, norm16); - norm16=getNorm16(c); + norm16=getRawNorm16(c); } } if(norm16<=minYesNo || isHangulLVT(norm16)) { @@ -2195,17 +2171,10 @@ Normalizer2Impl::makeFCD(const UChar *src, const UChar *limit, prevFCD16=0; ++src; } else { - if(U16_IS_SURROGATE(c)) { + if(U16_IS_LEAD(c)) { UChar c2; - if(U16_IS_SURROGATE_LEAD(c)) { - if((src+1)!=limit && U16_IS_TRAIL(c2=src[1])) { - c=U16_GET_SUPPLEMENTARY(c, c2); - } - } else /* trail surrogate */ { - if(prevSrcadd(firstOrigin); @@ -2406,7 +2376,6 @@ void CanonIterData::addToStartSet(UChar32 origin, UChar32 decompLead, UErrorCode class InitCanonIterData { public: static void doInit(Normalizer2Impl *impl, UErrorCode &errorCode); - static void handleRange(Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode); }; U_CDECL_BEGIN @@ -2417,18 +2386,6 @@ initCanonIterData(Normalizer2Impl *impl, UErrorCode &errorCode) { InitCanonIterData::doInit(impl, errorCode); } -// Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. -// context: the Normalizer2Impl -static UBool U_CALLCONV -enumCIDRangeHandler(const void *context, UChar32 start, UChar32 end, uint32_t value) { - UErrorCode errorCode = U_ZERO_ERROR; - if (value != Normalizer2Impl::INERT) { - Normalizer2Impl *impl = (Normalizer2Impl *)context; - InitCanonIterData::handleRange(impl, start, end, (uint16_t)value, errorCode); - } - return U_SUCCESS(errorCode); -} - U_CDECL_END void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { @@ -2438,8 +2395,24 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { errorCode=U_MEMORY_ALLOCATION_ERROR; } if (U_SUCCESS(errorCode)) { - utrie2_enum(impl->normTrie, NULL, enumCIDRangeHandler, impl); - utrie2_freeze(impl->fCanonIterData->trie, UTRIE2_32_VALUE_BITS, &errorCode); + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(impl->normTrie, start, + UCPMAP_RANGE_FIXED_LEAD_SURROGATES, Normalizer2Impl::INERT, + nullptr, nullptr, &value)) >= 0) { + // Call Normalizer2Impl::makeCanonIterDataFromNorm16() for a range of same-norm16 characters. + if (value != Normalizer2Impl::INERT) { + impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); + } + start = end + 1; + } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(impl->fCanonIterData->mutableTrie, "CanonIterData"); +#endif + impl->fCanonIterData->trie = umutablecptrie_buildImmutable( + impl->fCanonIterData->mutableTrie, UCPTRIE_TYPE_SMALL, UCPTRIE_VALUE_BITS_32, &errorCode); + umutablecptrie_close(impl->fCanonIterData->mutableTrie); + impl->fCanonIterData->mutableTrie = nullptr; } if (U_FAILURE(errorCode)) { delete impl->fCanonIterData; @@ -2447,11 +2420,6 @@ void InitCanonIterData::doInit(Normalizer2Impl *impl, UErrorCode &errorCode) { } } -void InitCanonIterData::handleRange( - Normalizer2Impl *impl, UChar32 start, UChar32 end, uint16_t value, UErrorCode &errorCode) { - impl->makeCanonIterDataFromNorm16(start, end, value, *impl->fCanonIterData, errorCode); -} - void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, const uint16_t norm16, CanonIterData &newData, UErrorCode &errorCode) const { @@ -2465,7 +2433,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co return; } for(UChar32 c=start; c<=end; ++c) { - uint32_t oldValue=utrie2_get32(newData.trie, c); + uint32_t oldValue = umutablecptrie_get(newData.mutableTrie, c); uint32_t newValue=oldValue; if(isMaybeOrNonZeroCC(norm16)) { // not a segment starter if it occurs in a decomposition or has cc!=0 @@ -2483,7 +2451,7 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co if (isDecompNoAlgorithmic(norm16_2)) { // Maps to an isCompYesAndZeroCC. c2 = mapAlgorithmic(c2, norm16_2); - norm16_2 = getNorm16(c2); + norm16_2 = getRawNorm16(c2); // No compatibility mappings for the CanonicalIterator. U_ASSERT(!(isHangulLV(norm16_2) || isHangulLVT(norm16_2))); } @@ -2510,10 +2478,10 @@ void Normalizer2Impl::makeCanonIterDataFromNorm16(UChar32 start, UChar32 end, co if(norm16_2>=minNoNo) { while(itrie, c); + return (int32_t)ucptrie_get(fCanonIterData->trie, c); } const UnicodeSet &Normalizer2Impl::getCanonStartSet(int32_t n) const { @@ -2561,7 +2529,7 @@ UBool Normalizer2Impl::getCanonStartSet(UChar32 c, UnicodeSet &set) const { set.add(value); } if((canonValue&CANON_HAS_COMPOSITIONS)!=0) { - uint16_t norm16=getNorm16(c); + uint16_t norm16=getRawNorm16(c); if(norm16==JAMO_L) { UChar32 syllable= (UChar32)(Hangul::HANGUL_BASE+(c-Hangul::JAMO_L_BASE)*Hangul::JAMO_VT_COUNT); @@ -2608,7 +2576,7 @@ unorm2_swap(const UDataSwapper *ds, pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6d && pInfo->dataFormat[3]==0x32 && - (1<=formatVersion0 && formatVersion0<=3) + (1<=formatVersion0 && formatVersion0<=4) )) { udata_printError(ds, "unorm2_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as Normalizer2 data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], @@ -2669,9 +2637,9 @@ unorm2_swap(const UDataSwapper *ds, ds->swapArray32(ds, inBytes, nextOffset-offset, outBytes, pErrorCode); offset=nextOffset; - /* swap the UTrie2 */ + /* swap the trie */ nextOffset=indexes[Normalizer2Impl::IX_EXTRA_DATA_OFFSET]; - utrie2_swap(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, nextOffset-offset, outBytes+offset, pErrorCode); offset=nextOffset; /* swap the uint16_t extraData[] */ diff --git a/deps/icu-small/source/common/normalizer2impl.h b/deps/icu-small/source/common/normalizer2impl.h index 9dd4d1e5ab188b..2e6aff308819c5 100644 --- a/deps/icu-small/source/common/normalizer2impl.h +++ b/deps/icu-small/source/common/normalizer2impl.h @@ -24,12 +24,20 @@ #if !UCONFIG_NO_NORMALIZATION #include "unicode/normalizer2.h" +#include "unicode/ucptrie.h" #include "unicode/unistr.h" #include "unicode/unorm.h" +#include "unicode/utf.h" #include "unicode/utf16.h" #include "mutex.h" +#include "udataswp.h" #include "uset_imp.h" -#include "utrie2.h" + +// When the nfc.nrm data is *not* hardcoded into the common library +// (with this constant set to 0), +// then it needs to be built into the data package: +// Add nfc.nrm to icu4c/source/data/Makefile.in DAT_FILES_SHORT +#define NORM2_HARDCODE_NFC_DATA 1 U_NAMESPACE_BEGIN @@ -118,7 +126,7 @@ class U_COMMON_API Hangul { buffer[0]=(UChar)(JAMO_L_BASE+c/JAMO_V_COUNT); buffer[1]=(UChar)(JAMO_V_BASE+c%JAMO_V_COUNT); } else { - buffer[0]=orig-c2; // LV syllable + buffer[0]=(UChar)(orig-c2); // LV syllable buffer[1]=(UChar)(JAMO_T_BASE+c2); } } @@ -158,8 +166,7 @@ class U_COMMON_API ReorderingBuffer : public UMemory { appendBMP((UChar)c, cc, errorCode) : appendSupplementary(c, cc, errorCode); } - // s must be in NFD, otherwise change the implementation. - UBool append(const UChar *s, int32_t length, + UBool append(const UChar *s, int32_t length, UBool isNFD, uint8_t leadCC, uint8_t trailCC, UErrorCode &errorCode); UBool appendBMP(UChar c, uint8_t cc, UErrorCode &errorCode) { @@ -243,7 +250,7 @@ class U_COMMON_API Normalizer2Impl : public UObject { } virtual ~Normalizer2Impl(); - void init(const int32_t *inIndexes, const UTrie2 *inTrie, + void init(const int32_t *inIndexes, const UCPTrie *inTrie, const uint16_t *inExtraData, const uint8_t *inSmallFCD); void addLcccChars(UnicodeSet &set) const; @@ -254,7 +261,12 @@ class U_COMMON_API Normalizer2Impl : public UObject { UBool ensureCanonIterData(UErrorCode &errorCode) const; - uint16_t getNorm16(UChar32 c) const { return UTRIE2_GET16(normTrie, c); } + // The trie stores values for lead surrogate code *units*. + // Surrogate code *points* are inert. + uint16_t getNorm16(UChar32 c) const { + return U_IS_LEAD(c) ? INERT : UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); + } + uint16_t getRawNorm16(UChar32 c) const { return UCPTRIE_FAST_GET(normTrie, UCPTRIE_16, c); } UNormalizationCheckResult getCompQuickCheck(uint16_t norm16) const { if(norm16 # include "unicode/uloc.h" -#if U_PLATFORM_HAS_WINUWP_API == 0 # include "wintz.h" -#else // U_PLATFORM_HAS_WINUWP_API +#if U_PLATFORM_HAS_WINUWP_API typedef PVOID LPMSG; // TODO: figure out how to get rid of this typedef #include #include @@ -1062,53 +1061,13 @@ uprv_tzname_clear_cache() #endif } -// With the Universal Windows Platform we can just ask Windows for the name -#if U_PLATFORM_HAS_WINUWP_API -U_CAPI const char* U_EXPORT2 -uprv_getWindowsTimeZone() -{ - // Get default Windows timezone. - ComPtr calendar; - HRESULT hr = RoActivateInstance( - HStringReference(RuntimeClass_Windows_Globalization_Calendar).Get(), - &calendar); - if (SUCCEEDED(hr)) - { - ComPtr timezone; - hr = calendar.As(&timezone); - if (SUCCEEDED(hr)) - { - HString timezoneString; - hr = timezone->GetTimeZone(timezoneString.GetAddressOf()); - if (SUCCEEDED(hr)) - { - int32_t length = static_cast(wcslen(timezoneString.GetRawBuffer(NULL))); - char* asciiId = (char*)uprv_calloc(length + 1, sizeof(char)); - if (asciiId != nullptr) - { - u_UCharsToChars((UChar*)timezoneString.GetRawBuffer(NULL), asciiId, length); - return asciiId; - } - } - } - } - - // Failed - return nullptr; -} -#endif - U_CAPI const char* U_EXPORT2 uprv_tzname(int n) { (void)n; // Avoid unreferenced parameter warning. const char *tzid = NULL; #if U_PLATFORM_USES_ONLY_WIN32_API -#if U_PLATFORM_HAS_WINUWP_API > 0 - tzid = uprv_getWindowsTimeZone(); -#else tzid = uprv_detectWindowsTimeZone(); -#endif if (tzid != NULL) { return tzid; @@ -1366,6 +1325,43 @@ uprv_pathIsAbsolute(const char *path) # endif #endif +#if U_PLATFORM_HAS_WINUWP_API != 0 +// Helper function to get the ICU Data Directory under the Windows directory location. +static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength) +{ +#if defined(ICU_DATA_DIR_WINDOWS) + wchar_t windowsPath[MAX_PATH]; + char windowsPathUtf8[MAX_PATH]; + + UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath)); + if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) { + // Convert UTF-16 to a UTF-8 string. + UErrorCode status = U_ZERO_ERROR; + int32_t windowsPathUtf8Len = 0; + u_strToUTF8(windowsPathUtf8, static_cast(UPRV_LENGTHOF(windowsPathUtf8)), + &windowsPathUtf8Len, reinterpret_cast(windowsPath), -1, &status); + + if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) && + (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) { + // Ensure it always has a separator, so we can append the ICU data path. + if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) { + windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR; + windowsPathUtf8[windowsPathUtf8Len] = '\0'; + } + // Check if the concatenated string will fit. + if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) { + uprv_strcpy(directoryBuffer, windowsPathUtf8); + uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS); + return TRUE; + } + } + } +#endif + + return FALSE; +} +#endif + static void U_CALLCONV dataDirectoryInitFn() { /* If we already have the directory, then return immediately. Will happen if user called * u_setDataDirectory(). @@ -1425,24 +1421,10 @@ static void U_CALLCONV dataDirectoryInitFn() { } #endif -#if defined(ICU_DATA_DIR_WINDOWS) && U_PLATFORM_HAS_WINUWP_API != 0 - // Use data from the %windir%\globalization\icu directory - // This is only available if ICU is built as a system component +#if U_PLATFORM_HAS_WINUWP_API != 0 && defined(ICU_DATA_DIR_WINDOWS) char datadir_path_buffer[MAX_PATH]; - UINT length = GetWindowsDirectoryA(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer)); - if (length > 0 && length < (UPRV_LENGTHOF(datadir_path_buffer) - sizeof(ICU_DATA_DIR_WINDOWS) - 1)) - { - if (datadir_path_buffer[length - 1] != '\\') - { - datadir_path_buffer[length++] = '\\'; - datadir_path_buffer[length] = '\0'; - } - - if ((length + 1 + sizeof(ICU_DATA_DIR_WINDOWS)) < UPRV_LENGTHOF(datadir_path_buffer)) - { - uprv_strcat(datadir_path_buffer, ICU_DATA_DIR_WINDOWS); - path = datadir_path_buffer; - } + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + path = datadir_path_buffer; } #endif @@ -1491,20 +1473,30 @@ static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) { status = U_MEMORY_ALLOCATION_ERROR; return; } -#if U_PLATFORM_HAS_WINUWP_API == 0 - const char *dir = getenv("ICU_TIMEZONE_FILES_DIR"); -#else - // TODO: UWP does not support alternate timezone data directories at this time + const char *dir = ""; + +#if U_PLATFORM_HAS_WINUWP_API != 0 + // The UWP version does not support the environment variable setting, but can possibly pick them up from the Windows directory. + char datadir_path_buffer[MAX_PATH]; + if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) { + dir = datadir_path_buffer; + } +#else + dir = getenv("ICU_TIMEZONE_FILES_DIR"); #endif // U_PLATFORM_HAS_WINUWP_API + #if defined(U_TIMEZONE_FILES_DIR) if (dir == NULL) { + // Build time configuration setting. dir = TO_STRING(U_TIMEZONE_FILES_DIR); } #endif + if (dir == NULL) { dir = ""; } + setTimeZoneFilesDir(dir, status); } @@ -1676,7 +1668,8 @@ The leftmost codepage (.xxx) wins. /* Note that we scan the *uncorrected* ID. */ if ((p = uprv_strrchr(posixID, '@')) != NULL) { if (correctedPOSIXLocale == NULL) { - correctedPOSIXLocale = static_cast(uprv_malloc(uprv_strlen(posixID)+1)); + /* new locale can be 1 char longer than old one if @ -> __ */ + correctedPOSIXLocale = static_cast(uprv_malloc(uprv_strlen(posixID)+2)); /* Exit on memory allocation error. */ if (correctedPOSIXLocale == NULL) { return NULL; @@ -1693,7 +1686,7 @@ The leftmost codepage (.xxx) wins. } if (uprv_strchr(correctedPOSIXLocale,'_') == NULL) { - uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b */ + uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */ } else { uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */ @@ -1747,70 +1740,22 @@ The leftmost codepage (.xxx) wins. #elif U_PLATFORM_USES_ONLY_WIN32_API #define POSIX_LOCALE_CAPACITY 64 UErrorCode status = U_ZERO_ERROR; - char *correctedPOSIXLocale = 0; + char *correctedPOSIXLocale = nullptr; // If we have already figured this out just use the cached value - if (gCorrectedPOSIXLocale != NULL) { + if (gCorrectedPOSIXLocale != nullptr) { return gCorrectedPOSIXLocale; } // No cached value, need to determine the current value - static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH]; -#if U_PLATFORM_HAS_WINUWP_API == 0 - // If not a Universal Windows App, we'll need user default language. - // Vista and above should use Locale Names instead of LCIDs - int length = GetUserDefaultLocaleName(windowsLocale, UPRV_LENGTHOF(windowsLocale)); -#else - // In a UWP app, we want the top language that the application and user agreed upon - ComPtr> languageList; - - ComPtr applicationLanguagesStatics; - HRESULT hr = GetActivationFactory( - HStringReference(RuntimeClass_Windows_Globalization_ApplicationLanguages).Get(), - &applicationLanguagesStatics); - if (SUCCEEDED(hr)) - { - hr = applicationLanguagesStatics->get_Languages(&languageList); - } + static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; + int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH); - if (FAILED(hr)) - { - // If there is no application context, then use the top language from the user language profile - ComPtr globalizationPreferencesStatics; - hr = GetActivationFactory( - HStringReference(RuntimeClass_Windows_System_UserProfile_GlobalizationPreferences).Get(), - &globalizationPreferencesStatics); - if (SUCCEEDED(hr)) - { - hr = globalizationPreferencesStatics->get_Languages(&languageList); - } - } - - // We have a list of languages, ICU knows one, so use the top one for our locale - HString topLanguage; - if (SUCCEEDED(hr)) - { - hr = languageList->GetAt(0, topLanguage.GetAddressOf()); - } - - if (FAILED(hr)) - { - // Unexpected, use en-US by default - if (gCorrectedPOSIXLocale == NULL) { - gCorrectedPOSIXLocale = "en_US"; - } - - return gCorrectedPOSIXLocale; - } - - // ResolveLocaleName will get a likely subtags form consistent with Windows behavior. - int length = ResolveLocaleName(topLanguage.GetRawBuffer(NULL), windowsLocale, UPRV_LENGTHOF(windowsLocale)); -#endif - // Now we should have a Windows locale name that needs converted to the POSIX style, - if (length > 0) + // Now we should have a Windows locale name that needs converted to the POSIX style. + if (length > 0) // If length is 0, then the GetLocaleInfoEx failed. { // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.) - char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH]; + char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {}; int32_t i; for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++) @@ -1858,7 +1803,7 @@ The leftmost codepage (.xxx) wins. } // If unable to find a locale we can agree upon, use en-US by default - if (gCorrectedPOSIXLocale == NULL) { + if (gCorrectedPOSIXLocale == nullptr) { gCorrectedPOSIXLocale = "en_US"; } return gCorrectedPOSIXLocale; diff --git a/deps/icu-small/source/common/putilimp.h b/deps/icu-small/source/common/putilimp.h index 023e06879a0655..f744746b1f0feb 100644 --- a/deps/icu-small/source/common/putilimp.h +++ b/deps/icu-small/source/common/putilimp.h @@ -94,7 +94,7 @@ typedef size_t uintptr_t; # define U_NL_LANGINFO_CODESET CODESET #endif -#ifdef U_TZSET +#if defined(U_TZSET) || defined(U_HAVE_TZSET) /* Use the predefined value. */ #elif U_PLATFORM_USES_ONLY_WIN32_API // UWP doesn't support tzset or environment variables for tz @@ -132,7 +132,7 @@ typedef size_t uintptr_t; # define U_TIMEZONE timezone #endif -#ifdef U_TZNAME +#if defined(U_TZNAME) || defined(U_HAVE_TZNAME) /* Use the predefined value. */ #elif U_PLATFORM_USES_ONLY_WIN32_API /* not usable on all windows platforms */ @@ -204,30 +204,18 @@ typedef size_t uintptr_t; /** * \def U_HAVE_STD_ATOMICS - * Defines whether the standard C++11 is available. - * ICU will use this when available, - * otherwise will fall back to compiler or platform specific alternatives. + * Defines whether to use the standard C++11 functions + * If false, ICU will fall back to compiler or platform specific alternatives. + * Note: support for these fall back options for atomics will be removed in a future version + * of ICU, and the use of C++ 11 atomics will be required. * @internal */ #ifdef U_HAVE_STD_ATOMICS /* Use the predefined value. */ -#elif U_CPLUSPLUS_VERSION < 11 - /* Not C++11, disable use of atomics */ -# define U_HAVE_STD_ATOMICS 0 -#elif __clang__ && __clang_major__==3 && __clang_minor__<=1 - /* Clang 3.1, has atomic variable initializer bug. */ -# define U_HAVE_STD_ATOMICS 0 #else - /* U_HAVE_ATOMIC is typically set by an autoconf test of #include */ - /* Can be set manually, or left undefined, on platforms without autoconf. */ -# if defined(U_HAVE_ATOMIC) && U_HAVE_ATOMIC -# define U_HAVE_STD_ATOMICS 1 -# else -# define U_HAVE_STD_ATOMICS 0 -# endif +# define U_HAVE_STD_ATOMICS 1 #endif - /** * \def U_HAVE_CLANG_ATOMICS * Defines whether Clang c11 style built-in atomics are available. @@ -586,6 +574,49 @@ U_INTERNAL void * U_EXPORT2 uprv_maximumPtr(void *base); # endif #endif + +#ifdef __cplusplus +/** + * Pin a buffer capacity such that doing pointer arithmetic + * on the destination pointer and capacity cannot overflow. + * + * The pinned capacity must fulfill the following conditions (for positive capacities): + * - dest + capacity is a valid pointer according to the machine arcitecture (AS/400, 64-bit, etc.) + * - (dest + capacity) >= dest + * - The size (in bytes) of T[capacity] does not exceed 0x7fffffff + * + * @param dest the destination buffer pointer. + * @param capacity the requested buffer capacity, in units of type T. + * @return the pinned capacity. + * @internal + */ +template +inline int32_t pinCapacity(T *dest, int32_t capacity) { + if (capacity <= 0) { return capacity; } + + uintptr_t destInt = (uintptr_t)dest; + uintptr_t maxInt; + +# if U_PLATFORM == U_PF_OS390 && !defined(_LP64) + // We have 31-bit pointers. + maxInt = 0x7fffffff; +# elif U_PLATFORM == U_PF_OS400 + maxInt = (uintptr_t)uprv_maximumPtr((void *)dest); +# else + maxInt = destInt + 0x7fffffffu; + if (maxInt < destInt) { + // Less than 2GB to the end of the address space. + // Pin to that to prevent address overflow. + maxInt = (uintptr_t)-1; + } +# endif + + uintptr_t maxBytes = maxInt - destInt; // max. 2GB + int32_t maxCapacity = (int32_t)(maxBytes / sizeof(T)); + return capacity <= maxCapacity ? capacity : maxCapacity; +} +#endif // __cplusplus + /* Dynamic Library Functions */ typedef void (UVoidFunction)(void); diff --git a/deps/icu-small/source/common/rbbi.cpp b/deps/icu-small/source/common/rbbi.cpp index c5ea2770ba9854..cb3766506f4082 100644 --- a/deps/icu-small/source/common/rbbi.cpp +++ b/deps/icu-small/source/common/rbbi.cpp @@ -18,6 +18,8 @@ #if !UCONFIG_NO_BREAK_ITERATION +#include + #include "unicode/rbbi.h" #include "unicode/schriter.h" #include "unicode/uchriter.h" @@ -628,7 +630,7 @@ int32_t RuleBasedBreakIterator::preceding(int32_t offset) { // or on a trail byte if the input is UTF-8. utext_setNativeIndex(&fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(&fText); + int32_t adjustedOffset = static_cast(utext_getNativeIndex(&fText)); UErrorCode status = U_ZERO_ERROR; fBreakCache->preceding(adjustedOffset, status); @@ -655,7 +657,7 @@ UBool RuleBasedBreakIterator::isBoundary(int32_t offset) { // But we still need the side effect of leaving iteration at the following boundary. utext_setNativeIndex(&fText, offset); - int32_t adjustedOffset = utext_getNativeIndex(&fText); + int32_t adjustedOffset = static_cast(utext_getNativeIndex(&fText)); bool result = false; UErrorCode status = U_ZERO_ERROR; @@ -848,7 +850,7 @@ int32_t RuleBasedBreakIterator::handleNext() { #ifdef RBBI_DEBUG if (gTrace) { - RBBIDebugPrintf(" %4ld ", utext_getNativeIndex(&fText)); + RBBIDebugPrintf(" %4" PRId64 " ", utext_getNativeIndex(&fText)); if (0x20<=c && c<0x7f) { RBBIDebugPrintf("\"%c\" ", c); } else { diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index 60316ce6420dc5..519c61049894e6 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -603,7 +603,7 @@ void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t fStartBufIdx = modChunkSize(fStartBufIdx + 6); // TODO: experiment. Probably revert to 1. } fBoundaries[nextIdx] = position; - fStatuses[nextIdx] = ruleStatusIdx; + fStatuses[nextIdx] = static_cast(ruleStatusIdx); fEndBufIdx = nextIdx; if (update == UpdateCachePosition) { // Set current position to the newly added boundary. @@ -631,7 +631,7 @@ bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t fEndBufIdx = modChunkSize(fEndBufIdx - 1); } fBoundaries[nextIdx] = position; - fStatuses[nextIdx] = ruleStatusIdx; + fStatuses[nextIdx] = static_cast(ruleStatusIdx); fStartBufIdx = nextIdx; if (update == UpdateCachePosition) { fBufIdx = nextIdx; diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index a46f483d23334a..5f5661af94776d 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -303,17 +303,24 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { } void RBBIRuleBuilder::optimizeTables() { + bool didSomething; + do { + didSomething = false; + + // Begin looking for duplicates with char class 3. + // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, + // and should not have other categories merged into them. + IntPair duplPair = {3, 0}; + while (fForwardTable->findDuplCharClassFrom(&duplPair)) { + fSetBuilder->mergeCategories(duplPair); + fForwardTable->removeColumn(duplPair.second); + didSomething = true; + } - // Begin looking for duplicates with char class 3. - // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively, - // and should not have other categories merged into them. - IntPair duplPair = {3, 0}; - - while (fForwardTable->findDuplCharClassFrom(&duplPair)) { - fSetBuilder->mergeCategories(duplPair); - fForwardTable->removeColumn(duplPair.second); - } - fForwardTable->removeDuplicateStates(); + while (fForwardTable->removeDuplicateStates() > 0) { + didSomething = true; + } + } while (didSomething); } U_NAMESPACE_END diff --git a/deps/icu-small/source/common/rbbiscan.cpp b/deps/icu-small/source/common/rbbiscan.cpp index ecc1663d8f84f7..170c212e13fd1f 100644 --- a/deps/icu-small/source/common/rbbiscan.cpp +++ b/deps/icu-small/source/common/rbbiscan.cpp @@ -380,7 +380,7 @@ UBool RBBIRuleScanner::doParseActions(int32_t action) // with the current rule expression (on the Node Stack) // with the resulting OR expression going to *destRules // - RBBINode *thisRule = fNodeStack[fNodeStackPtr]; + thisRule = fNodeStack[fNodeStackPtr]; RBBINode *prevRules = *destRules; RBBINode *orNode = pushNewNode(RBBINode::opOr); if (U_FAILURE(*fRB->fStatus)) { diff --git a/deps/icu-small/source/common/rbbitblb.cpp b/deps/icu-small/source/common/rbbitblb.cpp index 42116b0f95575f..e6ae2dc654a88c 100644 --- a/deps/icu-small/source/common/rbbitblb.cpp +++ b/deps/icu-small/source/common/rbbitblb.cpp @@ -428,8 +428,8 @@ void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree) { addRuleRootNodes(&ruleRootNodes, tree); UVector matchStartNodes(*fStatus); - for (int i=0; i(ruleRootNodes.elementAt(i)); + for (int j=0; j(ruleRootNodes.elementAt(j)); if (node->fChainIn) { setAdd(&matchStartNodes, node->fFirstPosSet); } @@ -1082,21 +1082,22 @@ bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { int32_t numStates = fDStates->size(); int32_t numCols = fRB->fSetBuilder->getNumCharCategories(); - uint16_t table_base; - uint16_t table_dupl; for (; categories->first < numCols-1; categories->first++) { for (categories->second=categories->first+1; categories->second < numCols; categories->second++) { - for (int32_t state=0; stateelementAt(state); - table_base = (uint16_t)sd->fDtran->elementAti(categories->first); - table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second); - if (table_base != table_dupl) { - break; - } - } - if (table_base == table_dupl) { - return true; - } + // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates). + uint16_t table_base = 0; + uint16_t table_dupl = 1; + for (int32_t state=0; stateelementAt(state); + table_base = (uint16_t)sd->fDtran->elementAti(categories->first); + table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second); + if (table_base != table_dupl) { + break; + } + } + if (table_base == table_dupl) { + return true; + } } } return false; @@ -1236,7 +1237,7 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) { } else if (existingVal > duplState) { newVal = existingVal - 1; } - sd->setCharAt(col, newVal); + sd->setCharAt(col, static_cast(newVal)); } } } @@ -1245,12 +1246,16 @@ void RBBITableBuilder::removeSafeState(IntPair duplStates) { /* * RemoveDuplicateStates */ -void RBBITableBuilder::removeDuplicateStates() { +int32_t RBBITableBuilder::removeDuplicateStates() { IntPair dupls = {3, 0}; + int32_t numStatesRemoved = 0; + while (findDuplicateState(&dupls)) { // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); removeState(dupls); + ++numStatesRemoved; } + return numStatesRemoved; } @@ -1411,7 +1416,7 @@ void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { UnicodeString &startState = *static_cast(fSafeTable->elementAt(1)); for (int32_t charClass=0; charClass < numCharClasses; ++charClass) { // Note: +2 for the start & stop state. - startState.setCharAt(charClass, charClass+2); + startState.setCharAt(charClass, static_cast(charClass+2)); } // Initially make every other state table row look like the start state row, diff --git a/deps/icu-small/source/common/rbbitblb.h b/deps/icu-small/source/common/rbbitblb.h index eea243e4cdd6c3..bc6077bb4da3dc 100644 --- a/deps/icu-small/source/common/rbbitblb.h +++ b/deps/icu-small/source/common/rbbitblb.h @@ -15,6 +15,9 @@ #define RBBITBLB_H #include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + #include "unicode/uobject.h" #include "unicode/rbbi.h" #include "rbbirb.h" @@ -66,8 +69,11 @@ class RBBITableBuilder : public UMemory { */ void removeColumn(int32_t column); - /** Check for, and remove dupicate states (table rows). */ - void removeDuplicateStates(); + /** + * Check for, and remove dupicate states (table rows). + * @return the number of states removed. + */ + int32_t removeDuplicateStates(); /** Build the safe reverse table from the already-constructed forward table. */ void buildSafeReverseTable(UErrorCode &status); @@ -204,4 +210,7 @@ class RBBIStateDescriptor : public UMemory { U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ + #endif diff --git a/deps/icu-small/source/common/serv.cpp b/deps/icu-small/source/common/serv.cpp index 35e362b71a8fc2..2fb35bd1a5994f 100644 --- a/deps/icu-small/source/common/serv.cpp +++ b/deps/icu-small/source/common/serv.cpp @@ -702,9 +702,9 @@ ICUService::getDisplayName(const UnicodeString& id, UnicodeString& result, const } // fallback - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; ICUServiceKey* fallbackKey = createKey(&id, status); - while (fallbackKey->fallback()) { + while (fallbackKey != NULL && fallbackKey->fallback()) { UnicodeString us; fallbackKey->currentID(us); f = (ICUServiceFactory*)map->get(us); diff --git a/deps/icu-small/source/common/sharedobject.h b/deps/icu-small/source/common/sharedobject.h index 54655d0d716720..878594c7ffa772 100644 --- a/deps/icu-small/source/common/sharedobject.h +++ b/deps/icu-small/source/common/sharedobject.h @@ -104,7 +104,7 @@ class U_COMMON_API SharedObject : public UObject { /** * Deletes this object if it has no references. * Available for non-cached SharedObjects only. Ownership of cached objects - * is with the UnifiedCache, which is soley responsible for eviction and deletion. + * is with the UnifiedCache, which is solely responsible for eviction and deletion. */ void deleteIfZeroRefCount() const; diff --git a/deps/icu-small/source/common/static_unicode_sets.cpp b/deps/icu-small/source/common/static_unicode_sets.cpp index 9e731f5781e215..5d598a0e33b6d4 100644 --- a/deps/icu-small/source/common/static_unicode_sets.cpp +++ b/deps/icu-small/source/common/static_unicode_sets.cpp @@ -27,6 +27,7 @@ UnicodeSet* gUnicodeSets[COUNT] = {}; // Save the empty instance in static memory to have well-defined behavior if a // regular UnicodeSet cannot be allocated. +alignas(UnicodeSet) char gEmptyUnicodeSet[sizeof(UnicodeSet)]; // Whether the gEmptyUnicodeSet is initialized and ready to use. diff --git a/deps/icu-small/source/common/stringtriebuilder.cpp b/deps/icu-small/source/common/stringtriebuilder.cpp index cf5b7b73ae2c1f..6f9cc2e5c22b55 100644 --- a/deps/icu-small/source/common/stringtriebuilder.cpp +++ b/deps/icu-small/source/common/stringtriebuilder.cpp @@ -373,7 +373,7 @@ StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { return newNode; } -UBool +int32_t StringTrieBuilder::hashNode(const void *node) { return ((const Node *)node)->hashCode(); } diff --git a/deps/icu-small/source/common/ubidi.cpp b/deps/icu-small/source/common/ubidi.cpp index 531ed64cff6ec8..4b65d491859bfa 100644 --- a/deps/icu-small/source/common/ubidi.cpp +++ b/deps/icu-small/source/common/ubidi.cpp @@ -624,7 +624,7 @@ getDirProps(UBiDi *pBiDi) { pBiDi->paras[pBiDi->paraCount-1].level=1; } if(isDefaultLevel) { - pBiDi->paraLevel=pBiDi->paras[0].level; + pBiDi->paraLevel=static_cast(pBiDi->paras[0].level); } /* The following is needed to resolve the text direction for default level paragraphs containing no strong character */ @@ -825,28 +825,28 @@ bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { N0c1. */ if((direction==0 && pOpening->flags&FOUND_L) || - (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ - newProp=direction; + (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ + newProp=static_cast(direction); } - else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ + else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ /* it is stable if there is no containing pair or in conditions too complicated and not worth checking */ stable=(openIdx==pLastIsoRun->start); if(direction!=pOpening->contextDir) - newProp=pOpening->contextDir; /* N0c1 */ + newProp= static_cast(pOpening->contextDir); /* N0c1 */ else - newProp=direction; /* N0c2 */ + newProp= static_cast(direction); /* N0c2 */ } else { /* forget this and any brackets nested within this pair */ - pLastIsoRun->limit=openIdx; - return ON; /* N0d */ + pLastIsoRun->limit= static_cast(openIdx); + return ON; /* N0d */ } bd->pBiDi->dirProps[pOpening->position]=newProp; bd->pBiDi->dirProps[position]=newProp; /* Update nested N0c pairs that may be affected */ fixN0c(bd, openIdx, pOpening->position, newProp); if(stable) { - pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */ + pLastIsoRun->limit= static_cast(openIdx); /* forget any brackets nested within this pair */ /* remove lower located synonyms if any */ while(pLastIsoRun->limit>pLastIsoRun->start && bd->openings[pLastIsoRun->limit-1].position==pOpening->position) @@ -918,7 +918,7 @@ bracketProcessChar(BracketData *bd, int32_t position) { bracket or it is a case of N0d */ /* Now see if it is an opening bracket */ if(c) - match=u_getBidiPairedBracket(c); /* get the matching char */ + match= static_cast(u_getBidiPairedBracket(c)); /* get the matching char */ else match=0; if(match!=c && /* has a matching char */ @@ -948,7 +948,7 @@ bracketProcessChar(BracketData *bd, int32_t position) { pLastIsoRun->contextPos=position; } else if(dirProp<=R || dirProp==AL) { - newProp=DIR_FROM_STRONG(dirProp); + newProp= static_cast(DIR_FROM_STRONG(dirProp)); pLastIsoRun->lastBase=dirProp; pLastIsoRun->lastStrong=dirProp; pLastIsoRun->contextDir=(UBiDiDirection)newProp; @@ -1101,7 +1101,7 @@ resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { else start=pBiDi->paras[paraIndex-1].limit; limit=pBiDi->paras[paraIndex].limit; - level=pBiDi->paras[paraIndex].level; + level= static_cast(pBiDi->paras[paraIndex].level); for(i=start; iparas[paraIndex-1].limit; limit=pBiDi->paras[paraIndex].limit; - level=pBiDi->paras[paraIndex].level; + level= static_cast(pBiDi->paras[paraIndex].level); for(i=start; iparaCount; i++) { last=(pBiDi->paras[i].limit)-1; - level=pBiDi->paras[i].level; + level= static_cast(pBiDi->paras[i].level); if(level==0) continue; /* LTR paragraph */ start= i==0 ? 0 : pBiDi->paras[i-1].limit; diff --git a/deps/icu-small/source/common/ubiditransform.cpp b/deps/icu-small/source/common/ubiditransform.cpp index 80261d391e753b..394df6092d21b1 100644 --- a/deps/icu-small/source/common/ubiditransform.cpp +++ b/deps/icu-small/source/common/ubiditransform.cpp @@ -146,7 +146,7 @@ static UBool action_reorder(UBiDiTransform *pTransform, UErrorCode *pErrorCode) { ubidi_writeReordered(pTransform->pBidi, pTransform->dest, pTransform->destSize, - pTransform->reorderingOptions, pErrorCode); + static_cast(pTransform->reorderingOptions), pErrorCode); *pTransform->pDestLength = pTransform->srcLength; pTransform->reorderingOptions = UBIDI_REORDER_DEFAULT; @@ -393,9 +393,9 @@ resolveBaseDirection(const UChar *text, uint32_t length, switch (*pInLevel) { case UBIDI_DEFAULT_LTR: case UBIDI_DEFAULT_RTL: { - UBiDiLevel level = ubidi_getBaseDirection(text, length); - *pInLevel = level != UBIDI_NEUTRAL ? level - : *pInLevel == UBIDI_DEFAULT_RTL ? RTL : LTR; + UBiDiLevel level = static_cast(ubidi_getBaseDirection(text, length)); + *pInLevel = static_cast(level != UBIDI_NEUTRAL) ? level + : *pInLevel == UBIDI_DEFAULT_RTL ? static_cast(RTL) : static_cast(LTR); break; } default: diff --git a/deps/icu-small/source/common/ucase.cpp b/deps/icu-small/source/common/ucase.cpp index 43c57f896e9348..50c8d20c1fce73 100644 --- a/deps/icu-small/source/common/ucase.cpp +++ b/deps/icu-small/source/common/ucase.cpp @@ -270,6 +270,7 @@ ucase_addCaseClosure(UChar32 c, const USetAdder *sa) { } } if(HAS_SLOT(excWord, UCASE_EXC_DELTA)) { + pe=pe0; int32_t delta; GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); sa->add(sa->set, (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta); @@ -1167,7 +1168,7 @@ ucase_toFullLower(UChar32 c, if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; } if(HAS_SLOT(excWord, UCASE_EXC_LOWER)) { @@ -1261,7 +1262,7 @@ toUpperOrTitle(UChar32 c, if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_GET_TYPE(props)==UCASE_LOWER) { int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; } if(!upperNotTitle && HAS_SLOT(excWord, UCASE_EXC_TITLE)) { @@ -1469,7 +1470,7 @@ ucase_toFullFolding(UChar32 c, } if(HAS_SLOT(excWord, UCASE_EXC_DELTA) && UCASE_IS_UPPER_OR_TITLE(props)) { int32_t delta; - GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe, delta); + GET_SLOT_VALUE(excWord, UCASE_EXC_DELTA, pe2, delta); return (excWord&UCASE_EXC_DELTA_IS_NEGATIVE)==0 ? c+delta : c-delta; } if(HAS_SLOT(excWord, UCASE_EXC_FOLD)) { diff --git a/deps/icu-small/source/common/ucln_cmn.h b/deps/icu-small/source/common/ucln_cmn.h index 9b6c2058135c52..0ca911b47d9875 100644 --- a/deps/icu-small/source/common/ucln_cmn.h +++ b/deps/icu-small/source/common/ucln_cmn.h @@ -45,6 +45,7 @@ typedef enum ECleanupCommonType { UCLN_COMMON_CURRENCY, UCLN_COMMON_LOADED_NORMALIZER2, UCLN_COMMON_NORMALIZER2, + UCLN_COMMON_CHARACTERPROPERTIES, UCLN_COMMON_USET, UCLN_COMMON_UNAMES, UCLN_COMMON_UPROPS, @@ -52,7 +53,6 @@ typedef enum ECleanupCommonType { UCLN_COMMON_UCNV_IO, UCLN_COMMON_UDATA, UCLN_COMMON_PUTIL, - UCLN_COMMON_LIST_FORMATTER, UCLN_COMMON_UINIT, /* diff --git a/deps/icu-small/source/common/ucnv.cpp b/deps/icu-small/source/common/ucnv.cpp index 39ea5dfa6636f5..abf302eaddb7a8 100644 --- a/deps/icu-small/source/common/ucnv.cpp +++ b/deps/icu-small/source/common/ucnv.cpp @@ -1743,13 +1743,9 @@ ucnv_fromUChars(UConverter *cnv, } if(srcLength>0) { srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); destLimit=dest+destCapacity; - /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ - if(destLimit0) { srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); destLimit=dest+destCapacity; - /* pin the destination limit to U_MAX_PTR; NULL check is for OS/400 */ - if(destLimit(0x10000 | (mySourceChar << 8) | trailByte); } } else { args->converter->toUBytes[0] = (uint8_t)mySourceChar; @@ -3304,7 +3304,7 @@ UConverter_toUnicode_ISO_2022_CN_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, myData->isEmptySegment = FALSE; /* we are handling it, reset to avoid future spurious errors */ *err = U_ILLEGAL_ESCAPE_SEQUENCE; args->converter->toUCallbackReason = UCNV_IRREGULAR; - args->converter->toUBytes[0] = mySourceChar; + args->converter->toUBytes[0] = static_cast(mySourceChar); args->converter->toULength = 1; args->target = myTarget; args->source = mySource; diff --git a/deps/icu-small/source/common/ucnv_ct.cpp b/deps/icu-small/source/common/ucnv_ct.cpp index 51e31aa4116bd3..b40e1b2c970e51 100644 --- a/deps/icu-small/source/common/ucnv_ct.cpp +++ b/deps/icu-small/source/common/ucnv_ct.cpp @@ -180,7 +180,7 @@ _CompoundTextgetName(const UConverter* cnv); static int32_t findNextEsc(const char *source, const char *sourceLimit) { - int32_t length = sourceLimit - source; + int32_t length = static_cast(sourceLimit - source); int32_t i; for (i = 1; i < length; i++) { if (*(source + i) == 0x1B) { diff --git a/deps/icu-small/source/common/ucnv_u16.cpp b/deps/icu-small/source/common/ucnv_u16.cpp index a289fd4acfac1a..6c1b87d3c939c6 100644 --- a/deps/icu-small/source/common/ucnv_u16.cpp +++ b/deps/icu-small/source/common/ucnv_u16.cpp @@ -71,7 +71,7 @@ _UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, /* write the BOM if necessary */ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xfe, (char)0xff }; + static const char bom[]={ (char)0xfeu, (char)0xffu }; ucnv_fromUWriteBytes(cnv, bom, 2, &pArgs->target, pArgs->targetLimit, @@ -672,7 +672,7 @@ _UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, /* write the BOM if necessary */ if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe }; + static const char bom[]={ (char)0xffu, (char)0xfeu }; ucnv_fromUWriteBytes(cnv, bom, 2, &pArgs->target, pArgs->targetLimit, diff --git a/deps/icu-small/source/common/ucnv_u32.cpp b/deps/icu-small/source/common/ucnv_u32.cpp index ca8c6788d3dd97..13444a3afd97f1 100644 --- a/deps/icu-small/source/common/ucnv_u32.cpp +++ b/deps/icu-small/source/common/ucnv_u32.cpp @@ -228,7 +228,7 @@ T_UConverter_fromUnicode_UTF32_BE(UConverterFromUnicodeArgs * args, /* write the BOM if necessary */ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; + static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; ucnv_fromUWriteBytes(args->converter, bom, 4, &args->target, args->targetLimit, @@ -331,7 +331,7 @@ T_UConverter_fromUnicode_UTF32_BE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, /* write the BOM if necessary */ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ 0, 0, (char)0xfe, (char)0xff }; + static const char bom[]={ 0, 0, (char)0xfeu, (char)0xffu }; ucnv_fromUWriteBytes(args->converter, bom, 4, &args->target, args->targetLimit, @@ -706,7 +706,7 @@ T_UConverter_fromUnicode_UTF32_LE(UConverterFromUnicodeArgs * args, /* write the BOM if necessary */ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; + static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; ucnv_fromUWriteBytes(args->converter, bom, 4, &args->target, args->targetLimit, @@ -817,7 +817,7 @@ T_UConverter_fromUnicode_UTF32_LE_OFFSET_LOGIC(UConverterFromUnicodeArgs * args, /* write the BOM if necessary */ if(args->converter->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { - static const char bom[]={ (char)0xff, (char)0xfe, 0, 0 }; + static const char bom[]={ (char)0xffu, (char)0xfeu, 0, 0 }; ucnv_fromUWriteBytes(args->converter, bom, 4, &args->target, args->targetLimit, @@ -1043,7 +1043,7 @@ _UTF32Open(UConverter *cnv, _UTF32Reset(cnv, UCNV_RESET_BOTH); } -static const char utf32BOM[8]={ 0, 0, (char)0xfe, (char)0xff, (char)0xff, (char)0xfe, 0, 0 }; +static const char utf32BOM[8]={ 0, 0, (char)0xfeu, (char)0xffu, (char)0xffu, (char)0xfeu, 0, 0 }; static void U_CALLCONV _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, @@ -1071,7 +1071,7 @@ _UTF32ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, b=*source; if(b==0) { state=1; /* could be 00 00 FE FF */ - } else if(b==(char)0xff) { + } else if(b==(char)0xffu) { state=5; /* could be FF FE 00 00 */ } else { state=8; /* default to UTF-32BE */ diff --git a/deps/icu-small/source/common/ucnv_u8.cpp b/deps/icu-small/source/common/ucnv_u8.cpp index 5a07244b02bf9c..878d67304c7d89 100644 --- a/deps/icu-small/source/common/ucnv_u8.cpp +++ b/deps/icu-small/source/common/ucnv_u8.cpp @@ -108,7 +108,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, if (mySource < sourceLimit) { toUBytes[i] = (char) (ch2 = *mySource); - if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) && + if (!icu::UTF8::isValidTrail(ch, static_cast(ch2), i, inBytes) && !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) { break; /* i < inBytes */ @@ -225,7 +225,7 @@ static void U_CALLCONV ucnv_toUnicode_UTF8_OFFSETS_LOGIC (UConverterToUnicodeAr if (mySource < sourceLimit) { toUBytes[i] = (char) (ch2 = *mySource); - if (!icu::UTF8::isValidTrail(ch, ch2, i, inBytes) && + if (!icu::UTF8::isValidTrail(ch, static_cast(ch2), i, inBytes) && !(isCESU8 && i == 1 && ch == 0xed && U8_IS_TRAIL(ch2))) { break; /* i < inBytes */ diff --git a/deps/icu-small/source/common/ucnvhz.cpp b/deps/icu-small/source/common/ucnvhz.cpp index 5a24575f05c28a..31595374696d8c 100644 --- a/deps/icu-small/source/common/ucnvhz.cpp +++ b/deps/icu-small/source/common/ucnvhz.cpp @@ -199,7 +199,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, *err = U_ILLEGAL_ESCAPE_SEQUENCE; args->converter->toUCallbackReason = UCNV_IRREGULAR; args->converter->toUBytes[0] = UCNV_TILDE; - args->converter->toUBytes[1] = mySourceChar; + args->converter->toUBytes[1] = static_cast(mySourceChar); args->converter->toULength = 2; args->target = myTarget; args->source = mySource; @@ -229,7 +229,7 @@ UConverter_toUnicode_HZ_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, --mySource; } else { /* Include the current byte in the illegal sequence. */ - args->converter->toUBytes[1] = mySourceChar; + args->converter->toUBytes[1] = static_cast(mySourceChar); args->converter->toULength = 2; } args->target = myTarget; diff --git a/deps/icu-small/source/common/ucnvmbcs.cpp b/deps/icu-small/source/common/ucnvmbcs.cpp index 9052394b4ff8d1..e1248a7bd36035 100644 --- a/deps/icu-small/source/common/ucnvmbcs.cpp +++ b/deps/icu-small/source/common/ucnvmbcs.cpp @@ -4164,8 +4164,8 @@ ucnv_MBCSFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, nextSourceIndex=0; /* Get the SI/SO character for the converter */ - siLength = getSISOBytes(SI, cnv->options, siBytes); - soLength = getSISOBytes(SO, cnv->options, soBytes); + siLength = static_cast(getSISOBytes(SI, cnv->options, siBytes)); + soLength = static_cast(getSISOBytes(SO, cnv->options, soBytes)); /* conversion loop */ /* diff --git a/deps/icu-small/source/common/ucnvsel.cpp b/deps/icu-small/source/common/ucnvsel.cpp index 90c7a18b93a514..6ccee1ae61fd70 100644 --- a/deps/icu-small/source/common/ucnvsel.cpp +++ b/deps/icu-small/source/common/ucnvsel.cpp @@ -41,6 +41,7 @@ #include "propsvec.h" #include "uassert.h" #include "ucmndata.h" +#include "udataswp.h" #include "uenumimp.h" #include "cmemory.h" #include "cstring.h" @@ -72,7 +73,7 @@ static void generateSelectorData(UConverterSelector* result, // set errorValue to all-ones for (int32_t col = 0; col < columns; col++) { upvec_setValue(upvec, UPVEC_ERROR_VALUE_CP, UPVEC_ERROR_VALUE_CP, - col, ~0, ~0, status); + col, static_cast(~0), static_cast(~0), status); } for (int32_t i = 0; i < result->encodingsCount; ++i) { @@ -109,7 +110,7 @@ static void generateSelectorData(UConverterSelector* result, // this will be reached for the converters that fill the set with // strings. Those should be ignored by our system } else { - upvec_setValue(upvec, start_char, end_char, column, ~0, mask, + upvec_setValue(upvec, start_char, end_char, column, static_cast(~0), mask, status); } } @@ -130,7 +131,7 @@ static void generateSelectorData(UConverterSelector* result, uset_getItem(excludedCodePoints, j, &start_char, &end_char, NULL, 0, status); for (int32_t col = 0; col < columns; col++) { - upvec_setValue(upvec, start_char, end_char, col, ~0, ~0, + upvec_setValue(upvec, start_char, end_char, col, static_cast(~0), static_cast(~0), status); } } @@ -684,7 +685,7 @@ static int16_t countOnes(uint32_t* mask, int32_t len) { ent &= ent - 1; // clear the least significant bit set } } - return totalOnes; + return static_cast(totalOnes); } diff --git a/deps/icu-small/source/common/ucol_swp.cpp b/deps/icu-small/source/common/ucol_swp.cpp index 3055abaca3ba09..97b5c4aff50c88 100644 --- a/deps/icu-small/source/common/ucol_swp.cpp +++ b/deps/icu-small/source/common/ucol_swp.cpp @@ -28,81 +28,6 @@ /* swapping ----------------------------------------------------------------- */ -/* - * This performs data swapping for a folded trie (see utrie.c for details). - */ - -U_CAPI int32_t U_EXPORT2 -utrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UTrieHeader *inTrie; - UTrieHeader trie; - int32_t size; - UBool dataIs32; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); - trie.options=ds->readUInt32(inTrie->options); - trie.indexLength=udata_readInt32(ds, inTrie->indexLength); - trie.dataLength=udata_readInt32(ds, inTrie->dataLength); - - if( trie.signature!=0x54726965 || - (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || - ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || - trie.indexLength=0) { - UTrieHeader *outTrie; - - if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); - - /* swap the index and the data */ - if(dataIs32) { - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - } else { - ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); - } - } - - return size; -} - #if !UCONFIG_NO_COLLATION U_CAPI UBool U_EXPORT2 diff --git a/deps/icu-small/source/common/ucptrie.cpp b/deps/icu-small/source/common/ucptrie.cpp new file mode 100644 index 00000000000000..13496ad56c5e58 --- /dev/null +++ b/deps/icu-small/source/common/ucptrie.cpp @@ -0,0 +1,590 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.cpp (modified from utrie2.cpp) +// created: 2017dec29 Markus W. Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/utf.h" +#include "unicode/utf8.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + + if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || + type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // Enough data for a trie header? + if (length < (int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Check the signature. + const UCPTrieHeader *header = (const UCPTrieHeader *)data; + if (header->signature != UCPTRIE_SIG) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + int32_t options = header->options; + int32_t typeInt = (options >> 6) & 3; + int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; + if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || + (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + UCPTrieType actualType = (UCPTrieType)typeInt; + UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; + if (type < 0) { + type = actualType; + } + if (valueWidth < 0) { + valueWidth = actualValueWidth; + } + if (type != actualType || valueWidth != actualValueWidth) { + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + // Get the length values and offsets. + UCPTrie tempTrie; + uprv_memset(&tempTrie, 0, sizeof(tempTrie)); + tempTrie.indexLength = header->indexLength; + tempTrie.dataLength = + ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; + tempTrie.index3NullOffset = header->index3NullOffset; + tempTrie.dataNullOffset = + ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; + + tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; + tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; + tempTrie.type = type; + tempTrie.valueWidth = valueWidth; + + // Calculate the actual length. + int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + actualLength += tempTrie.dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + actualLength += tempTrie.dataLength * 4; + } else { + actualLength += tempTrie.dataLength; + } + if (length < actualLength) { + *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. + return nullptr; + } + + // Allocate the trie. + UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); + if (trie == nullptr) { + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); +#ifdef UCPTRIE_DEBUG + trie->name = "fromSerialized"; +#endif + + // Set the pointers to its index and data arrays. + const uint16_t *p16 = (const uint16_t *)(header + 1); + trie->index = p16; + p16 += trie->indexLength; + + // Get the data. + int32_t nullValueOffset = trie->dataNullOffset; + if (nullValueOffset >= trie->dataLength) { + nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + trie->data.ptr16 = p16; + trie->nullValue = trie->data.ptr16[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_32: + trie->data.ptr32 = (const uint32_t *)p16; + trie->nullValue = trie->data.ptr32[nullValueOffset]; + break; + case UCPTRIE_VALUE_BITS_8: + trie->data.ptr8 = (const uint8_t *)p16; + trie->nullValue = trie->data.ptr8[nullValueOffset]; + break; + default: + // Unreachable because valueWidth was checked above. + *pErrorCode = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + if (pActualLength != nullptr) { + *pActualLength = actualLength; + } + return trie; +} + +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie) { + uprv_free(trie); +} + +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie) { + return (UCPTrieType)trie->type; +} + +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie) { + return (UCPTrieValueWidth)trie->valueWidth; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { + int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + int32_t i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + int32_t dataBlock; + if ((i3Block & 0x8000) == 0) { + // 16-bit indexes + dataBlock = trie->index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + i3 &= 7; + dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; + dataBlock |= trie->index[i3Block + i3]; + } + return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { + UChar32 c = (lt1 << 12) | (t2 << 6) | t3; + if (c >= trie->highStart) { + // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. + return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + } + return ucptrie_internalSmallIndex(trie, c); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src) { + int32_t i, length; + // Support 64-bit pointers by avoiding cast of arbitrary difference. + if ((src - start) <= 7) { + i = length = (int32_t)(src - start); + } else { + i = length = 7; + start = src - 7; + } + c = utf8_prevCharSafeBody(start, 0, &i, c, -1); + i = length - i; // Number of bytes read backward from src. + int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); + return (idx << 3) | i; +} + +namespace { + +inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + return data.ptr16[dataIndex]; + case UCPTRIE_VALUE_BITS_32: + return data.ptr32[dataIndex]; + case UCPTRIE_VALUE_BITS_8: + return data.ptr8[dataIndex]; + default: + // Unreachable if the trie is properly initialized. + return 0xffffffff; + } +} + +} // namespace + +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c) { + int32_t dataIndex; + if ((uint32_t)c <= 0x7f) { + // linear ASCII + dataIndex = c; + } else { + UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX; + dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); + } + return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); +} + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, + UCPMapValueFilter *filter, const void *context) { + if (value == trieNullValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 getRange(const void *t, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + const UCPTrie *trie = reinterpret_cast(t); + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (start >= trie->highStart) { + if (pValue != nullptr) { + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t value = getValue(trie->data, valueWidth, di); + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + + uint32_t nullValue = trie->nullValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + const uint16_t *index = trie->index; + + int32_t prevI3Block = -1; + int32_t prevBlock = -1; + UChar32 c = start; + uint32_t value; + bool haveValue = false; + do { + int32_t i3Block; + int32_t i3; + int32_t i3BlockLength; + int32_t dataBlockLength; + if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { + i3Block = 0; + i3 = c >> UCPTRIE_FAST_SHIFT; + i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + } else { + // Use the multi-stage index. + int32_t i1 = c >> UCPTRIE_SHIFT_1; + if (trie->type == UCPTRIE_TYPE_FAST) { + U_ASSERT(0xffff < c && c < trie->highStart); + i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; + } else { + U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); + i1 += UCPTRIE_SMALL_INDEX_LENGTH; + } + i3Block = trie->index[ + (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; + if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { + // The index-3 block is the same as the previous one, and filled with value. + U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + c += UCPTRIE_CP_PER_INDEX_2_ENTRY; + continue; + } + prevI3Block = i3Block; + if (i3Block == trie->index3NullOffset) { + // This is the index-3 null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + prevBlock = trie->dataNullOffset; + c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + continue; + } + i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; + i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; + dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } + // Enumerate data blocks for one index-3 block. + do { + int32_t block; + if ((i3Block & 0x8000) == 0) { + block = index[i3Block + i3]; + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); + int32_t gi = i3 & 7; + block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; + block |= index[group + gi]; + } + if (block == prevBlock && (c - start) >= dataBlockLength) { + // The block is the same as the previous one, and filled with value. + U_ASSERT((c & (dataBlockLength - 1)) == 0); + c += dataBlockLength; + } else { + int32_t dataMask = dataBlockLength - 1; + prevBlock = block; + if (block == trie->dataNullOffset) { + // This is the data null block. + if (haveValue) { + if (nullValue != value) { + return c - 1; + } + } else { + value = nullValue; + if (pValue != nullptr) { *pValue = nullValue; } + haveValue = true; + } + c = (c + dataBlockLength) & ~dataMask; + } else { + int32_t di = block + (c & dataMask); + uint32_t value2 = getValue(trie->data, valueWidth, di); + value2 = maybeFilterValue(value2, trie->nullValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & dataMask) != 0) { + if (maybeFilterValue(getValue(trie->data, valueWidth, ++di), + trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } + } + } + } + } while (++i3 < i3BlockLength); + } while (c < trie->highStart); + U_ASSERT(haveValue); + int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; + uint32_t highValue = getValue(trie->data, valueWidth, di); + if (maybeFilterValue(highValue, trie->nullValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +} // namespace + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + if (option == UCPMAP_RANGE_NORMAL) { + return getRange(trie, start, filter, context, pValue); + } + uint32_t value; + if (pValue == nullptr) { + // We need to examine the range value even if the caller does not want it. + pValue = &value; + } + UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; + UChar32 end = getRange(trie, start, filter, context, pValue); + if (end < 0xd7ff || start > surrEnd) { + return end; + } + // The range overlaps with surrogates, or ends just before the first one. + if (*pValue == surrogateValue) { + if (end >= surrEnd) { + // Surrogates followed by a non-surrogateValue range, + // or surrogates are part of a larger surrogateValue range. + return end; + } + } else { + if (start <= 0xd7ff) { + return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. + } + // Start is a surrogate with a non-surrogateValue code *unit* value. + // Return a surrogateValue code *point* range. + *pValue = surrogateValue; + if (end > surrEnd) { + return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. + } + } + // See if the surrogateValue surrogate range can be merged with + // an immediately following range. + uint32_t value2; + UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); + if (value2 == surrogateValue) { + return end2; + } + return surrEnd; +} + +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, + void *data, int32_t capacity, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return 0; + } + + UCPTrieType type = (UCPTrieType)trie->type; + UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || + capacity < 0 || + (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + length += trie->dataLength * 2; + break; + case UCPTRIE_VALUE_BITS_32: + length += trie->dataLength * 4; + break; + case UCPTRIE_VALUE_BITS_8: + length += trie->dataLength; + break; + default: + // unreachable + break; + } + if (capacity < length) { + *pErrorCode = U_BUFFER_OVERFLOW_ERROR; + return length; + } + + char *bytes = (char *)data; + UCPTrieHeader *header = (UCPTrieHeader *)bytes; + header->signature = UCPTRIE_SIG; // "Tri3" + header->options = (uint16_t)( + ((trie->dataLength & 0xf0000) >> 4) | + ((trie->dataNullOffset & 0xf0000) >> 8) | + (trie->type << 6) | + valueWidth); + header->indexLength = (uint16_t)trie->indexLength; + header->dataLength = (uint16_t)trie->dataLength; + header->index3NullOffset = trie->index3NullOffset; + header->dataNullOffset = (uint16_t)trie->dataNullOffset; + header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; + bytes += sizeof(UCPTrieHeader); + + uprv_memcpy(bytes, trie->index, trie->indexLength * 2); + bytes += trie->indexLength * 2; + + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); + break; + case UCPTRIE_VALUE_BITS_32: + uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); + break; + default: + // unreachable + break; + } + return length; +} + +namespace { + +#ifdef UCPTRIE_DEBUG +long countNull(const UCPTrie *trie) { + uint32_t nullValue=trie->nullValue; + int32_t length=trie->dataLength; + long count=0; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + for(int32_t i=0; idata.ptr16[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_32: + for(int32_t i=0; idata.ptr32[i]==nullValue) { ++count; } + } + break; + case UCPTRIE_VALUE_BITS_8: + for(int32_t i=0; idata.ptr8[i]==nullValue) { ++count; } + } + break; + default: + // unreachable + break; + } + return count; +} + +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which) { + long indexLength=trie->indexLength; + long dataLength=(long)trie->dataLength; + long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ + dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : + trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); + printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countNull(trie), totalLength); +} +#endif + +} // namespace + +// UCPMap ---- +// Initially, this is the same as UCPTrie. This may well change. + +U_CAPI uint32_t U_EXPORT2 +ucpmap_get(const UCPMap *map, UChar32 c) { + return ucptrie_get(reinterpret_cast(map), c); +} + +U_CAPI UChar32 U_EXPORT2 +ucpmap_getRange(const UCPMap *map, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_getRange(reinterpret_cast(map), start, + option, surrogateValue, + filter, context, pValue); +} diff --git a/deps/icu-small/source/common/ucptrie_impl.h b/deps/icu-small/source/common/ucptrie_impl.h new file mode 100644 index 00000000000000..1fe6a18ac5319e --- /dev/null +++ b/deps/icu-small/source/common/ucptrie_impl.h @@ -0,0 +1,289 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie_impl.h (modified from utrie2_impl.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_IMPL_H__ +#define __UCPTRIE_IMPL_H__ + +#include "unicode/ucptrie.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif + +// UCPTrie signature values, in platform endianness and opposite endianness. +// The UCPTrie signature ASCII byte values spell "Tri3". +#define UCPTRIE_SIG 0x54726933 +#define UCPTRIE_OE_SIG 0x33697254 + +/** + * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * @internal + */ +struct UCPTrieHeader { + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + uint32_t signature; + + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). + * Bits 2..0: UCPTrieValueWidth + */ + uint16_t options; + + /** Total length of the index tables. */ + uint16_t indexLength; + + /** Data length bits 15..0. */ + uint16_t dataLength; + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + uint16_t index3NullOffset; + + /** Data null block offset bits 15..0, 0xfffff if none. */ + uint16_t dataNullOffset; + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UCPTRIE_SHIFT_2. + */ + uint16_t shiftedHighStart; +}; + +/** + * Constants for use with UCPTrieHeader.options. + * @internal + */ +enum { + UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000, + UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00, + UCPTRIE_OPTIONS_RESERVED_MASK = 0x38, + UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7, + /** + * Value for index3NullOffset which indicates that there is no index-3 null block. + * Bit 15 is unused for this value because this bit is used if the index-3 contains + * 18-bit indexes. + */ + UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff, + UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff +}; + +// Internal constants. +enum { + /** The length of the BMP index table. 1024=0x400 */ + UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT, + + UCPTRIE_SMALL_LIMIT = 0x1000, + UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT, + + /** Shift size for getting the index-3 table offset. */ + UCPTRIE_SHIFT_3 = 4, + + /** Shift size for getting the index-2 table offset. */ + UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3, + + /** Shift size for getting the index-1 table offset. */ + UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2, + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3, + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2, + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1, + + /** Number of entries in an index-2 block. 32=0x20 */ + UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2, + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1, + + /** Number of code points per index-2 table entry. 512=0x200 */ + UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2, + + /** Number of entries in an index-3 block. 32=0x20 */ + UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3, + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1, + + /** Number of entries in a small data block. 16=0x10 */ + UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3, + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1 +}; + +typedef UChar32 +UCPTrieGetRange(const void *trie, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +U_CFUNC UChar32 +ucptrie_internalGetRange(UCPTrieGetRange *getRange, + const void *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +#ifdef UCPTRIE_DEBUG +U_CFUNC void +ucptrie_printLengths(const UCPTrie *trie, const char *which); + +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name); +#endif + +/* + * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie. + * For overview information see http://site.icu-project.org/design/struct/utrie + * + * The binary trie data should be 32-bit-aligned. + * The overall layout is: + * + * UCPTrieHeader header; -- 16 bytes, see struct definition above + * uint16_t index[header.indexLength]; + * uintXY_t data[header.dataLength]; + * + * The trie data array is an array of uint16_t, uint32_t, or uint8_t, + * specified via the UCPTrieValueWidth when building the trie. + * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned. + * The overall length of the trie data is a multiple of 4 bytes. + * (Padding is added at the end of the index array and/or near the end of the data array as needed.) + * + * The length of the data array (dataLength) is stored as an integer split across two fields + * of the header struct (high bits in header.options). + * + * The trie type can be "fast" or "small" which determines the index structure, + * specified via the UCPTrieType when building the trie. + * + * The type and valueWidth are stored in the header.options. + * There are reserved type and valueWidth values, and reserved header.options bits. + * They could be used in future format extensions. + * Code reading the trie structure must fail with an error when unknown values or options are set. + * + * Values for ASCII character (U+0000..U+007F) can always be found at the start of the data array. + * + * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup. + * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000. + * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000. + * + * All code points in the range highStart..U+10FFFF map to a single highValue + * which is stored at the second-to-last position of the data array. + * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.) + * The highStart value is header.shiftedHighStart<>UCPTRIE_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) + * + * After this index-1 table follow the variable-length index-3 and index-2 tables. + * + * The supplementary index tables are omitted completely + * if there is only BMP data (highStart<=U+10000). + * + * For a "small" trie: + * + * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF. + * + * The "supplementary" index tables are always stored. + * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1. + * + * For both trie types: + * + * The last index-2 block may be a partial block, storing indexes only for code points + * below highStart. + * + * Lookup for ASCII code point c: + * + * Linear access from the start of the data array. + * + * value = data[c]; + * + * Lookup for fast-range code point c: + * + * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits, + * fetch the index array value at that offset, + * add the lower code point bits, index into the data array. + * + * value = data[index[c>>6] + (c&0x3f)]; + * + * (This works for ASCII as well.) + * + * Lookup for small-range code point c below highStart: + * + * Split the code point into four bit fields using several sets of shifts & masks + * to read consecutive values from the index-1, index-2, index-3 and data tables. + * + * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff), + * then the data block offsets are stored directly as uint16_t. + * + * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block + * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by + * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored + * in the additional word. + * + * See ucptrie_internalSmallIndex() for details. + * + * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.) + * + * Compaction: + * + * Multiple code point ranges ("blocks") that are aligned on certain boundaries + * (determined by the shifting/bit fields of code points) and + * map to the same data values normally share a single subsequence of the data array. + * Data blocks can also overlap partially. + * (Depending on the builder code finding duplicate and overlapping blocks.) + * + * Iteration over same-value ranges: + * + * Range iteration (ucptrie_getRange()) walks the structure from a start code point + * until some code point is found that maps to a different value; + * the end of the returned range is just before that. + * + * The header.dataNullOffset (split across two header fields, high bits in header.options) + * is the offset of a widely shared data block filled with one single value. + * It helps quickly skip over large ranges of data with that value. + * The builder must ensure that if the start of any data block (fast or small) + * matches the dataNullOffset, then the whole block must be filled with the null value. + * Special care must be taken if there is no fast null data block + * but a small one, which is shorter, and it matches the *start* of some fast data block. + * + * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block + * where all index entries point to the dataNullOffset. + * If there is no such data or index-3 block, then these offsets are set to + * values that cannot be reached (data offset out of range/reserved index offset), + * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively. + */ + +#endif diff --git a/deps/icu-small/source/common/ucurr.cpp b/deps/icu-small/source/common/ucurr.cpp index bfde3a9df9c2d1..5c9bbef70097e7 100644 --- a/deps/icu-small/source/common/ucurr.cpp +++ b/deps/icu-small/source/common/ucurr.cpp @@ -1077,11 +1077,11 @@ collectCurrencyNames(const char* locale, } // currency plurals - UErrorCode ec3 = U_ZERO_ERROR; - UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec3); + UErrorCode ec5 = U_ZERO_ERROR; + UResourceBundle* curr_p = ures_getByKey(rb, CURRENCYPLURALS, NULL, &ec5); n = ures_getSize(curr_p); for (int32_t i=0; ilocale) == 0) { @@ -1469,7 +1469,6 @@ getCacheEntry(const char* locale, UErrorCode& ec) { } umtx_lock(&gCurrencyCacheMutex); // check again. - int8_t found = -1; for (int8_t i = 0; i < CURRENCY_NAME_CACHE_NUM; ++i) { if (currCache[i]!= NULL && uprv_strcmp(locale, currCache[i]->locale) == 0) { diff --git a/deps/icu-small/source/common/udata.cpp b/deps/icu-small/source/common/udata.cpp index c15cb78a74ad7a..99efbc97eed737 100644 --- a/deps/icu-small/source/common/udata.cpp +++ b/deps/icu-small/source/common/udata.cpp @@ -418,7 +418,8 @@ class UDataPathIterator const char *path; /* working path (u_icudata_Dir) */ const char *nextPath; /* path following this one */ const char *basename; /* item's basename (icudt22e_mt.res)*/ - const char *suffix; /* item suffix (can be null) */ + + StringPiece suffix; /* item suffix (can be null) */ uint32_t basenameLen; /* length of basename */ @@ -432,13 +433,15 @@ class UDataPathIterator }; /** - * @param iter The iterator to be initialized. Its current state does not matter. - * @param path The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME - * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leave directories such as /icudt28l - * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat - * @param suffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly. - * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2. - * '/blarg/stuff.dat' would also be found. + * @param iter The iterator to be initialized. Its current state does not matter. + * @param inPath The full pathname to be iterated over. If NULL, defaults to U_ICUDATA_NAME + * @param pkg Package which is being searched for, ex "icudt28l". Will ignore leaf directories such as /icudt28l + * @param item Item to be searched for. Can include full path, such as /a/b/foo.dat + * @param inSuffix Optional item suffix, if not-null (ex. ".dat") then 'path' can contain 'item' explicitly. + * Ex: 'stuff.dat' would be found in '/a/foo:/tmp/stuff.dat:/bar/baz' as item #2. + * '/blarg/stuff.dat' would also be found. + * Note: inSuffix may also be the 'item' being searched for as well, (ex: "ibm-5348_P100-1997.cnv"), in which case + * the 'item' parameter is often the same as pkg. (Though sometimes might have a tree part as well, ex: "icudt62l-curr"). */ UDataPathIterator::UDataPathIterator(const char *inPath, const char *pkg, const char *item, const char *inSuffix, UBool doCheckLastFour, @@ -566,7 +569,7 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode) if(checkLastFour == TRUE && (pathLen>=4) && - uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix, 4)==0 && /* suffix matches */ + uprv_strncmp(pathBuffer.data() +(pathLen-4), suffix.data(), 4)==0 && /* suffix matches */ uprv_strncmp(findBasename(pathBuffer.data()), basename, basenameLen)==0 && /* base matches */ uprv_strlen(pathBasename)==(basenameLen+4)) { /* base+suffix = full len */ @@ -602,8 +605,13 @@ const char *UDataPathIterator::next(UErrorCode *pErrorCode) /* + basename */ pathBuffer.append(packageStub.data()+1, packageStub.length()-1, *pErrorCode); - if(*suffix) /* tack on suffix */ + if (!suffix.empty()) /* tack on suffix */ { + if (suffix.length() > 4) { + // If the suffix is actually an item ("ibm-5348_P100-1997.cnv") and not an extension (".res") + // then we need to ensure that the path ends with a separator. + pathBuffer.ensureEndsWithFileSeparator(*pErrorCode); + } pathBuffer.append(suffix, *pErrorCode); } } @@ -751,16 +759,19 @@ openCommonData(const char *path, /* Path from OpenChoice? */ UDataPathIterator iter(u_getDataDirectory(), inBasename, path, ".dat", TRUE, pErrorCode); - while((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL) + while ((UDataMemory_isLoaded(&tData)==FALSE) && (pathBuffer = iter.next(pErrorCode)) != NULL) { #ifdef UDATA_DEBUG fprintf(stderr, "ocd: trying path %s - ", pathBuffer); #endif - uprv_mapFile(&tData, pathBuffer); + uprv_mapFile(&tData, pathBuffer, pErrorCode); #ifdef UDATA_DEBUG fprintf(stderr, "%s\n", UDataMemory_isLoaded(&tData)?"LOADED":"not loaded"); #endif } + if (U_FAILURE(*pErrorCode)) { + return NULL; + } #if defined(OS390_STUBDATA) && defined(OS390BATCH) if (!UDataMemory_isLoaded(&tData)) { @@ -769,7 +780,7 @@ openCommonData(const char *path, /* Path from OpenChoice? */ uprv_strncpy(ourPathBuffer, path, 1019); ourPathBuffer[1019]=0; uprv_strcat(ourPathBuffer, ".dat"); - uprv_mapFile(&tData, ourPathBuffer); + uprv_mapFile(&tData, ourPathBuffer, pErrorCode); } #endif @@ -860,7 +871,7 @@ static UBool extendICUData(UErrorCode *pErr) umtx_unlock(&extendICUDataMutex); #endif return didUpdate; /* Return true if ICUData pointer was updated. */ - /* (Could potentialy have been done by another thread racing */ + /* (Could potentially have been done by another thread racing */ /* us through here, but that's fine, we still return true */ /* so that current thread will also examine extended data. */ } @@ -986,12 +997,12 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName, /* init path iterator for individual files */ UDataPathIterator iter(dataPath, pkgName, path, tocEntryPathSuffix, FALSE, pErrorCode); - while((pathBuffer = iter.next(pErrorCode)) != NULL) + while ((pathBuffer = iter.next(pErrorCode)) != NULL) { #ifdef UDATA_DEBUG fprintf(stderr, "UDATA: trying individual file %s\n", pathBuffer); #endif - if(uprv_mapFile(&dataMemory, pathBuffer)) + if (uprv_mapFile(&dataMemory, pathBuffer, pErrorCode)) { pEntryData = checkDataItem(dataMemory.pHeader, isAcceptable, context, type, name, subErrorCode, pErrorCode); if (pEntryData != NULL) { @@ -1007,7 +1018,7 @@ static UDataMemory *doLoadFromIndividualFiles(const char *pkgName, return pEntryData; } - /* the data is not acceptable, or some error occured. Either way, unmap the memory */ + /* the data is not acceptable, or some error occurred. Either way, unmap the memory */ udata_close(&dataMemory); /* If we had a nasty error, bail out completely. */ @@ -1076,6 +1087,11 @@ static UDataMemory *doLoadFromCommonData(UBool isICUData, const char * /*pkgName } } } + // If we failed due to being out-of-memory, then stop early and report the error. + if (*subErrorCode == U_MEMORY_ALLOCATION_ERROR) { + *pErrorCode = *subErrorCode; + return NULL; + } /* Data wasn't found. If we were looking for an ICUData item and there is * more data available, load it and try again, * otherwise break out of this loop. */ @@ -1252,7 +1268,8 @@ doOpenChoice(const char *path, const char *type, const char *name, tocEntryName.append(".", *pErrorCode).append(type, *pErrorCode); tocEntryPath.append(".", *pErrorCode).append(type, *pErrorCode); } - tocEntryPathSuffix = tocEntryPath.data()+tocEntrySuffixIndex; /* suffix starts here */ + // The +1 is for the U_FILE_SEP_CHAR that is always appended above. + tocEntryPathSuffix = tocEntryPath.data() + tocEntrySuffixIndex + 1; /* suffix starts here */ #ifdef UDATA_DEBUG fprintf(stderr, " tocEntryName = %s\n", tocEntryName.data()); diff --git a/deps/icu-small/source/common/udataswp.h b/deps/icu-small/source/common/udataswp.h index 5303870b1d3046..5e7b043c4c934c 100644 --- a/deps/icu-small/source/common/udataswp.h +++ b/deps/icu-small/source/common/udataswp.h @@ -333,6 +333,43 @@ uprv_compareInvEbcdic(const UDataSwapper *ds, # error Unknown charset family! #endif +// utrie_swap.cpp -----------------------------------------------------------*** + +/** + * Swaps a serialized UTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie2. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); + +/** + * Swaps a serialized UTrie, UTrie2, or UCPTrie. + * @internal + */ +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode); /* material... -------------------------------------------------------------- */ diff --git a/deps/icu-small/source/common/uhash.cpp b/deps/icu-small/source/common/uhash.cpp index a80e7b8ff27b42..239997d05d7e38 100644 --- a/deps/icu-small/source/common/uhash.cpp +++ b/deps/icu-small/source/common/uhash.cpp @@ -218,7 +218,7 @@ _uhash_allocate(UHashtable *hash, U_ASSERT(primeIndex >= 0 && primeIndex < PRIMES_LENGTH); - hash->primeIndex = primeIndex; + hash->primeIndex = static_cast(primeIndex); hash->length = PRIMES[primeIndex]; p = hash->elements = (UHashElement*) @@ -860,13 +860,13 @@ uhash_hashUChars(const UHashTok key) { U_CAPI int32_t U_EXPORT2 uhash_hashChars(const UHashTok key) { const char *s = (const char *)key.pointer; - return s == NULL ? 0 : static_cast(ustr_hashCharsN(s, uprv_strlen(s))); + return s == NULL ? 0 : static_cast(ustr_hashCharsN(s, static_cast(uprv_strlen(s)))); } U_CAPI int32_t U_EXPORT2 uhash_hashIChars(const UHashTok key) { const char *s = (const char *)key.pointer; - return s == NULL ? 0 : ustr_hashICharsN(s, uprv_strlen(s)); + return s == NULL ? 0 : ustr_hashICharsN(s, static_cast(uprv_strlen(s))); } U_CAPI UBool U_EXPORT2 diff --git a/deps/icu-small/source/common/uinvchar.h b/deps/icu-small/source/common/uinvchar.h index c4f9f88b9ad32f..56dddfa8fde9bb 100644 --- a/deps/icu-small/source/common/uinvchar.h +++ b/deps/icu-small/source/common/uinvchar.h @@ -53,22 +53,6 @@ uprv_isInvariantString(const char *s, int32_t length); U_INTERNAL UBool U_EXPORT2 uprv_isInvariantUString(const UChar *s, int32_t length); -#ifdef __cplusplus - -/** - * Check if a UnicodeString only contains invariant characters. - * See utypes.h for details. - * - * @param s Input string. - * @return TRUE if s contains only invariant characters. - */ -U_INTERNAL inline UBool U_EXPORT2 -uprv_isInvariantUnicodeString(const icu::UnicodeString &s) { - return uprv_isInvariantUString(icu::toUCharPtr(s.getBuffer()), s.length()); -} - -#endif /* __cplusplus */ - /** * \def U_UPPER_ORDINAL * Get the ordinal number of an uppercase invariant character diff --git a/deps/icu-small/source/common/ulayout_props_data.h b/deps/icu-small/source/common/ulayout_props_data.h new file mode 100644 index 00000000000000..f42d15fc830afd --- /dev/null +++ b/deps/icu-small/source/common/ulayout_props_data.h @@ -0,0 +1,722 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +// +// file name: ulayout_props_data.h +// +// machine-generated by: icu/tools/unicode/c/genprops/layoutpropsbuilder.cpp + + +#ifdef INCLUDED_FROM_UPROPS_CPP + +static const int32_t maxInPCValue = 14; + +static const uint16_t inpc_trieIndex[765]={ +0,0x40,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x80,0xc0,0xff,0x13f,0x17e,0x1be,0x17e,0x1fe,0x23e,0x27e,0x2bc,0x2fc, +0x33c,0x37b,0x23e,0x3bb,0x3fb,0x439,0x477,0x4ad,0x4e1,0x521,0x531,0x571,0x599,0x5d9,0x619,0x656, +0x2b7,0x2c6,0x2d2,0x2c6,0x2ed,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0,0x10,0x20, +0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20, +0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0x80,0x90,0xa0, +0xb0,0xc0,0xd0,0xe0,0xf0,0xff,0x10f,0x11f,0x12f,0x13f,0x14f,0x15f,0x16f,0x17e,0x18e,0x19e, +0x1ae,0x1be,0x1ce,0x1de,0x1ee,0x17e,0x18e,0x19e,0x1ae,0x1fe,0x20e,0x21e,0x22e,0x23e,0x24e,0x25e, +0x26e,0x27e,0x28e,0x29e,0x2ae,0x2bc,0x2cc,0x2dc,0x2ec,0x2fc,0x30c,0x31c,0x32c,0x33c,0x34c,0x35c, +0x36c,0x37b,0x38b,0x39b,0x3ab,0x23e,0x24e,0x25e,0x26e,0x3bb,0x3cb,0x3db,0x3eb,0x3fb,0x40b,0x41b, +0x42b,0x439,0x449,0x459,0x469,0x477,0x487,0x497,0x4a7,0x4ad,0x4bd,0x4cd,0x4dd,0x4e1,0x4f1,0x501, +0x511,0x521,0x531,0x541,0x551,0x531,0x541,0x551,0x561,0x571,0x581,0x591,0x5a1,0x599,0x5a9,0x5b9, +0x5c9,0x5d9,0x5e9,0x5f9,0x609,0x619,0x629,0x639,0x649,0x656,0x666,0x676,0x686,0,0,0x68b, +0x69a,0,0x6a9,0x6b8,0x6c7,0x6d5,0x6e5,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x6f3,0,0x6f3, +0,0x701,0,0x701,0,0,0,0x70b,0x71b,0x729,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x739,0x749,0,0, +0,0,0,0,0,0x759,0x768,0,0,0,0x772,0,0,0,0x77e,0x78d, +0x79b,0,0,0,0,0,0,0,0,0x7ab,0,0,0x7b7,0x7c7,0,0x7cc, +0x52c,0x81,0,0x7dc,0,0,0,0x7ea,0x3fb,0,0,0x7fa,0x807,0,0,0, +0,0,0,0,0,0,0x817,0x827,0x835,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x2b3,0x83f,0,0x84c,0,0,0,0, +0,0x101,0,0,0x858,0x864,0,0x874,0x882,0,0,0x892,0,0x8a0,0x3fb,0, +0,0x80,0,0,0x8b0,0x8c0,0,0x2b9,0,0,0x8c7,0x8d6,0x8e3,0,0,0x8f1, +0,0,0,0x901,0x2bd,0,0x911,0x151,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0x921,0,0x930,0,0,0x940,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x950,0,0,0x958,0x966,0,0,0, +0x81,0,0,0x976,0,0,0,0,0x52d,0,0x981,0x991,0x3cb,0,0,0x659, +0x81,0,0,0x99e,0x9ae,0,0,0,0x9bb,0x9cb,0,0,0,0,0,0, +0,0,0,0x71,0x9db,0,0xff,0,0,0x9e6,0x9f6,0x14f,0xa04,0x52b,0,0, +0,0,0,0,0,0,0x99c,0xa14,0x16f,0,0,0,0,0,0xa24,0xa33, +0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb,0xa43,0xe3, +0x214,0,0,0,0xa53,0x2be,0,0,0,0,0,0xa63,0xa73,0,0,0, +0,0,0xa7b,0xa8b,0,0,0,0,0,0,0,0,0,0,0,0, +0,0xa97,0xaa6,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xab5, +0,0,0xac2,0,0xad1,0,0,0xadd,0xae7,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x2eb, +0xaf7,0,0,0,0,0,0xb07,0xb0f,0xb1e,0,0,0,0,0,0,0, +0xb2d,0xb3c,0,0,0,0xb44,0xb54,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0xb61,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0x45,0x4d,0x4d,0x4d,0x5d,0x7d,0x9d,0xbd,0xdd, +2,2,0xec,0x10a,0x129,0x149,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +2,2,2,2,2,2,0x169,0x188,2,2,2,2,2,2,2,2, +2,2,0x1a8,2,2,0x1c8,0x1e6,0x203,0x221,0x23f,0x25f,0x27d,0x297 +}; + +static const uint8_t inpc_trieData[2930]={ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +8,8,8,7,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,8,7,1,0,7,4, +7,1,1,1,1,8,8,8,8,7,7,7,7,1,4,7, +0,8,1,8,8,8,1,1,0,0,0,0,0,0,0,0, +0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +8,7,7,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,1,0,7,4,7, +1,1,1,1,0,0,4,4,0,0,5,5,1,0,0,0, +0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0, +0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,8, +8,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,7,4,7,1, +1,0,0,0,0,8,8,0,0,8,8,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,8, +0,0,0,1,0,0,0,0,0,0,0,0,0,0,7,1, +1,1,1,8,0,8,8,0xd,0,7,7,1,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,8,8,8,8,8,8,0,8, +7,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,0,7,8,7,1, +1,1,1,0,0,4,0xb,0,0,5,0xc,1,0,0,0,0, +0,0,0,0,8,0xd,0,0,0,0,0,0,0,0,0,0, +1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,7,7,8,7,7,0, +0,0,4,4,4,0,5,5,5,8,0,0,0,0,0,0, +0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,8,7,7,7, +8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,8,8,7,7,7,7, +0,8,8,9,0,8,8,8,8,0,0,0,0,0,0,0, +8,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xd,7,7,7,7, +0,8,0xd,0xd,0,0xd,0xd,8,8,0,0,0,0,0,0,0, +7,7,0,0,0,0,0,0,0,0,0,0,0,1,1,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,8,8,7,7,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,8,8,0,7,7,7,1,1,0,4, +4,4,0,5,5,5,8,0,0,0,0,0,0,0,0,0, +7,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,7,7,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,8,0,0,0,0,7,7,7,8, +8,1,0,1,0,7,4,0xb,4,5,0xc,5,7,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7, +7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,7,8,7,7,8,8,8,8,1,1,1,0,0,0,0, +0,0xe,0xe,0xe,0xe,0xe,7,0,8,8,8,8,8,8,8,8, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,7,8,7,7,8,8,8,8,1,1,0,8,1,0,0, +0,0xe,0xe,0xe,0xe,0xe,0,0,0,8,8,8,8,8,8,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0, +1,0,8,0,0,0,0,7,4,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,1,8,9,1,1,9, +9,9,9,8,8,8,8,8,7,8,9,8,8,1,0,8, +8,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1, +1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,0,0,0,0,0,0,1,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,7,7,8,8,1,4,8,8,8,8, +8,1,7,0,8,7,0,1,1,0,0,0,0,0,0,7, +7,1,1,0,0,0,0,1,1,0,7,7,7,0,0,7, +7,7,7,7,7,7,0,0,8,8,8,8,0,0,0,0, +0,0,0,0,0,0,0,1,7,4,8,8,7,7,7,7, +7,7,1,0,7,0,0,0,0,0,0,0,0,0,0,7, +7,7,8,0,0,8,1,1,0,0,0,0,0,0,0,0, +0,0,0,8,1,0,0,0,0,0,0,0,0,0,0,0, +0,7,8,8,8,8,1,1,1,0xb,0xc,5,4,4,4,5, +5,8,7,7,8,8,8,8,8,8,8,0,8,0,0,0, +0,0,0,0,0,0,8,0,0,8,8,1,7,7,0xd,0xd, +8,8,7,7,7,0,0,0,0,7,7,1,7,7,7,7, +7,7,1,8,1,0,0,0,0,7,7,7,7,7,0xe,0xe, +0xe,7,7,0xe,7,7,7,7,7,0,0,0,0,0,0,0, +7,7,0,0,0,0,0,0,0,8,1,4,7,8,0,0, +0,0,0,4,1,7,8,8,8,1,1,1,1,0,7,8, +7,7,8,8,8,8,1,1,8,1,7,4,4,4,8,8, +8,8,8,8,8,8,8,8,0,0,1,8,8,8,8,7, +0,0,0,0,0,0,0,0,0,0,0,8,7,8,8,1, +1,1,3,9,0xa,4,4,5,5,8,0xd,7,0,0,0,0, +0,0,0,0,0,0,0,8,1,8,8,8,0,7,1,1, +8,1,4,7,8,8,7,0,1,1,0,0,0,0,0,0, +8,7,8,8,7,7,7,8,7,8,0,0,0,0,7,7, +7,4,4,0xb,7,7,1,8,8,8,8,4,4,8,1,0, +0,0,0,0,0,0,0,8,8,8,0,6,1,1,1,1, +1,8,8,1,1,1,1,8,7,6,6,6,6,6,6,6, +0,0,0,0,1,0,0,0,0,8,0,0,7,0,0,0, +0,0,0,0,0,8,0,0,0,0,8,0,0,0,0,7, +7,1,8,7,0,0,0,0,0,0,0,0,7,7,7,7, +7,7,7,7,7,7,7,7,1,8,0,0,0,0,0,0, +0,0,0,0,8,8,8,8,8,8,8,8,8,8,8,8, +8,8,8,8,0,0,0,0,0,0,0,0,0,0,0,0, +0,8,0,0,0,0,0,0,0,0,0,0,0,1,1,1, +0,0,0,0,0,0,0,1,1,1,8,1,1,1,1,8, +0,0,0,8,7,7,8,8,1,1,4,4,8,7,7,2, +3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +8,8,8,8,1,8,4,8,1,7,4,1,1,0,0,0, +0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,8, +7,0,0,0,0,0,0,0,0,0,0,0,7,8,7,0, +0,8,7,8,8,1,0xe,0xe,8,8,0xe,7,0xe,0xe,7,8, +8,0,0,0,0,0,0,0,0,0,0,0,4,1,8,4, +7,0,0,0,7,7,8,7,7,1,7,7,0,7,1,0, +0,6,1,1,0,8,6,0,0,0,0,0,1,1,1,8, +0,0,0,0,0,0,0,0,8,1,1,0,0,0,0,0, +7,8,7,0,0,0,0,0,0,0,0,0,0,0,0,0, +8,8,8,8,1,1,1,1,8,8,8,8,8,0,0,0, +0,0,0,0,0,0,7,4,7,1,1,8,8,7,7,1, +1,0,0,0,0,0,0,0,8,8,8,1,1,4,8,9, +9,8,1,1,0,8,0,0,0,0,0,0,0,0,0,0, +0,7,4,7,1,1,1,1,1,1,8,8,8,0xd,7,0, +0,0,0,0,0,0,0,1,0,8,1,0,0,0,0,0, +0,0,0,0,0,0,0,7,7,7,1,8,8,0xd,0xd,8, +7,8,8,0,0,0,0,0,0,8,0,7,4,7,1,1, +8,8,8,8,1,1,0,0,0,0,0,0,0,0,0,0, +0,1,1,0,7,7,8,7,7,7,7,0,0,4,4,0, +0,5,5,7,0,0,7,7,0,0,8,8,8,8,8,8, +8,0,0,0,7,7,1,8,8,7,1,0,0,0,0,0, +0,0,0,0,7,4,7,1,1,1,1,1,1,4,8,0xb, +5,7,5,8,7,1,1,0,0,0,0,0,0,0,0,0, +0,0,0,4,7,1,1,1,1,0,0,4,0xb,5,0xc,8, +8,7,1,7,7,7,1,1,1,1,1,1,8,8,7,7, +8,7,1,0,0,0,0,0,0,0,0,0,0,0,8,7, +8,4,7,1,1,8,8,8,8,7,1,0,0,0,0,0, +0,0,0,0,0,0,0,0,1,0,8,7,7,8,8,1, +1,4,8,1,8,8,8,0,0,0,0,0,0,0,0,0, +0,0,0,7,4,7,1,1,1,8,8,8,8,8,7,1, +1,0,0,0,0,0,8,1,1,8,8,8,8,8,8,1, +0,0,0,0,0,1,1,8,8,8,8,7,0,1,1,1, +1,0,8,1,1,8,8,8,7,7,1,1,1,0,0,0, +0,0,0,0,0,0,0,1,1,1,1,1,1,8,7,8, +0,0,0,0,0,0,0,8,8,1,1,1,1,1,0,8, +8,8,8,8,8,7,1,0,0,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,0,7,1,1,1,1,1,1,4, +1,8,7,8,8,0,0,0,0,0,0,0,0,0,8,8, +8,8,8,1,0,0,0,8,0,8,8,0,8,8,1,8, +1,0,0,1,0,0,0,0,0,0,0,0,0,0,7,7, +7,7,7,0,8,8,0,7,7,8,7,0,0,0,0,0, +0,0,0,0,8,1,4,7,0,0,0,0,0,0,0,0, +0,0 +}; + +static const UCPTrie inpc_trie={ + inpc_trieIndex, + { inpc_trieData }, + 765, 2930, + 0x12000, 0x12, + 1, 2, + 0, 0, + 0x2, 0x0, + 0x0, +}; + +static const int32_t maxInSCValue = 35; + +static const uint16_t insc_trieIndex[834]={ +0,0x40,0x60,0x94,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0xd4,0x112,0x152,0x190,0x1cf,0x20d,0x24c,0x28a,0x2ca,0x308,0x346,0x384, +0x3c4,0x402,0x441,0x47f,0x4bf,0x4fd,0x53d,0x57d,0x5bc,0x5fc,0x63b,0x67b,0x69b,0x6db,0x71b,0x758, +0x2f8,0x30b,0x317,0x30b,0x332,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x60,0x70,0x80, +0x90,0x94,0xa4,0xb4,0xc4,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60, +0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60,0x70,0x40,0x50,0x60, +0x70,0x40,0x50,0x60,0x70,0xd4,0xe4,0xf4,0x104,0x112,0x122,0x132,0x142,0x152,0x162,0x172, +0x182,0x190,0x1a0,0x1b0,0x1c0,0x1cf,0x1df,0x1ef,0x1ff,0x20d,0x21d,0x22d,0x23d,0x24c,0x25c,0x26c, +0x27c,0x28a,0x29a,0x2aa,0x2ba,0x2ca,0x2da,0x2ea,0x2fa,0x308,0x318,0x328,0x338,0x346,0x356,0x366, +0x376,0x384,0x394,0x3a4,0x3b4,0x3c4,0x3d4,0x3e4,0x3f4,0x402,0x412,0x422,0x432,0x441,0x451,0x461, +0x471,0x47f,0x48f,0x49f,0x4af,0x4bf,0x4cf,0x4df,0x4ef,0x4fd,0x50d,0x51d,0x52d,0x53d,0x54d,0x55d, +0x56d,0x57d,0x58d,0x59d,0x5ad,0x5bc,0x5cc,0x5dc,0x5ec,0x5fc,0x60c,0x61c,0x62c,0x63b,0x64b,0x65b, +0x66b,0x67b,0x68b,0x69b,0x6ab,0x69b,0x6ab,0x6bb,0x6cb,0x6db,0x6eb,0x6fb,0x70b,0x71b,0x72b,0x73b, +0x74b,0x758,0x768,0x778,0x788,0xe9,0xe9,0x798,0x7a3,0x7b3,0x7c3,0x7d2,0x7e1,0x7ef,0x7ff,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x80f,0x81d,0xe6,0x81d,0xe6,0x82d,0x80f,0x83d,0xe9,0xe9,0x84d, +0x859,0x863,0x872,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x882,0x16c,0x892,0x8a2,0x22d,0xe9,0x8b2,0x8c2,0xe9,0xe9,0x374,0x8d2, +0x8e1,0x30,0x40,0x40,0xe9,0x8f1,0xe9,0xe9,0x901,0x90e,0x91e,0x92a,0x30,0x30,0x40,0x40, +0x40,0x40,0x40,0x40,0x93a,0xe6,0xe9,0x94a,0x956,0x30,0x40,0x40,0x966,0xe9,0x975,0x985, +0xe9,0xe9,0x995,0x9a5,0xe9,0xe9,0x9b5,0x9c2,0x9d2,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x9e2,0x9f0,0x9fe,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0xa08,0xa14,0xa24,0x40,0x40,0x40,0x40,0x40,0x75a,0xa32,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x74,0x40,0x40,0x40,0xa42,0xe9,0xa4f, +0x40,0xe9,0xa5f,0xa6d,0xa7c,0xd6,0xe7,0xe9,0xa8c,0xa98,0x30,0xaa8,0xab6,0xac6,0xe9,0xad4, +0xe9,0xae4,0xaf3,0x40,0x40,0xb03,0xe9,0xe9,0xb12,0x297,0x30,0xb22,0xb32,0xe3,0xe9,0x889, +0xb42,0xb52,0x30,0xe9,0xb61,0xe9,0xe9,0xe9,0xb71,0xb81,0x40,0xb91,0xba1,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xbb1,0xbc1,0xbce,0x30,0xbde,0xbee,0xe9, +0xbf8,0x31,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xc08,0xe6,0xe9, +0x88a,0xc18,0xc26,0xc30,0xc40,0xc50,0xe9,0xe9,0xc60,0x40,0x40,0x40,0x40,0xc70,0xe9,0x88b, +0xc80,0xc90,0xca0,0xe9,0xcad,0xd5,0xe8,0xe9,0xcbd,0xccd,0x30,0x6ba,0x35,0xe1,0x3eb,0x886, +0xcdd,0x40,0x40,0x40,0x40,0xced,0x16d,0xcfc,0xdf,0xe9,0xd0c,0xd1c,0x30,0xd2c,0x162,0x172, +0xd3c,0x308,0xd4c,0xd5c,0x9ed,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xe9, +0xd6c,0xd7a,0xd8a,0x40,0x40,0xd99,0xe9,0xe9,0x91f,0xda9,0x30,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0xdb,0xe9,0xff,0xdb9,0xdc9,0xdd1,0x40,0x40,0xdb,0xe9,0xe9, +0xde1,0xdf1,0x30,0x40,0x40,0xdf,0xe9,0xe01,0xe0e,0x30,0x40,0x40,0x40,0xe9,0xe1e,0xe2e, +0xe3e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xdf,0xe9,0x886, +0xe4e,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xe5e,0xe9,0xe9, +0xe6b,0xe7b,0xe8b,0xe9,0xe9,0xe97,0xea1,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0xeb1,0xe9,0xff, +0xec1,0xed1,0x6bb,0xee1,0x555,0xe9,0xeef,0x72b,0xeff,0x40,0x40,0x40,0x40,0xf0f,0xe9,0xe9, +0xf1e,0xf2e,0x30,0xf3e,0xe9,0xf4a,0xf57,0x30,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0xe9,0xf67,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, +0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x45,0x55,0x55,0x55,0x65,0x85,0xa5,0xc5, +0xe5,4,4,0xf5,0x114,0x134,0x154,4,0x174,4,0x17d,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,4,4,4,0x19d,0x1bd,4,4,4, +4,4,4,4,4,4,4,0x1dd,4,4,0x1fd,0x21d,0x23d,0x25d,0x27d,0x29d, +0x2bd,0x2d8 +}; + +static const uint8_t insc_trieData[3960]={ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0xc,0,0, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0xc,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0x1c,0x1c,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0xc,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x22,0x22, +0x17,1,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f, +0x22,0x22,0,4,4,0,0,0x22,0x22,0x22,5,5,5,5,5,5, +5,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0,0,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,5,5, +5,5,0xc,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0, +0,0x23,0x23,0,0,0x23,0x23,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,5,5,5,5, +5,5,5,0,5,0,0,0,5,5,5,5,0,0,0x17,1, +0x22,0x22,0x22,0x22,0x22,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,6,0, +0,0,0,0,0,0,0,0x22,0,0,0,0,5,5,0,5, +0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +5,5,0,0,0,0,0,0,0,0,0,0,2,0,0x1c,0, +2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0,0,0x23,0x23, +0,0,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,5,5,5,5,5,5,5, +0,5,5,0,5,5,0,5,5,0,0,0x17,0,0x22,0x22,0x22, +0,0,0,0,0x22,0x22,0,0,0x22,0x22,0x1f,0,0,0,4,0, +0,0,0,0,0,0,5,5,5,5,0,5,0,0,0,0, +0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,2,0x12,0xc, +0xc,0,0xb,0,0,0,0,0,0,0,0,0,0,2,2,0x20, +0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0,0x23, +0x23,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,5,5,0,5,5, +0,5,5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0x22, +0,0x22,0x22,0x22,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x23,0x23,0x22,0x22,0,0, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0, +0,0,0,5,4,4,4,0x17,0x17,0x17,0,2,2,0x20,0,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,0x23,0,0,0x23,0x23,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,0,5,5,5,5,5,5,5,0,5,5,0,5, +5,5,5,5,0,0,0x17,1,0x22,0x22,0x22,0x22,0x22,0,0,0x22, +0x22,0,0,0x22,0x22,0x1f,0,0,0,0,0,0,0,0,0x22,0x22, +0,0,0,0,5,5,0,5,0x23,0x23,0x22,0x22,0,0,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,5,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,2,0x15,0,0x23,0x23,0x23,0x23,0x23, +0x23,0,0,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,0,0,0,5, +5,0,5,0,5,5,0,0,0,5,5,0,0,0,5,5, +5,0,0,0,5,5,5,5,5,5,5,5,5,5,5,5, +0,0,0,0,0x22,0x22,0x22,0,0,0,0x22,0x22,0x22,0,0x22,0x22, +0x22,0x1f,0,0,0,0,0,0,0,0,0,0x22,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,2,2,2,0x20,2,0x23,0x23,0x23,0x23,0x23,0x23,0x23, +0x23,0,0x23,0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,0,0, +0,1,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f, +0,0,0,0,0,0,0,0x22,0x22,0,5,5,5,0,0,0, +0,0,0x23,0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23, +0x23,0x23,0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0,5,5,5,5,5, +5,5,5,5,5,0,5,5,5,5,5,0,0,0x17,1,0x22, +0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0,0,0, +0,0,0,0,0x22,0x22,0,0,0,0,0,0,0,5,0,0x23, +0x23,0x22,0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0, +0x11,0x11,0,0,0,0,0,0,0,0,0,0,0,0,0,2, +2,2,0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23, +0,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,0x1a,0x1a,1,0x22,0x22,0x22, +0x22,0x22,0,0x22,0x22,0x22,0,0x22,0x22,0x22,0x1f,0xd,0,0,0,0, +0,6,6,6,0x22,0,0,0,0,0,0,0,0x23,0x23,0x23,0x22, +0x22,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0, +0,0,0,0,0,0,0,6,6,6,6,6,6,0,0,2, +0x20,0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23, +0x23,0x23,0x23,0x23,0,0,0,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0, +5,5,5,5,5,5,5,5,5,0,5,0,0,5,5,5, +5,5,5,5,0,0,0,0x1f,0,0,0,0,0x22,0x22,0x22,0x22, +0x22,0x22,0,0x22,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0,0, +0,0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0x22, +0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,0,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0,0x22,0x22,0x22,0x22, +0x22,0x22,0,0x22,0x1e,0x1e,0x1e,0x1e,0xa,2,0x1a,0,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,0,5, +0,0,5,5,0,5,0,0,5,0,0,0,0,0,0,5, +5,5,5,0,5,5,5,5,5,5,5,0,5,5,5,0, +5,0,5,0,0,5,5,0,5,5,0,0x22,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0,0x22,0xb,0xb,0,0,0x22,0x22,0x22,0x22,0x22, +0,0,0,0x1e,0x1e,0x1e,0x1e,0,2,0,0,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0,0,5,5,5,5,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0, +0x1c,0,0x1c,0,0x17,0,0,0,0,0,0,5,5,5,5,5, +5,5,5,0,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,5,5,5,5,5,5,0,0,0,0,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x22,2,2,0x1a, +1,0,0,8,8,8,8,8,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf, +0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0,0,0,0,0,0,0x1c,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,5,0x23,0x23,0x23,0x23,0x23,0x23,0x23, +0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e,0x20,0x13,0x1a,0xb,0xb, +0xb,0xb,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0xc,0, +0,0xc,0,5,5,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,5,5,5, +5,0xb,0xb,5,0x22,0x1e,0x1e,5,5,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e, +5,5,0x22,0x22,0x22,0x22,5,5,5,5,5,5,5,5,5,5, +5,0xb,0x22,0x22,0x22,0x22,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,0x1e,5,0x1e,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x1e,0x1e,0x22,0x22,0,0,0x23, +0x23,0x23,5,5,5,5,5,5,5,5,5,5,0,5,5,0x22, +0x22,0x1a,0,0,0,0,0,0,0,0,0,0,0,5,5,0x22, +0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0x22, +0x22,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20,0x22,0x1b,0x1b,0x1c,0x10, +0xa,0x1c,0x1c,0x1a,0x13,0x1c,0,0,0,0,0,0,0,0,1,0x1c, +0,0,0xc,5,5,5,5,5,5,5,5,5,5,5,5,5, +5,5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf,0xf,0xf,0,0, +0,0,7,7,2,7,7,7,7,7,7,7,0x22,0x1c,0,0, +0,0,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21, +0,0,0x1d,0x1d,0x1d,0x1d,0x1d,0,0,0,0,0,0,0,0,0, +0,0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, +0x22,0x22,7,7,7,7,7,7,7,0x1e,0x1e,0,0,0,0,0, +0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0,0, +0,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23,0x23, +0x23,5,5,0xb,0xb,0xf,7,7,9,0xf,0xf,0xf,0xf,0,0x13,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x1e, +0x1e,0x1e,0x1e,0x1e,0x1a,0x1c,0x1c,0,0,0x1c,2,2,2,0x10,0x20,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5,5,0x17,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,5,5,5,5,5, +5,5,0,0,0,0,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23, +5,5,5,5,5,5,0xf,0xf,0xf,0x22,0x22,0x22,0x22,0x22,0x22,0x1a, +0x13,0xf,0xf,5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,1, +5,5,5,7,7,5,5,5,5,0x23,0x23,0x17,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,7,7,0x1a,0x1a,0,0,0,0,0,0,0, +0,0,0,0,0,5,5,5,5,0xf,0xf,0x22,0x22,0x22,0x22,0x22, +0x22,0x22,7,7,7,7,2,2,0x1c,0x17,0,0,0,0,0,0, +0,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,5, +5,5,4,4,4,0,4,4,4,4,4,4,4,4,4,4, +4,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x20,0x20,4,0x11,0x11,4,4,4,0,0,0,0,0,0,0,0, +0,0,0,0x1c,0,0,0,0,0,0,0,0,0,0,0,0, +0x16,0x14,0,0,0xc,0xc,0xc,0xc,0xc,0,0,0,0,0,0,0, +0,0,0,0,0x1c,0x1c,0x1c,0,0,0,0,0,0,0,0,0, +0,0,0x23,0x23,0,0x23,0x23,0x23,0x1a,5,5,5,5,2,5,5, +5,5,0x22,0x22,0x22,0x22,0x22,0,0,0,0,0,0,0,0,5, +5,5,5,5,5,5,5,5,5,5,5,5,5,0x21,0x21,5, +5,5,5,0x21,0xf,0xf,5,5,5,5,5,5,5,0xf,5,2, +0,0,0,0,0,0,0,0,0,0,0,0,5,5,5,5, +0xb,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,0,0, +0,0,0,0,0,0,0,0,4,4,4,4,4,4,4,4, +4,4,4,4,4,4,4,4,2,2,0,0,0,0,0,0, +0,0,0,0,0x23,0x22,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +5,5,5,5,5,5,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x21,0x1e, +0x1e,0x1e,0,0,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,7,7,7,0x1a,0,0,0,0,0,0,0,0,0, +0,0,0,2,2,0x10,0x20,0x23,0x23,0x23,0x23,0x23,5,5,5,0x23, +0x23,0x23,5,5,5,0x17,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0xf, +0xb,0xb,5,5,5,5,5,0x22,0,5,5,5,5,5,5,5, +5,5,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,5,5,5,5, +5,0,0x22,0x22,0x22,0xb,0xb,0xb,0xb,0,0,0,0,0,0,0, +0,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7, +0,0,5,5,5,0xc,0xc,0xc,0,0,0,5,0x1e,0x1e,0x1e,5, +5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22, +0x1e,0x1d,0x1e,0x1d,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x23,0x23,5,5,5,5,5,5,5,5,5,0x22,0x22,0x22,0x22, +0x22,0,0,0,0,0,0x20,0x13,0,0,0,0,0,0,0,0, +0,5,5,5,5,5,5,5,5,5,5,5,5,5,5,0x23, +0x23,5,0x23,5,5,5,5,5,5,5,5,5,7,7,7,7, +7,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x1e,0x1a,0,0,5,0x22, +0x22,0x22,0,0x22,0x22,0,0,0,0,0,0x22,0x22,2,0x20,5,5, +5,5,0,5,5,5,0,5,5,5,5,5,5,5,0,0, +0x17,0x17,0x17,0,0,0,0,0x13,2,2,0x20,0x11,0x11,0x23,0x23,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0, +0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0x19, +2,2,0x20,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,5,5, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0, +2,2,0x20,0x23,0x23,0x23,0x23,5,5,5,5,5,5,5,5,5, +0x22,0x22,0x22,0x13,0x1a,0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, +0,0,0,0,5,0x22,0x22,0,0,0,0,0,0,0,0,0, +0x21,0x21,0x21,0x21,0x21,5,5,5,5,5,5,5,5,5,5,5, +0x17,0,0,0,0,0,0,0,0,0,0,0,0,5,5,5, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,1,0xe, +0xe,0,0,0,0,0,0x1c,0x17,0x22,0x22,0,0,0,0x22,0x22,0x22, +0x22,2,0x1f,0x17,0x12,0,0,0,0,0,0,4,0,0x23,0x23,0x23, +0x23,5,5,5,0,5,0,5,5,5,5,0,5,5,5,5, +5,5,5,5,5,0,0,0,0,0,0,0,5,5,5,5, +5,5,5,5,5,5,5,5,5,5,5,2,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0x17,0x1a,0,0,0,0,0,2,2,2,0x20, +0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0,0x23,5,0,5,5, +0,5,5,5,5,5,0,0x17,0x17,1,0x22,0x22,0,0,0,0, +0,0,0,0x22,0,0,0,0,0,0,2,2,0x23,0x23,0x22,0x22, +0,0,4,4,4,4,4,4,4,0,0,0,5,5,5,5, +5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1f,2,2,0x20, +0x17,1,0,0,0,0,0,0,0,0,0x18,0x18,0x18,0x18,0x18,0x18, +0x18,0x18,0x18,0x18,0,0,0,0,0x1c,0,0x23,0x23,0x23,0x23,0x23,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,5,2,0x20,0x1f,0x17,1,0,0, +0,0,0,0,0,0,0,0,0,0x22,0x22,0x22,0x22,0x22,0x22,0, +0,0x22,0x22,0x22,0x22,2,2,0x20,0x1f,0x17,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x23,0x23,0x23,0x23,0x22,0x22,0, +0,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,2,0x20, +0x1f,0x22,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,5,5,5,5,5,5,5,5,5,5,5,2,0x20,0x22,0x22, +0x22,0x22,0x22,0x22,0x1f,0x17,0,0,0,0,0,0,0,0,5,5, +5,5,5,5,5,5,5,5,5,0,0,0xb,0xb,0xb,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x1a,0,0,0,0,0x18,0x18, +0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0,0,0,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,2,0x20,0x1f,0x17,0,0,0,0,0,0x23,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,5,0x1c,0x1a, +2,2,2,2,0x20,0xe,0xb,0xb,0xb,0xb,0xc,0,0,0,0,0, +0xc,0,0x13,0,0,0,0,0,0,0,0,0x23,0x22,0x22,0x22,0x22, +0x22,0x22,0x22,0x22,0x22,0x22,0x22,5,5,5,5,0,0,0xe,0xe,0xe, +0xe,7,7,7,7,7,7,2,0x20,0x12,0x13,0,0,0,1,0, +0,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0x23,0x23,5, +5,0x22,0x22,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,0x22,0x22,2,2,0x20, +0x1f,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18,0,0, +0,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0xf,0x22, +0x22,0x22,0x22,0x22,2,2,0,0,0,0,0,0,0,0,0,0x23, +0x23,0x23,0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,5,5,5,5,0x22, +0x22,0x22,0x22,0x22,0x22,0,0,0,0x22,0,0x22,0x22,0,0x22,2,0x20, +0x17,0x22,0x1a,0x13,0xd,0xb,0,0,0,0,0,0,0,0,0x23,0x23, +0x23,0x23,0x23,0x23,0,0x23,0x23,0,0x23,0x23,5,5,5,5,5,5, +5,5,5,5,0x22,0x22,0x22,0x22,0x22,0,0x22,0x22,2,0x20,0x13,0, +0,0,0,0,0,0,0,5,5,0xc,0x22,0x22,0x22,0x22,0,0, +0,0,0,0,0,0,0,0 +}; + +static const UCPTrie insc_trie={ + insc_trieIndex, + { insc_trieData }, + 834, 3960, + 0x12000, 0x12, + 1, 2, + 0, 0, + 0x4, 0x40, + 0x0, +}; + +static const int32_t maxVoValue = 3; + +static const uint16_t vo_trieIndex[1100]={ +0,0x40,0x59,0x98,0,0,0,0,0,0,0,0xd0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x33b,0x355,0x363,0x379,0x399,0x3b7,0x3d2,0x3ec,0x355,0x355,0x355,0x40c,0x355,0x355,0x355,0x40c, +0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c, +0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c, +0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x42c,0x355,0x355,0x355,0x40c, +0x355,0x355,0x355,0x40c,0,0x10,0x20,0x30,0x40,0x50,0x60,0x70,0x59,0x69,0x79,0x89, +0x98,0xa8,0xb8,0xc8,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30, +0,0x10,0x20,0x30,0xd0,0xe0,0xf0,0x100,0,0x10,0x20,0x30,0,0x10,0x20,0x30, +0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0x10,0x20,0x30, +0,0x10,0x20,0x30,0,0x10,0x20,0x30,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x10f,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0xa9,0x96,0x11e,0x12c,0xae,0xaa,0,0,0,0,0, +0,0x103,0x13c,0,0x14c,0x158,0x166,0x10b,0x175,0x110,0x110,0x110,0x184,0,0,0, +0,0,0,0,0x72,0,0xf6,0,0,0,0,0,0,0,0,0, +0,0,0,0x190,0x110,0x198,0,0,0,0,0x103,0x110,0x115,0,0xec,0x1a8, +0x1b6,0x10e,0x110,0x110,0x1c6,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110, +0x110,0x110,0x116,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x118,0x10a,0x110,0x1d2,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0x10e,0x110,0,0, +0x116,0,0,0,0,0,0x108,0x110,0x1e2,0x114,0x110,0,0,0,0,0, +0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x1f1,0x1ff,0x110,0x20e,0x21d, +0x110,0x22a,0x110,0x237,0x246,0x256,0x110,0x22a,0x110,0x237,0x261,0x110,0x110,0x26e,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x27e,0x27e,0x27e,0x27e,0x27e, +0x286,0x110,0x28e,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0,0, +0,0,0,0x110,0,0x110,0x117,0x29b,0x2aa,0,0,0,0,0,0,0, +0,0,0x2ba,0x2c9,0x110,0x2d9,0x110,0x2e9,0x2f8,0,0,0,0,0,0,0, +0x308,0x318,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x110,0x110,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0, +0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0,0,0,0,0,0,0,0,0x328,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110, +0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x110,0x112,0x84,0x98,0xa8,0xa8,0xa8, +0xa8,0xa8,0xa8,0xc8,0xc,0xe8,0x100,0x115,0xc,0xc,0xc,0x134,0x153,0x172,0x191,0xc, +0x1ab,0xc,0x1cb,0x1eb,0x20b,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0xfb,0xc,0x243,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0xc,0xc,0xc,0xc,0x223,0x223,0x223,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0x262,0xc,0xc,0xc,0xc,0x282,0xc,0xc, +0xc,0xc,0xc,0x29c,0xc,0xc,0xfd,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, +0xc,0x223,0x223,0x2b9,0xc,0xc,0xc,0xc,0xc,0x223,0x100,0xc,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2bc,0x223, +0x223,0x223,0x223,0x223,0x223,0x223,0x223,0xf8,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0x2da,0xf8,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0x223,0x2fa,0xc,0xc,0x223,0xfd,0xc,0xc,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0x223,0x31a,0x223,0x223,0xc8,0x2b5,0xc,0xc,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223, +0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x223,0x31b,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc, +0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc,0xc +}; + +static const uint8_t vo_trieData[828]={ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,0,3,0,0,0,0,3,0,0,3,0,0,0,0,0, +0,0,0,0,0,3,3,3,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,3,3,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +0,0,0,0,0,0,0,0,0,3,3,0,0,0,3,0, +0,0,0,3,3,3,0,0,0,0,0,0,3,0,3,3, +3,0,0,0,0,0,0,0,0,0,0,0,3,3,0,3, +3,3,3,3,3,3,0,0,0,0,0,3,3,0,3,3, +0,0,0,0,0,0,3,3,3,3,0,3,0,3,0,3, +0,0,0,0,3,0,0,0,0,0,3,3,3,3,3,3, +0,3,3,0,3,3,3,3,3,3,3,3,3,3,0,0, +3,3,3,3,3,3,3,3,0,0,0,0,3,3,3,3, +3,1,1,3,0,0,0,0,3,3,3,3,3,3,3,3, +3,3,3,3,3,3,0,3,3,3,3,3,3,3,3,3, +3,3,0,0,0,0,3,3,3,0,3,3,3,3,3,3, +3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0, +0,0,3,3,0,3,3,3,3,3,3,3,3,3,3,3, +3,3,2,2,3,3,3,3,3,1,1,1,1,1,1,1, +1,3,3,1,1,1,1,1,1,1,1,1,1,1,1,3, +3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3, +2,3,2,3,2,3,2,3,3,3,3,3,3,2,3,3, +3,3,3,3,3,3,3,3,3,3,2,3,2,3,2,3, +3,3,3,3,3,2,3,3,3,3,3,2,2,3,3,3, +3,2,2,3,3,3,1,2,3,2,3,2,3,2,3,2, +3,3,3,3,3,3,2,2,3,3,3,3,3,1,3,3, +3,3,3,3,3,2,3,3,3,3,3,3,3,3,2,2, +2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3, +3,3,3,3,3,3,3,3,3,2,2,2,2,2,3,3, +3,3,3,0,1,1,1,1,1,1,3,3,3,0,0,0, +0,3,3,3,3,3,3,3,3,3,0,2,3,3,3,3, +3,3,1,1,3,3,2,0,2,3,3,3,3,3,3,3, +3,3,3,1,1,0,0,0,2,3,3,3,3,3,3,3, +3,3,3,3,1,3,1,3,1,3,3,3,3,3,3,3, +3,3,3,3,1,1,1,1,1,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,3,3,3,1,3,3,3,3, +0,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3, +3,0,0,0,3,3,0,0,2,2,3,3,3,3,3,3, +3,3,3,3,3,3,3,3,0,0,0,0 +}; + +static const UCPTrie vo_trie={ + vo_trieIndex, + { vo_trieData }, + 1100, 828, + 0x110000, 0x110, + 1, 2, + 0, 0, + 0xc, 0x0, + 0x0, +}; + +#endif // INCLUDED_FROM_UPROPS_CPP diff --git a/deps/icu-small/source/common/uloc.cpp b/deps/icu-small/source/common/uloc.cpp index 7a1dc723cff619..81b6e0f68ab88b 100644 --- a/deps/icu-small/source/common/uloc.cpp +++ b/deps/icu-small/source/common/uloc.cpp @@ -798,7 +798,7 @@ _getKeywords(const char *localeID, } keywordsLen += keywordList[i].keywordLen + 1; if(valuesToo) { - if(keywordsLen + keywordList[i].valueLen < keywordCapacity) { + if(keywordsLen + keywordList[i].valueLen <= keywordCapacity) { uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen); } keywordsLen += keywordList[i].valueLen; @@ -1133,7 +1133,7 @@ uloc_setKeywordValue(const char* keywordName, keyValuePrefix = ';'; /* for any subsequent key-value pair */ updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status); updatedKeysAndValues.append('=', *status); - updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status); + updatedKeysAndValues.append(nextEqualsign, static_cast(keyValueTail-nextEqualsign), *status); } if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) { /* append new entry at the end, it sorts later than existing entries */ @@ -1500,7 +1500,7 @@ _deleteVariant(char* variants, int32_t variantsLen, } if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 && (variantsLen == toDeleteLen || - (flag=(variants[toDeleteLen] == '_')))) + (flag=(variants[toDeleteLen] == '_')) != 0)) { int32_t d = toDeleteLen + (flag?1:0); variantsLen -= d; @@ -2412,7 +2412,7 @@ uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult /* eat spaces prior to semi */ for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--) ; - int32_t slen = ((t+1)-s); + int32_t slen = static_cast(((t+1)-s)); if(slen > ULOC_FULLNAME_CAPACITY) { *status = U_BUFFER_OVERFLOW_ERROR; return -1; // too big diff --git a/deps/icu-small/source/common/uloc_keytype.cpp b/deps/icu-small/source/common/uloc_keytype.cpp index 04b566a5d68b40..17ad91da01586d 100644 --- a/deps/icu-small/source/common/uloc_keytype.cpp +++ b/deps/icu-small/source/common/uloc_keytype.cpp @@ -228,7 +228,7 @@ initFromResourceBundle(UErrorCode& sts) { // a timezone key uses a colon instead of a slash in the resource. // e.g. America:Los_Angeles if (uprv_strchr(legacyTypeId, ':') != NULL) { - int32_t legacyTypeIdLen = uprv_strlen(legacyTypeId); + int32_t legacyTypeIdLen = static_cast(uprv_strlen(legacyTypeId)); char* legacyTypeIdBuf = (char*)uprv_malloc(legacyTypeIdLen + 1); if (legacyTypeIdBuf == NULL) { sts = U_MEMORY_ALLOCATION_ERROR; @@ -320,7 +320,7 @@ initFromResourceBundle(UErrorCode& sts) { if (isTZ) { // replace colon with slash if necessary if (uprv_strchr(from, ':') != NULL) { - int32_t fromLen = uprv_strlen(from); + int32_t fromLen = static_cast(uprv_strlen(from)); char* fromBuf = (char*)uprv_malloc(fromLen + 1); if (fromBuf == NULL) { sts = U_MEMORY_ALLOCATION_ERROR; @@ -472,7 +472,6 @@ isSpecialTypeRgKeyValue(const char* val) { p++; } return (subtagLen == 6); - return TRUE; } U_CFUNC const char* diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp index f8337ec02476c4..8120331c4b91ff 100644 --- a/deps/icu-small/source/common/uloc_tag.cpp +++ b/deps/icu-small/source/common/uloc_tag.cpp @@ -12,11 +12,13 @@ #include "unicode/putil.h" #include "unicode/uloc.h" #include "ustr_imp.h" +#include "charstr.h" #include "cmemory.h" #include "cstring.h" #include "putilimp.h" #include "uinvchar.h" #include "ulocimp.h" +#include "uvector.h" #include "uassert.h" @@ -77,19 +79,34 @@ static const char LOCALE_TYPE_YES[] = "yes"; #define LANG_UND_LEN 3 +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + This table has 2 parts. The parts for Grandfathered tags is generated by the + following scripts from the IANA language tag registry. + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ + egrep -A 7 'Type: grandfathered' | \ + egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \ + awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\ + tr 'A-Z' 'a-z' + + + The 2nd part is made of five ICU-specific entries. They're kept for + the backward compatibility for now, even though there are no preferred + values. They may have to be removed for the strict BCP 47 compliance. + +*/ static const char* const GRANDFATHERED[] = { /* grandfathered preferred */ "art-lojban", "jbo", - "cel-gaulish", "xtg-x-cel-gaulish", - "en-GB-oed", "en-GB-x-oed", + "en-gb-oed", "en-gb-oxendict", "i-ami", "ami", "i-bnn", "bnn", - "i-default", "en-x-i-default", - "i-enochian", "und-x-i-enochian", "i-hak", "hak", "i-klingon", "tlh", "i-lux", "lb", - "i-mingo", "see-x-i-mingo", "i-navajo", "nv", "i-pwn", "pwn", "i-tao", "tao", @@ -102,17 +119,175 @@ static const char* const GRANDFATHERED[] = { "sgn-ch-de", "sgg", "zh-guoyu", "cmn", "zh-hakka", "hak", - "zh-min", "nan-x-zh-min", "zh-min-nan", "nan", "zh-xiang", "hsn", - NULL, NULL + + // Grandfathered tags with no preferred value in the IANA + // registry. Kept for now for the backward compatibility + // because ICU has mapped them this way. + "cel-gaulish", "xtg-x-cel-gaulish", + "i-default", "en-x-i-default", + "i-enochian", "und-x-i-enochian", + "i-mingo", "see-x-i-mingo", + "zh-min", "nan-x-zh-min", }; +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + The table lists redundant tags with preferred value in the IANA languate tag registry. + It's generated with the following command: + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ + grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \ + awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \ + tr 'A-Z' 'a-z' + + In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because + a variant tag 'hepburn-heploc' has the preferred subtag, 'alaic97'. +*/ + +static const char* const REDUNDANT[] = { +// redundant preferred + "sgn-br", "bzs", + "sgn-co", "csn", + "sgn-de", "gsg", + "sgn-dk", "dsl", + "sgn-es", "ssp", + "sgn-fr", "fsl", + "sgn-gb", "bfi", + "sgn-gr", "gss", + "sgn-ie", "isg", + "sgn-it", "ise", + "sgn-jp", "jsl", + "sgn-mx", "mfs", + "sgn-ni", "ncs", + "sgn-nl", "dse", + "sgn-no", "nsl", + "sgn-pt", "psr", + "sgn-se", "swl", + "sgn-us", "ase", + "sgn-za", "sfs", + "zh-cmn", "cmn", + "zh-cmn-hans", "cmn-hans", + "zh-cmn-hant", "cmn-hant", + "zh-gan", "gan", + "zh-wuu", "wuu", + "zh-yue", "yue", + + // variant tag with preferred value + "ja-latn-hepburn-heploc", "ja-latn-alalc97", +}; + +/* + Updated on 2018-09-12 from + https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . + + grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ + grep -B1 'Preferred' | grep -v '^--' | \ + awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' + + Make sure that 2-letter language subtags come before 3-letter subtags. +*/ static const char DEPRECATEDLANGS[][4] = { /* deprecated new */ + "in", "id", "iw", "he", "ji", "yi", - "in", "id" + "jw", "jv", + "mo", "ro", + "aam", "aas", + "adp", "dz", + "aue", "ktz", + "ayx", "nun", + "bgm", "bcg", + "bjd", "drl", + "ccq", "rki", + "cjr", "mom", + "cka", "cmr", + "cmk", "xch", + "coy", "pij", + "cqu", "quh", + "drh", "khk", + "drw", "prs", + "gav", "dev", + "gfx", "vaj", + "ggn", "gvr", + "gti", "nyc", + "guv", "duz", + "hrr", "jal", + "ibi", "opa", + "ilw", "gal", + "jeg", "oyb", + "kgc", "tdf", + "kgh", "kml", + "koj", "kwv", + "krm", "bmf", + "ktr", "dtp", + "kvs", "gdj", + "kwq", "yam", + "kxe", "tvd", + "kzj", "dtp", + "kzt", "dtp", + "lii", "raq", + "lmm", "rmx", + "meg", "cir", + "mst", "mry", + "mwj", "vaj", + "myt", "mry", + "nad", "xny", + "ncp", "kdz", + "nnx", "ngv", + "nts", "pij", + "oun", "vaj", + "pcr", "adx", + "pmc", "huw", + "pmu", "phr", + "ppa", "bfy", + "ppr", "lcq", + "pry", "prt", + "puz", "pub", + "sca", "hle", + "skk", "oyb", + "tdu", "dtp", + "thc", "tpo", + "thx", "oyb", + "tie", "ras", + "tkk", "twm", + "tlw", "weo", + "tmp", "tyj", + "tne", "kak", + "tnf", "prs", + "tsf", "taj", + "uok", "ema", + "xba", "cax", + "xia", "acn", + "xkh", "waw", + "xsj", "suj", + "ybd", "rki", + "yma", "lrr", + "ymt", "mtm", + "yos", "zom", + "yuu", "yug", +}; + +/* + Updated on 2018-04-24 from + + curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \ + grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \ + grep -B1 'Preferred' | \ + awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' +*/ +static const char DEPRECATEDREGIONS[][3] = { +/* deprecated new */ + "BU", "MM", + "DD", "DE", + "FX", "FR", + "TP", "TL", + "YD", "YE", + "ZR", "CD", }; /* @@ -172,6 +347,46 @@ static const char* ultag_getGrandfathered(const ULanguageTag* langtag); #endif +namespace { + +// Helper class to memory manage CharString objects. +// Only ever stack-allocated, does not need to inherit UMemory. +class CharStringPool { +public: + CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {} + ~CharStringPool() = default; + + CharStringPool(const CharStringPool&) = delete; + CharStringPool& operator=(const CharStringPool&) = delete; + + icu::CharString* create() { + if (U_FAILURE(status)) { + return nullptr; + } + icu::CharString* const obj = new icu::CharString; + if (obj == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + pool.addElement(obj, status); + if (U_FAILURE(status)) { + delete obj; + return nullptr; + } + return obj; + } + +private: + static void U_CALLCONV deleter(void* obj) { + delete static_cast(obj); + } + + UErrorCode status; + icu::UVector pool; +}; + +} // namespace + /* * ------------------------------------------------- * @@ -675,6 +890,11 @@ _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac } else { /* resolve deprecated */ for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { + // 2-letter deprecated subtags are listede before 3-letter + // ones in DEPRECATEDLANGS[]. Get out of loop on coming + // across the 1st 3-letter subtag, if the input is a 2-letter code. + // to avoid continuing to try when there's no match. + if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break; if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); len = (int32_t)uprv_strlen(buf); @@ -721,7 +941,6 @@ _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit *(appendAt + reslen) = SEP; } reslen++; - if (reslen < capacity) { uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); } @@ -763,6 +982,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit *(appendAt + reslen) = SEP; } reslen++; + /* resolve deprecated */ + for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) { + if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) { + uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]); + len = (int32_t)uprv_strlen(buf); + break; + } + } if (reslen < capacity) { uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); @@ -900,7 +1127,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac static int32_t _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; int32_t attrBufLength = 0; UEnumeration *keywordEnum = NULL; @@ -920,22 +1146,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac AttributeListEntry *firstAttr = NULL; AttributeListEntry *attr; char *attrValue; - char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - char *pExtBuf = extBuf; - int32_t extBufCapacity = sizeof(extBuf); + CharStringPool extBufPool; const char *bcpKey=nullptr, *bcpValue=nullptr; UErrorCode tmpStatus = U_ZERO_ERROR; int32_t keylen; UBool isBcpUExt; while (TRUE) { + icu::CharString buf; key = uenum_next(keywordEnum, NULL, status); if (key == NULL) { break; } - len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); - /* buf must be null-terminated */ - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + char* buffer; + int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY; + + for (;;) { + buffer = buf.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + tmpStatus); + + if (U_FAILURE(tmpStatus)) { + break; + } + + len = uloc_getKeywordValue( + localeID, key, buffer, resultCapacity, &tmpStatus); + + if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + resultCapacity = len; + tmpStatus = U_ZERO_ERROR; + } + + if (U_FAILURE(tmpStatus)) { + if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -945,6 +1197,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac continue; } + buf.append(buffer, len, tmpStatus); + if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. + } + keylen = (int32_t)uprv_strlen(key); isBcpUExt = (keylen > 1); @@ -1007,7 +1264,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } /* we've checked buf is null-terminated above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf); + bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); if (bcpValue == NULL) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; @@ -1015,33 +1272,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } continue; } - if (bcpValue == buf) { + if (bcpValue == buf.data()) { /* When uloc_toUnicodeLocaleType(key, buf) returns the input value as is, the value is well-formed, but has no known mapping. This implementation normalizes the - the value to lower case + value to lower case */ + icu::CharString* extBuf = extBufPool.create(); + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + break; + } int32_t bcpValueLen = static_cast(uprv_strlen(bcpValue)); - if (bcpValueLen < extBufCapacity) { - uprv_strcpy(pExtBuf, bcpValue); - T_CString_toLowerCase(pExtBuf); + int32_t resultCapacity; + char* pExtBuf = extBuf->getAppendBuffer( + /*minCapacity=*/bcpValueLen, + /*desiredCapacityHint=*/bcpValueLen, + resultCapacity, + tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } - bcpValue = pExtBuf; + uprv_strcpy(pExtBuf, bcpValue); + T_CString_toLowerCase(pExtBuf); - pExtBuf += (bcpValueLen + 1); - extBufCapacity -= (bcpValueLen + 1); - } else { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; + extBuf->append(pExtBuf, bcpValueLen, tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; } + + bcpValue = extBuf->data(); } } else { if (*key == PRIVATEUSE) { - if (!_isPrivateuseValueSubtags(buf, len)) { + if (!_isPrivateuseValueSubtags(buf.data(), len)) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -1049,7 +1317,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac continue; } } else { - if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { + if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) { if (strict) { *status = U_ILLEGAL_ARGUMENT_ERROR; break; @@ -1058,20 +1326,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac } } bcpKey = key; - if ((len + 1) < extBufCapacity) { - uprv_memcpy(pExtBuf, buf, len); - bcpValue = pExtBuf; - - pExtBuf += len; - - *pExtBuf = 0; - pExtBuf++; - - extBufCapacity -= (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; + icu::CharString* extBuf = extBufPool.create(); + if (extBuf == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; break; } + extBuf->append(buf.data(), len, tmpStatus); + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + break; + } + bcpValue = extBuf->data(); } /* create ExtensionListEntry */ @@ -1242,6 +1507,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT attrBufIdx += (len + 1); } else { *status = U_ILLEGAL_ARGUMENT_ERROR; + uprv_free(attr); goto cleanup; } @@ -1460,9 +1726,9 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT kwd->value = pType; if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; + // duplicate keyword is allowed, Only the first + // is honored. uprv_free(kwd); - goto cleanup; } } @@ -1836,7 +2102,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } /* check if the tag is grandfathered */ - for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { + for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) { if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { int32_t newTagLength; @@ -1858,6 +2124,37 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } } + size_t parsedLenDelta = 0; + if (grandfatheredLen == 0) { + for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) { + const char* redundantTag = REDUNDANT[i]; + size_t redundantTagLen = uprv_strlen(redundantTag); + // The preferred tag for a redundant tag is always shorter than redundant + // tag. A redundant tag may or may not be followed by other subtags. + // (i.e. "zh-yue" or "zh-yue-u-co-pinyin"). + if (uprv_strnicmp(redundantTag, tagBuf, static_cast(redundantTagLen)) == 0) { + const char* redundantTagEnd = tagBuf + redundantTagLen; + if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) { + const char* preferredTag = REDUNDANT[i + 1]; + size_t preferredTagLen = uprv_strlen(preferredTag); + uprv_strncpy(t->buf, preferredTag, preferredTagLen); + if (*redundantTagEnd == SEP) { + uprv_memmove(tagBuf + preferredTagLen, + redundantTagEnd, + tagLen - redundantTagLen + 1); + } else { + tagBuf[preferredTagLen] = '\0'; + } + // parsedLen should be the length of the input + // before redundantTag is replaced by preferredTag. + // Save the delta to add it back later. + parsedLenDelta = redundantTagLen - preferredTagLen; + break; + } + } + } + } + /* * langtag = language * ["-" script] @@ -1898,10 +2195,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta if (next & LANG) { if (_isLanguageSubtag(pSubtag, subtagLen)) { *pSep = 0; /* terminate */ + // TODO: move deprecated language code handling here. t->language = T_CString_toLowerCase(pSubtag); pLastGoodPosition = pSep; - next = EXTL | SCRT | REGN | VART | EXTS | PRIV; + next = SCRT | REGN | VART | EXTS | PRIV; + if (subtagLen <= 3) + next |= EXTL; continue; } } @@ -1942,6 +2242,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta if (next & REGN) { if (_isRegionSubtag(pSubtag, subtagLen)) { *pSep = 0; + // TODO: move deprecated region code handling here. t->region = T_CString_toUpperCase(pSubtag); pLastGoodPosition = pSep; @@ -2035,7 +2336,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } } if (next & PRIV) { - if (uprv_tolower(*pSubtag) == PRIVATEUSE) { + if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) { char *pPrivuseVal; if (pExtension != NULL) { @@ -2138,7 +2439,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta } if (parsedLen != NULL) { - *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); + *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : + (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta); } return t; @@ -2335,31 +2637,66 @@ uloc_toLanguageTag(const char* localeID, int32_t langtagCapacity, UBool strict, UErrorCode* status) { - /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ - char canonical[256]; - int32_t reslen = 0; + icu::CharString canonical; + int32_t reslen; UErrorCode tmpStatus = U_ZERO_ERROR; UBool hadPosix = FALSE; const char* pKeywordStart; /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - canonical[0] = 0; - if (uprv_strlen(localeID) > 0) { - uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); - if (tmpStatus != U_ZERO_ERROR) { + int32_t resultCapacity = static_cast(uprv_strlen(localeID)); + if (resultCapacity > 0) { + char* buffer; + + for (;;) { + buffer = canonical.getAppendBuffer( + /*minCapacity=*/resultCapacity, + /*desiredCapacityHint=*/resultCapacity, + resultCapacity, + tmpStatus); + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return 0; + } + + reslen = + uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); + + if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { + break; + } + + resultCapacity = reslen; + tmpStatus = U_ZERO_ERROR; + } + + if (U_FAILURE(tmpStatus)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } + + canonical.append(buffer, reslen, tmpStatus); + if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { + tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. + } + + if (U_FAILURE(tmpStatus)) { + *status = tmpStatus; + return 0; + } } + reslen = 0; + /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical); - if (pKeywordStart == canonical) { + pKeywordStart = locale_getKeywordsStart(canonical.data()); + if (pKeywordStart == canonical.data()) { UEnumeration *kwdEnum; int kwdCnt = 0; UBool done = FALSE; - kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); + kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus); if (kwdEnum != NULL) { kwdCnt = uenum_count(kwdEnum, &tmpStatus); if (kwdCnt == 1) { @@ -2397,12 +2734,12 @@ uloc_toLanguageTag(const char* localeID, } } - reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); - reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); - reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); + reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status); + reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); + reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); + reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); + reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); return reslen; } @@ -2414,6 +2751,23 @@ uloc_forLanguageTag(const char* langtag, int32_t localeIDCapacity, int32_t* parsedLength, UErrorCode* status) { + return ulocimp_forLanguageTag( + langtag, + -1, + localeID, + localeIDCapacity, + parsedLength, + status); +} + + +U_CAPI int32_t U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* status) { ULanguageTag *lt; int32_t reslen = 0; const char *subtag, *p; @@ -2421,7 +2775,7 @@ uloc_forLanguageTag(const char* langtag, int32_t i, n; UBool noRegion = TRUE; - lt = ultag_parse(langtag, -1, parsedLength, status); + lt = ultag_parse(langtag, tagLen, parsedLength, status); if (U_FAILURE(*status)) { return 0; } diff --git a/deps/icu-small/source/common/ulocimp.h b/deps/icu-small/source/common/ulocimp.h index 855f9235dc636f..6dd8e33e09c88c 100644 --- a/deps/icu-small/source/common/ulocimp.h +++ b/deps/icu-small/source/common/ulocimp.h @@ -61,6 +61,38 @@ ulocimp_getCountry(const char *localeID, char *country, int32_t countryCapacity, const char **pEnd); +/** + * Returns a locale ID for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + *

+ * This implements the 'Language-Tag' production of BCP47, and so + * supports grandfathered (regular and irregular) as well as private + * use language tags. Private use tags are represented as 'x-whatever', + * and grandfathered tags are converted to their canonical replacements + * where they exist. Note that a few grandfathered tags have no modern + * replacement, these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * @param langtag the input BCP47 language tag. + * @param tagLen the length of langtag, or -1 to call uprv_strlen(). + * @param localeID the output buffer receiving a locale ID for the + * specified BCP47 language tag. + * @param localeIDCapacity the size of the locale ID output buffer. + * @param parsedLength if not NULL, successfully parsed length + * for the input language tag is set. + * @param err error information if receiving the locald ID + * failed. + * @return the length of the locale ID. + * @internal ICU 63 + */ +U_CAPI int32_t U_EXPORT2 +ulocimp_forLanguageTag(const char* langtag, + int32_t tagLen, + char* localeID, + int32_t localeIDCapacity, + int32_t* parsedLength, + UErrorCode* err); + /** * Get the region to use for supplemental data lookup. Uses * (1) any region specified by locale tag "rg"; if none then diff --git a/deps/icu-small/source/common/umapfile.cpp b/deps/icu-small/source/common/umapfile.cpp index 5084913cbf742a..a32573bbf70c95 100644 --- a/deps/icu-small/source/common/umapfile.cpp +++ b/deps/icu-small/source/common/umapfile.cpp @@ -22,6 +22,7 @@ #include "uposixdefs.h" #include "unicode/putil.h" +#include "unicode/ustring.h" #include "udatamem.h" #include "umapfile.h" @@ -64,7 +65,7 @@ # include "unicode/udata.h" # define LIB_PREFIX "lib" # define LIB_SUFFIX ".dll" - /* This is inconvienient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ + /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */ # define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat" # endif #elif MAP_IMPLEMENTATION==MAP_STDIO @@ -84,7 +85,10 @@ *----------------------------------------------------------------------------*/ #if MAP_IMPLEMENTATION==MAP_NONE U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { + if (U_FAILURE(*status)) { + return FALSE; + } UDataMemory_init(pData); /* Clear the output struct. */ return FALSE; /* no file access */ } @@ -97,12 +101,17 @@ uprv_mapFile( UDataMemory *pData, /* Fill in with info on the result doing the mapping. */ /* Output only; any original contents are cleared. */ - const char *path /* File path to be opened/mapped */ + const char *path, /* File path to be opened/mapped. */ + UErrorCode *status /* Error status, used to report out-of-memory errors. */ ) { HANDLE map; HANDLE file; + if (U_FAILURE(*status)) { + return FALSE; + } + UDataMemory_init(pData); /* Clear the output struct. */ /* open the input file */ @@ -111,28 +120,29 @@ OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, NULL); #else - // First we need to go from char to UTF-16 - // u_UCharsToChars could work but it requires length. - WCHAR utf16Path[MAX_PATH]; - int32_t i; - for (i = 0; i < UPRV_LENGTHOF(utf16Path); i++) - { - utf16Path[i] = path[i]; - if (path[i] == '\0') - { - break; - } + // Convert from UTF-8 string to UTF-16 string. + wchar_t utf16Path[MAX_PATH]; + int32_t pathUtf16Len = 0; + u_strFromUTF8(reinterpret_cast(utf16Path), static_cast(UPRV_LENGTHOF(utf16Path)), &pathUtf16Len, path, -1, status); + + if (U_FAILURE(*status)) { + return FALSE; } - if (i >= UPRV_LENGTHOF(utf16Path)) - { - // Ran out of room, unlikely but be safe - utf16Path[UPRV_LENGTHOF(utf16Path) - 1] = '\0'; + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + // Report back an error instead of a warning. + *status = U_BUFFER_OVERFLOW_ERROR; + return FALSE; } // TODO: Is it worth setting extended parameters to specify random access? file = CreateFile2(utf16Path, GENERIC_READ, FILE_SHARE_READ, OPEN_EXISTING, NULL); #endif - if(file==INVALID_HANDLE_VALUE) { + if (file == INVALID_HANDLE_VALUE) { + // If we failed to open the file due to an out-of-memory error, then we want + // to report that error back to the caller. + if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { + *status = U_MEMORY_ALLOCATION_ERROR; + } return FALSE; } @@ -165,7 +175,12 @@ map = CreateFileMappingFromApp(file, NULL, PAGE_READONLY, 0, NULL); #endif CloseHandle(file); - if(map==NULL) { + if (map == NULL) { + // If we failed to create the mapping due to an out-of-memory error, then + // we want to report that error back to the caller. + if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) { + *status = U_MEMORY_ALLOCATION_ERROR; + } return FALSE; } @@ -193,12 +208,16 @@ #elif MAP_IMPLEMENTATION==MAP_POSIX U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { int fd; int length; struct stat mystat; void *data; + if (U_FAILURE(*status)) { + return FALSE; + } + UDataMemory_init(pData); /* Clear the output struct. */ /* determine the length of the file */ @@ -221,6 +240,7 @@ #endif close(fd); /* no longer needed */ if(data==MAP_FAILED) { + // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? return FALSE; } @@ -263,11 +283,15 @@ } U_CFUNC UBool - uprv_mapFile(UDataMemory *pData, const char *path) { + uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { FILE *file; int32_t fileLength; void *p; + if (U_FAILURE(*status)) { + return FALSE; + } + UDataMemory_init(pData); /* Clear the output struct. */ /* open the input file */ file=fopen(path, "rb"); @@ -286,6 +310,7 @@ p=uprv_malloc(fileLength); if(p==NULL) { fclose(file); + *status = U_MEMORY_ALLOCATION_ERROR; return FALSE; } @@ -351,7 +376,7 @@ * * TODO: This works the way ICU historically has, but the * whole data fallback search path is so complicated that - * proabably almost no one will ever really understand it, + * probably almost no one will ever really understand it, * the potential for confusion is large. (It's not just * this one function, but the whole scheme.) * @@ -391,7 +416,7 @@ # define DATA_TYPE "dat" - U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path) { + U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) { const char *inBasename; char *basename; char pathBuffer[1024]; @@ -399,6 +424,10 @@ dllhandle *handle; void *val=0; + if (U_FAILURE(*status)) { + return FALSE; + } + inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR); if(inBasename==NULL) { inBasename = path; @@ -430,6 +459,7 @@ data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); /* no longer needed */ if(data==MAP_FAILED) { + // Possibly check the errorno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR? return FALSE; } pData->map = (char *)data + length; diff --git a/deps/icu-small/source/common/umapfile.h b/deps/icu-small/source/common/umapfile.h index 24e476b11e93d0..92bd567a2a9895 100644 --- a/deps/icu-small/source/common/umapfile.h +++ b/deps/icu-small/source/common/umapfile.h @@ -29,7 +29,7 @@ #include "unicode/udata.h" #include "putilimp.h" -U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path); +U_CFUNC UBool uprv_mapFile(UDataMemory *pdm, const char *path, UErrorCode *status); U_CFUNC void uprv_unmapFile(UDataMemory *pData); /* MAP_NONE: no memory mapping, no file access at all */ diff --git a/deps/icu-small/source/common/umutablecptrie.cpp b/deps/icu-small/source/common/umutablecptrie.cpp new file mode 100644 index 00000000000000..40af4b6c16a163 --- /dev/null +++ b/deps/icu-small/source/common/umutablecptrie.cpp @@ -0,0 +1,1678 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.cpp (inspired by utrie2_builder.cpp) +// created: 2017dec29 Markus W. Scherer + +// #define UCPTRIE_DEBUG +#ifdef UCPTRIE_DEBUG +# include +#endif + +#include "unicode/utypes.h" +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "unicode/uobject.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "uassert.h" +#include "ucptrie_impl.h" + +U_NAMESPACE_BEGIN + +namespace { + +constexpr int32_t MAX_UNICODE = 0x10ffff; + +constexpr int32_t UNICODE_LIMIT = 0x110000; +constexpr int32_t BMP_LIMIT = 0x10000; +constexpr int32_t ASCII_LIMIT = 0x80; + +constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3; +constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3; + +constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3)); + +// Flag values for data blocks. +constexpr uint8_t ALL_SAME = 0; +constexpr uint8_t MIXED = 1; +constexpr uint8_t SAME_AS = 2; + +/** Start with allocation of 16k data entries. */ +constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14); + +/** Grow about 8x each time. */ +constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17); + +/** + * Maximum length of the build-time data array. + * One entry per 0x110000 code points. + */ +constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT; + +// Flag values for index-3 blocks while compacting/building. +constexpr uint8_t I3_NULL = 0; +constexpr uint8_t I3_BMP = 1; +constexpr uint8_t I3_16 = 2; +constexpr uint8_t I3_18 = 3; + +constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8; + +class AllSameBlocks; + +class MutableCodePointTrie : public UMemory { +public: + MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode); + MutableCodePointTrie(const MutableCodePointTrie &other) = delete; + ~MutableCodePointTrie(); + + MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete; + + static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode); + static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode); + + uint32_t get(UChar32 c) const; + int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context, + uint32_t *pValue) const; + + void set(UChar32 c, uint32_t value, UErrorCode &errorCode); + void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode); + + UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode); + +private: + void clear(); + + bool ensureHighStart(UChar32 c); + int32_t allocDataBlock(int32_t blockLength); + int32_t getDataBlock(int32_t i); + + void maskValues(uint32_t mask); + UChar32 findHighStart() const; + int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks); + int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex); + int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode); + int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode); + + uint32_t *index = nullptr; + int32_t indexCapacity = 0; + int32_t index3NullOffset = -1; + uint32_t *data = nullptr; + int32_t dataCapacity = 0; + int32_t dataLength = 0; + int32_t dataNullOffset = -1; + + uint32_t origInitialValue; + uint32_t initialValue; + uint32_t errorValue; + UChar32 highStart; + uint32_t highValue; +#ifdef UCPTRIE_DEBUG +public: + const char *name; +#endif +private: + /** Temporary array while building the final data. */ + uint16_t *index16 = nullptr; + uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3]; +}; + +MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) : + origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue), + highStart(0), highValue(initialValue) +#ifdef UCPTRIE_DEBUG + , name("open") +#endif + { + if (U_FAILURE(errorCode)) { return; } + index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4); + data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = BMP_I_LIMIT; + dataCapacity = INITIAL_DATA_LENGTH; +} + +MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) : + index3NullOffset(other.index3NullOffset), + dataNullOffset(other.dataNullOffset), + origInitialValue(other.origInitialValue), initialValue(other.initialValue), + errorValue(other.errorValue), + highStart(other.highStart), highValue(other.highValue) +#ifdef UCPTRIE_DEBUG + , name("mutable clone") +#endif + { + if (U_FAILURE(errorCode)) { return; } + int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT; + index = (uint32_t *)uprv_malloc(iCapacity * 4); + data = (uint32_t *)uprv_malloc(other.dataCapacity * 4); + if (index == nullptr || data == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + indexCapacity = iCapacity; + dataCapacity = other.dataCapacity; + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + uprv_memcpy(flags, other.flags, iLimit); + uprv_memcpy(index, other.index, iLimit * 4); + uprv_memcpy(data, other.data, (size_t)other.dataLength * 4); + dataLength = other.dataLength; + U_ASSERT(other.index16 == nullptr); +} + +MutableCodePointTrie::~MutableCodePointTrie() { + uprv_free(index); + uprv_free(data); + uprv_free(index16); +} + +MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { + // Use the highValue as the initialValue to reduce the highStart. + uint32_t errorValue = ucpmap_get(map, -1); + uint32_t initialValue = ucpmap_get(map, 0x10ffff); + LocalPointer mutableTrie( + new MutableCodePointTrie(initialValue, errorValue, errorCode), + errorCode); + if (U_FAILURE(errorCode)) { + return nullptr; + } + UChar32 start = 0, end; + uint32_t value; + while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, &value)) >= 0) { + if (value != initialValue) { + if (start == end) { + mutableTrie->set(start, value, errorCode); + } else { + mutableTrie->setRange(start, end, value, errorCode); + } + } + start = end + 1; + } + if (U_SUCCESS(errorCode)) { + return mutableTrie.orphan(); + } else { + return nullptr; + } +} + +MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { + // Use the highValue as the initialValue to reduce the highStart. + uint32_t errorValue; + uint32_t initialValue; + switch (trie->valueWidth) { + case UCPTRIE_VALUE_BITS_16: + errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_32: + errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + case UCPTRIE_VALUE_BITS_8: + errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET]; + initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET]; + break; + default: + // Unreachable if the trie is properly initialized. + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + LocalPointer mutableTrie( + new MutableCodePointTrie(initialValue, errorValue, errorCode), + errorCode); + if (U_FAILURE(errorCode)) { + return nullptr; + } + UChar32 start = 0, end; + uint32_t value; + while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, &value)) >= 0) { + if (value != initialValue) { + if (start == end) { + mutableTrie->set(start, value, errorCode); + } else { + mutableTrie->setRange(start, end, value, errorCode); + } + } + start = end + 1; + } + if (U_SUCCESS(errorCode)) { + return mutableTrie.orphan(); + } else { + return nullptr; + } +} + +void MutableCodePointTrie::clear() { + index3NullOffset = dataNullOffset = -1; + dataLength = 0; + highValue = initialValue = origInitialValue; + highStart = 0; + uprv_free(index16); + index16 = nullptr; +} + +uint32_t MutableCodePointTrie::get(UChar32 c) const { + if ((uint32_t)c > MAX_UNICODE) { + return errorValue; + } + if (c >= highStart) { + return highValue; + } + int32_t i = c >> UCPTRIE_SHIFT_3; + if (flags[i] == ALL_SAME) { + return index[i]; + } else { + return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)]; + } +} + +inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue, + UCPMapValueFilter *filter, const void *context) { + if (value == initialValue) { + value = nullValue; + } else if (filter != nullptr) { + value = filter(context, value); + } + return value; +} + +UChar32 MutableCodePointTrie::getRange( + UChar32 start, UCPMapValueFilter *filter, const void *context, + uint32_t *pValue) const { + if ((uint32_t)start > MAX_UNICODE) { + return U_SENTINEL; + } + if (start >= highStart) { + if (pValue != nullptr) { + uint32_t value = highValue; + if (filter != nullptr) { value = filter(context, value); } + *pValue = value; + } + return MAX_UNICODE; + } + uint32_t nullValue = initialValue; + if (filter != nullptr) { nullValue = filter(context, nullValue); } + UChar32 c = start; + uint32_t value; + bool haveValue = false; + int32_t i = c >> UCPTRIE_SHIFT_3; + do { + if (flags[i] == ALL_SAME) { + uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK; + } else /* MIXED */ { + int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK); + uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue, + filter, context); + if (haveValue) { + if (value2 != value) { + return c - 1; + } + } else { + value = value2; + if (pValue != nullptr) { *pValue = value; } + haveValue = true; + } + while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { + if (maybeFilterValue(data[++di], initialValue, nullValue, + filter, context) != value) { + return c - 1; + } + } + } + ++i; + } while (c < highStart); + U_ASSERT(haveValue); + if (maybeFilterValue(highValue, initialValue, nullValue, + filter, context) != value) { + return c - 1; + } else { + return MAX_UNICODE; + } +} + +void +writeBlock(uint32_t *block, uint32_t value) { + uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + while (block < limit) { + *block++ = value; + } +} + +bool MutableCodePointTrie::ensureHighStart(UChar32 c) { + if (c >= highStart) { + // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction. + c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + int32_t i = highStart >> UCPTRIE_SHIFT_3; + int32_t iLimit = c >> UCPTRIE_SHIFT_3; + if (iLimit > indexCapacity) { + uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4); + if (newIndex == nullptr) { return false; } + uprv_memcpy(newIndex, index, i * 4); + uprv_free(index); + index = newIndex; + indexCapacity = I_LIMIT; + } + do { + flags[i] = ALL_SAME; + index[i] = initialValue; + } while(++i < iLimit); + highStart = c; + } + return true; +} + +int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { + int32_t newBlock = dataLength; + int32_t newTop = newBlock + blockLength; + if (newTop > dataCapacity) { + int32_t capacity; + if (dataCapacity < MEDIUM_DATA_LENGTH) { + capacity = MEDIUM_DATA_LENGTH; + } else if (dataCapacity < MAX_DATA_LENGTH) { + capacity = MAX_DATA_LENGTH; + } else { + // Should never occur. + // Either MAX_DATA_LENGTH is incorrect, + // or the code writes more values than should be possible. + return -1; + } + uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4); + if (newData == nullptr) { + return -1; + } + uprv_memcpy(newData, data, (size_t)dataLength * 4); + uprv_free(data); + data = newData; + dataCapacity = capacity; + } + dataLength = newTop; + return newBlock; +} + +/** + * No error checking for illegal arguments. + * + * @return -1 if no new data block available (out of memory in data array) + * @internal + */ +int32_t MutableCodePointTrie::getDataBlock(int32_t i) { + if (flags[i] == MIXED) { + return index[i]; + } + if (i < BMP_I_LIMIT) { + int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH); + if (newBlock < 0) { return newBlock; } + int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1); + int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + do { + U_ASSERT(flags[iStart] == ALL_SAME); + writeBlock(data + newBlock, index[iStart]); + flags[iStart] = MIXED; + index[iStart++] = newBlock; + newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } while (iStart < iLimit); + return index[i]; + } else { + int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH); + if (newBlock < 0) { return newBlock; } + writeBlock(data + newBlock, index[i]); + flags[i] = MIXED; + index[i] = newBlock; + return newBlock; + } +} + +void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + if ((uint32_t)c > MAX_UNICODE) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + int32_t block; + if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value; +} + +void +fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { + uint32_t *pLimit = block + limit; + block += start; + while (block < pLimit) { + *block++ = value; + } +} + +void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return; + } + if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (!ensureHighStart(end)) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + UChar32 limit = end + 1; + if (start & UCPTRIE_SMALL_DATA_MASK) { + // Set partial block at [start..following block boundary[. + int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); + if (block < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK; + if (nextStart <= limit) { + fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, + value); + start = nextStart; + } else { + fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK, + value); + return; + } + } + + // Number of positions in the last, partial block. + int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK; + + // Round down limit to a block boundary. + limit &= ~UCPTRIE_SMALL_DATA_MASK; + + // Iterate over all-value blocks. + while (start < limit) { + int32_t i = start >> UCPTRIE_SHIFT_3; + if (flags[i] == ALL_SAME) { + index[i] = value; + } else /* MIXED */ { + fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value); + } + start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + } + + if (rest > 0) { + // Set partial block at [last block boundary..limit[. + int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3); + if (block < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + fillBlock(data + block, 0, rest, value); + } +} + +/* compaction --------------------------------------------------------------- */ + +void MutableCodePointTrie::maskValues(uint32_t mask) { + initialValue &= mask; + errorValue &= mask; + highValue &= mask; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + for (int32_t i = 0; i < iLimit; ++i) { + if (flags[i] == ALL_SAME) { + index[i] &= mask; + } + } + for (int32_t i = 0; i < dataLength; ++i) { + data[i] &= mask; + } +} + +inline bool +equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) { + while (length > 0 && *s == *t) { + ++s; + ++t; + --length; + } + return length == 0; +} + +inline bool +equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) { + while (length > 0 && *s == *t) { + ++s; + ++t; + --length; + } + return length == 0; +} + +inline bool +equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) { + while (length > 0 && *s == *t) { + ++s; + ++t; + --length; + } + return length == 0; +} + +bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { + const uint32_t *pLimit = p + length; + while (p < pLimit && *p == value) { ++p; } + return p == pLimit; +} + +/** Search for an identical block. */ +int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length, + const uint32_t *q, int32_t qStart, int32_t blockLength) { + // Ensure that we do not even partially get past length. + length -= blockLength; + + q += qStart; + while (pStart <= length) { + if (equalBlocks(p + pStart, q, blockLength)) { + return pStart; + } + ++pStart; + } + return -1; +} + +int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, + const uint32_t *q, int32_t qStart, int32_t blockLength) { + // Ensure that we do not even partially get past length. + length -= blockLength; + + q += qStart; + while (pStart <= length) { + if (equalBlocks(p + pStart, q, blockLength)) { + return pStart; + } + ++pStart; + } + return -1; +} + +int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length, + const uint16_t *q, int32_t qStart, int32_t blockLength) { + // Ensure that we do not even partially get past length. + length -= blockLength; + + q += qStart; + while (pStart <= length) { + if (equalBlocks(p + pStart, q, blockLength)) { + return pStart; + } + ++pStart; + } + return -1; +} + +int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit, + uint32_t value, int32_t blockLength) { + // Ensure that we do not even partially get past limit. + limit -= blockLength; + + for (int32_t block = start; block <= limit; ++block) { + if (p[block] == value) { + for (int32_t i = 1;; ++i) { + if (i == blockLength) { + return block; + } + if (p[block + i] != value) { + block += i; + break; + } + } + } + } + return -1; +} + +/** + * Look for maximum overlap of the beginning of the other block + * with the previous, adjacent block. + */ +int32_t getOverlap(const uint32_t *p, int32_t length, + const uint32_t *q, int32_t qStart, int32_t blockLength) { + int32_t overlap = blockLength - 1; + U_ASSERT(overlap <= length); + q += qStart; + while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { + --overlap; + } + return overlap; +} + +int32_t getOverlap(const uint16_t *p, int32_t length, + const uint32_t *q, int32_t qStart, int32_t blockLength) { + int32_t overlap = blockLength - 1; + U_ASSERT(overlap <= length); + q += qStart; + while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { + --overlap; + } + return overlap; +} + +int32_t getOverlap(const uint16_t *p, int32_t length, + const uint16_t *q, int32_t qStart, int32_t blockLength) { + int32_t overlap = blockLength - 1; + U_ASSERT(overlap <= length); + q += qStart; + while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { + --overlap; + } + return overlap; +} + +int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value, + int32_t blockLength) { + int32_t min = length - (blockLength - 1); + int32_t i = length; + while (min < i && p[i - 1] == value) { --i; } + return length - i; +} + +bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { + for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { + if (index[i] == dataOffset) { + return true; + } + } + return false; +} + +/** + * Finds the start of the last range in the trie by enumerating backward. + * Indexes for code points higher than this will be omitted. + */ +UChar32 MutableCodePointTrie::findHighStart() const { + int32_t i = highStart >> UCPTRIE_SHIFT_3; + while (i > 0) { + bool match; + if (flags[--i] == ALL_SAME) { + match = index[i] == highValue; + } else /* MIXED */ { + const uint32_t *p = data + index[i]; + for (int32_t j = 0;; ++j) { + if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { + match = true; + break; + } + if (p[j] != highValue) { + match = false; + break; + } + } + } + if (!match) { + return (i + 1) << UCPTRIE_SHIFT_3; + } + } + return 0; +} + +class AllSameBlocks { +public: + static constexpr int32_t NEW_UNIQUE = -1; + static constexpr int32_t OVERFLOW = -2; + + AllSameBlocks() : length(0), mostRecent(-1) {} + + int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { + if (mostRecent >= 0 && values[mostRecent] == value) { + refCounts[mostRecent] += count; + return indexes[mostRecent]; + } + for (int32_t i = 0; i < length; ++i) { + if (values[i] == value) { + mostRecent = i; + refCounts[i] += count; + return indexes[i]; + } + } + if (length == CAPACITY) { + return OVERFLOW; + } + mostRecent = length; + indexes[length] = index; + values[length] = value; + refCounts[length++] = count; + return NEW_UNIQUE; + } + + /** Replaces the block which has the lowest reference count. */ + void add(int32_t index, int32_t count, uint32_t value) { + U_ASSERT(length == CAPACITY); + int32_t least = -1; + int32_t leastCount = I_LIMIT; + for (int32_t i = 0; i < length; ++i) { + U_ASSERT(values[i] != value); + if (refCounts[i] < leastCount) { + least = i; + leastCount = refCounts[i]; + } + } + U_ASSERT(least >= 0); + mostRecent = least; + indexes[least] = index; + values[least] = value; + refCounts[least] = count; + } + + int32_t findMostUsed() const { + if (length == 0) { return -1; } + int32_t max = -1; + int32_t maxCount = 0; + for (int32_t i = 0; i < length; ++i) { + if (refCounts[i] > maxCount) { + max = i; + maxCount = refCounts[i]; + } + } + return indexes[max]; + } + +private: + static constexpr int32_t CAPACITY = 32; + + int32_t length; + int32_t mostRecent; + + int32_t indexes[CAPACITY]; + uint32_t values[CAPACITY]; + int32_t refCounts[CAPACITY]; +}; + +int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { +#ifdef UCPTRIE_DEBUG + bool overflow = false; +#endif + + // ASCII data will be stored as a linear table, even if the following code + // does not yet count it that way. + int32_t newDataCapacity = ASCII_LIMIT; + // Add room for a small data null block in case it would match the start of + // a fast data block where dataNullOffset must not be set in that case. + newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + // Add room for special values (errorValue, highValue) and padding. + newDataCapacity += 4; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t i = 0; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + } + uint32_t value = index[i]; + if (flags[i] == MIXED) { + // Really mixed? + const uint32_t *p = data + value; + value = *p; + if (allValuesSameAs(p + 1, blockLength - 1, value)) { + flags[i] = ALL_SAME; + index[i] = value; + // Fall through to ALL_SAME handling. + } else { + newDataCapacity += blockLength; + continue; + } + } else { + U_ASSERT(flags[i] == ALL_SAME); + if (inc > 1) { + // Do all of the fast-range data block's ALL_SAME parts have the same value? + bool allSame = true; + int32_t next_i = i + inc; + for (int32_t j = i + 1; j < next_i; ++j) { + U_ASSERT(flags[j] == ALL_SAME); + if (index[j] != value) { + allSame = false; + break; + } + } + if (!allSame) { + // Turn it into a MIXED block. + if (getDataBlock(i) < 0) { + return -1; + } + newDataCapacity += blockLength; + continue; + } + } + } + // Is there another ALL_SAME block with the same value? + int32_t other = allSameBlocks.findOrAdd(i, inc, value); + if (other == AllSameBlocks::OVERFLOW) { + // The fixed-size array overflowed. Slow check for a duplicate block. +#ifdef UCPTRIE_DEBUG + if (!overflow) { + puts("UCPTrie AllSameBlocks overflow"); + overflow = true; + } +#endif + int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + for (int32_t j = 0;; j += jInc) { + if (j == i) { + allSameBlocks.add(i, inc, value); + break; + } + if (j == fastILimit) { + jInc = 1; + } + if (flags[j] == ALL_SAME && index[j] == value) { + allSameBlocks.add(j, jInc + inc, value); + other = j; + break; + // We could keep counting blocks with the same value + // before we add the first one, which may improve compaction in rare cases, + // but it would make it slower. + } + } + } + if (other >= 0) { + flags[i] = SAME_AS; + index[i] = other; + } else { + // New unique same-value block. + newDataCapacity += blockLength; + } + } + return newDataCapacity; +} + +#ifdef UCPTRIE_DEBUG +# define DEBUG_DO(expr) expr +#else +# define DEBUG_DO(expr) +#endif + +#ifdef UCPTRIE_DEBUG +// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF +int32_t appendValue(char s[], int32_t length, uint32_t value) { + value ^= value >> 16; + value ^= value >> 8; + s[length] = 0xE2; + s[length + 1] = (char)(0xA0 + ((value >> 6) & 3)); + s[length + 2] = (char)(0x80 + (value & 0x3F)); + return length + 3; +} + +void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value, + UChar32 start, int32_t overlap, uint32_t initialValue) { + char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3]; + int32_t length = 0; + int32_t i; + for (i = 0; i < overlap; ++i) { + length = appendValue(s, length, 0); // Braille blank + } + s[length++] = '|'; + for (; i < blockLength; ++i) { + if (block != nullptr) { + value = block[i]; + } + if (value == initialValue) { + value = 0x40; // Braille lower left dot + } + length = appendValue(s, length, value); + } + s[length] = 0; + start += overlap; + if (start <= 0xffff) { + printf(" %04lX %s|\n", (long)start, s); + } else if (start <= 0xfffff) { + printf(" %5lX %s|\n", (long)start, s); + } else { + printf(" %6lX %s|\n", (long)start, s); + } +} +#endif + +/** + * Compacts a build-time trie. + * + * The compaction + * - removes blocks that are identical with earlier ones + * - overlaps each new non-duplicate block as much as possible with the previously-written one + * - works with fast-range data blocks whose length is a multiple of that of + * higher-code-point data blocks + * + * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks. + */ +int32_t MutableCodePointTrie::compactData(int32_t fastILimit, + uint32_t *newData, int32_t dataNullIndex) { +#ifdef UCPTRIE_DEBUG + int32_t countSame=0, sumOverlaps=0; + bool printData = dataLength == 29088 /* line.brk */ || + // dataLength == 30048 /* CanonIterData */ || + dataLength == 50400 /* zh.txt~stroke */; +#endif + + // The linear ASCII data has been copied into newData already. + int32_t newDataLength = 0; + for (int32_t i = 0; newDataLength < ASCII_LIMIT; + newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { + index[i] = newDataLength; +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue); + } +#endif + } + + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; + int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + int32_t fastLength = 0; + for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { + if (i == fastILimit) { + blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + inc = 1; + fastLength = newDataLength; + } + if (flags[i] == ALL_SAME) { + uint32_t value = index[i]; + int32_t n; + // Find an earlier part of the data array of length blockLength + // that is filled with this value. + // If we find a match, and the current block is the data null block, + // and it is not a fast block but matches the start of a fast block, + // then we need to continue looking. + // This is because this small block is shorter than the fast block, + // and not all of the rest of the fast block is filled with this value. + // Otherwise trie.getRange() would detect that the fast block starts at + // dataNullOffset and assume incorrectly that it is filled with the null value. + for (int32_t start = 0; + (n = findAllSameBlock(newData, start, newDataLength, + value, blockLength)) >= 0 && + i == dataNullIndex && i >= fastILimit && n < fastLength && + isStartOfSomeFastBlock(n, index, fastILimit); + start = n + 1) {} + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getAllSameOverlap(newData, newDataLength, value, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + while (n < blockLength) { + newData[newDataLength++] = value; + ++n; + } + } + } else if (flags[i] == MIXED) { + const uint32_t *block = data + index[i]; + int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength); + if (n >= 0) { + DEBUG_DO(++countSame); + index[i] = n; + } else { + n = getOverlap(newData, newDataLength, block, 0, blockLength); + DEBUG_DO(sumOverlaps += n); +#ifdef UCPTRIE_DEBUG + if (printData) { + printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue); + } +#endif + index[i] = newDataLength - n; + while (n < blockLength) { + newData[newDataLength++] = block[n++]; + } + } + } else /* SAME_AS */ { + uint32_t j = index[i]; + index[i] = index[j]; + } + } + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps); +#endif + return newDataLength; +} + +int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) { + int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3); + if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { + // Only the linear fast index, no multi-stage index tables. + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + return fastIndexLength; + } + + // Condense the fast index table. + // Also, does it contain an index-3 block with all dataNullOffset? + uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength + int32_t i3FirstNull = -1; + for (int32_t i = 0, j = 0; i < fastILimit; ++j) { + uint32_t i3 = index[i]; + fastIndex[j] = (uint16_t)i3; + if (i3 == (uint32_t)dataNullOffset) { + if (i3FirstNull < 0) { + i3FirstNull = j; + } else if (index3NullOffset < 0 && + (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index3NullOffset = i3FirstNull; + } + } else { + i3FirstNull = -1; + } + // Set the index entries that compactData() skipped. + // Needed when the multi-stage index covers the fast index range as well. + int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK; + while (++i < iNext) { + i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH; + index[i] = i3; + } + } + + // Examine index-3 blocks. For each determine one of: + // - same as the index-3 null block + // - same as a fast-index block + // - 16-bit indexes + // - 18-bit indexes + // We store this in the first flags entry for the index-3 block. + // + // Also determine an upper limit for the index-3 table length. + int32_t index3Capacity = 0; + i3FirstNull = index3NullOffset; + // If the fast index covers the whole BMP, then + // the multi-stage index is only for supplementary code points. + // Otherwise, the multi-stage index covers all of Unicode. + int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT; + int32_t iLimit = highStart >> UCPTRIE_SHIFT_3; + for (int32_t i = iStart; i < iLimit;) { + int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + uint32_t oredI3 = 0; + bool isNull = true; + do { + uint32_t i3 = index[j]; + oredI3 |= i3; + if (i3 != (uint32_t)dataNullOffset) { + isNull = false; + } + } while (++j < jLimit); + if (isNull) { + flags[i] = I3_NULL; + if (i3FirstNull < 0) { + if (oredI3 <= 0xffff) { + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } else { + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + i3FirstNull = 0; + } + } else { + if (oredI3 <= 0xffff) { + int32_t n = findSameBlock(fastIndex, 0, fastIndexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + flags[i] = I3_BMP; + index[i] = n; + } else { + flags[i] = I3_16; + index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH; + } + } else { + flags[i] = I3_18; + index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + i = j; + } + + int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3; + + // Length of the index-1 table, rounded up. + int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2; + + // Index table: Fast index, index-1, index-3, index-2. + // +1 for possible index table padding. + int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1; + index16 = (uint16_t *)uprv_malloc(index16Capacity * 2); + if (index16 == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(index16, fastIndex, fastIndexLength * 2); + + // Compact the index-3 table and write an uncompacted version of the index-2 table. + uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity + int32_t i2Length = 0; + i3FirstNull = index3NullOffset; + int32_t index3Start = fastIndexLength + index1Length; + int32_t indexLength = index3Start; + for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { + int32_t i3; + uint8_t f = flags[i]; + if (f == I3_NULL && i3FirstNull < 0) { + // First index-3 null block. Write & overlap it like a normal block, then remember it. + f = dataNullOffset <= 0xffff ? I3_16 : I3_18; + i3FirstNull = 0; + } + if (f == I3_NULL) { + i3 = index3NullOffset; + } else if (f == I3_BMP) { + i3 = index[i]; + } else if (f == I3_16) { + int32_t n = findSameBlock(index16, index3Start, indexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + if (n >= 0) { + i3 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH); + } + i3 = indexLength - n; + while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { + index16[indexLength++] = index[i + n++]; + } + } + } else { + U_ASSERT(f == I3_18); + // Encode an index-3 block that contains one or more data indexes exceeding 16 bits. + int32_t j = i; + int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH; + int32_t k = indexLength; + do { + ++k; + uint32_t v = index[j++]; + uint32_t upperBits = (v & 0x30000) >> 2; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 4; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 6; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 8; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 10; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 12; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 14; + index16[k++] = v; + v = index[j++]; + upperBits |= (v & 0x30000) >> 16; + index16[k++] = v; + index16[k - 9] = upperBits; + } while (j < jLimit); + int32_t n = findSameBlock(index16, index3Start, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + if (n >= 0) { + i3 = n | 0x8000; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, + index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH); + } + i3 = (indexLength - n) | 0x8000; + if (n > 0) { + int32_t start = indexLength; + while (n < INDEX_3_18BIT_BLOCK_LENGTH) { + index16[indexLength++] = index16[start + n++]; + } + } else { + indexLength += INDEX_3_18BIT_BLOCK_LENGTH; + } + } + } + if (index3NullOffset < 0 && i3FirstNull >= 0) { + index3NullOffset = i3; + } + // Set the index-2 table entry. + index2[i2Length++] = i3; + } + U_ASSERT(i2Length == index2Capacity); + U_ASSERT(indexLength <= index3Start + index3Capacity); + + if (index3NullOffset < 0) { + index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET; + } + if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { + // The index-3 offsets exceed 15 bits, or + // the last one cannot be distinguished from the no-null-block value. + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + // Compact the index-2 table and write the index-1 table. + int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH; + int32_t i1 = fastIndexLength; + for (int32_t i = 0; i < i2Length; i += blockLength) { + if ((i2Length - i) < blockLength) { + // highStart is inside the last index-2 block. Shorten it. + blockLength = i2Length - i; + } + int32_t i2; + int32_t n = findSameBlock(index16, index3Start, indexLength, + index2, i, blockLength); + if (n >= 0) { + i2 = n; + } else { + if (indexLength == index3Start) { + // No overlap at the boundary between the index-1 and index-3/2 tables. + n = 0; + } else { + n = getOverlap(index16, indexLength, index2, i, blockLength); + } + i2 = indexLength - n; + while (n < blockLength) { + index16[indexLength++] = index2[i + n++]; + } + } + // Set the index-1 table entry. + index16[i1++] = i2; + } + U_ASSERT(i1 == index3Start); + U_ASSERT(indexLength <= index16Capacity); + +#ifdef UCPTRIE_DEBUG + /* we saved some space */ + printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", + (long)iLimit, (long)indexLength); +#endif + + return indexLength; +} + +int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { + // Find the real highStart and round it up. + U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); + highValue = get(MAX_UNICODE); + int32_t realHighStart = findHighStart(); + realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) & + ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); + if (realHighStart == UNICODE_LIMIT) { + highValue = initialValue; + } + +#ifdef UCPTRIE_DEBUG + printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", + (long)realHighStart, (long)highValue, (long)initialValue); +#endif + + // We always store indexes and data values for the fast range. + // Pin highStart to the top of that range while building. + UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3; + if (realHighStart < fastLimit) { + for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { + flags[i] = ALL_SAME; + index[i] = highValue; + } + highStart = fastLimit; + } else { + highStart = realHighStart; + } + + uint32_t asciiData[ASCII_LIMIT]; + for (int32_t i = 0; i < ASCII_LIMIT; ++i) { + asciiData[i] = get(i); + } + + // First we look for which data blocks have the same value repeated over the whole block, + // deduplicate such blocks, find a good null data block (for faster enumeration), + // and get an upper bound for the necessary data array length. + AllSameBlocks allSameBlocks; + int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks); + if (newDataCapacity < 0) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4); + if (newData == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + return 0; + } + uprv_memcpy(newData, asciiData, sizeof(asciiData)); + + int32_t dataNullIndex = allSameBlocks.findMostUsed(); + int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex); + U_ASSERT(newDataLength <= newDataCapacity); + uprv_free(data); + data = newData; + dataCapacity = newDataCapacity; + dataLength = newDataLength; + if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { + // The offset of the last data block is too high to be stored in the index table. + errorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + if (dataNullIndex >= 0) { + dataNullOffset = index[dataNullIndex]; +#ifdef UCPTRIE_DEBUG + if (data[dataNullOffset] != initialValue) { + printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", + (long)initialValue, (long)data[dataNullOffset]); + } +#endif + initialValue = data[dataNullOffset]; + } else { + dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET; + } + + int32_t indexLength = compactIndex(fastILimit, errorCode); + highStart = realHighStart; + return indexLength; +} + +UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return nullptr; + } + if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || + valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { + errorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + + // The mutable trie always stores 32-bit values. + // When we build a UCPTrie for a smaller value width, we first mask off unused bits + // before compacting the data. + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_32: + break; + case UCPTRIE_VALUE_BITS_16: + maskValues(0xffff); + break; + case UCPTRIE_VALUE_BITS_8: + maskValues(0xff); + break; + default: + break; + } + + UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT; + int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode); + if (U_FAILURE(errorCode)) { + clear(); + return nullptr; + } + + // Ensure data table alignment: The index length must be even for uint32_t data. + if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { + index16[indexLength++] = 0xffee; // arbitrary value + } + + // Make the total trie structure length a multiple of 4 bytes by padding the data table, + // and store special values as the last two data values. + int32_t length = indexLength * 2; + if (valueWidth == UCPTRIE_VALUE_BITS_16) { + if (((indexLength ^ dataLength) & 1) != 0) { + // padding + data[dataLength++] = errorValue; + } + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength * 2; + } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { + // 32-bit data words never need padding to a multiple of 4 bytes. + if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { + if (data[dataLength - 1] != highValue) { + data[dataLength++] = highValue; + } + data[dataLength++] = errorValue; + } + length += dataLength * 4; + } else { + int32_t and3 = (length + dataLength) & 3; + if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { + // all set + } else if(and3 == 3 && data[dataLength - 1] == highValue) { + data[dataLength++] = errorValue; + } else { + while (and3 != 2) { + data[dataLength++] = highValue; + and3 = (and3 + 1) & 3; + } + data[dataLength++] = highValue; + data[dataLength++] = errorValue; + } + length += dataLength; + } + + // Calculate the total length of the UCPTrie as a single memory block. + length += sizeof(UCPTrie); + U_ASSERT((length & 3) == 0); + + uint8_t *bytes = (uint8_t *)uprv_malloc(length); + if (bytes == nullptr) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + clear(); + return nullptr; + } + UCPTrie *trie = reinterpret_cast(bytes); + uprv_memset(trie, 0, sizeof(UCPTrie)); + trie->indexLength = indexLength; + trie->dataLength = dataLength; + + trie->highStart = highStart; + // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes. + // Runtime code needs to then test for the real highStart as well. + trie->shifted12HighStart = (highStart + 0xfff) >> 12; + trie->type = type; + trie->valueWidth = valueWidth; + + trie->index3NullOffset = index3NullOffset; + trie->dataNullOffset = dataNullOffset; + trie->nullValue = initialValue; + + bytes += sizeof(UCPTrie); + + // Fill the index and data arrays. + uint16_t *dest16 = (uint16_t *)bytes; + trie->index = dest16; + + if (highStart <= fastLimit) { + // Condense only the fast index from the mutable-trie index. + for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { + *dest16++ = (uint16_t)index[i]; // dest16[j] + } + } else { + uprv_memcpy(dest16, index16, indexLength * 2); + dest16 += indexLength; + } + bytes += indexLength * 2; + + // Write the data array. + const uint32_t *p = data; + switch (valueWidth) { + case UCPTRIE_VALUE_BITS_16: + // Write 16-bit data values. + trie->data.ptr16 = dest16; + for (int32_t i = dataLength; i > 0; --i) { + *dest16++ = (uint16_t)*p++; + } + break; + case UCPTRIE_VALUE_BITS_32: + // Write 32-bit data values. + trie->data.ptr32 = (uint32_t *)bytes; + uprv_memcpy(bytes, p, (size_t)dataLength * 4); + break; + case UCPTRIE_VALUE_BITS_8: + // Write 8-bit data values. + trie->data.ptr8 = bytes; + for (int32_t i = dataLength; i > 0; --i) { + *bytes++ = (uint8_t)*p++; + } + break; + default: + // Will not occur, valueWidth checked at the beginning. + break; + } + +#ifdef UCPTRIE_DEBUG + trie->name = name; + + ucptrie_printLengths(trie, ""); +#endif + + clear(); + return trie; +} + +} // namespace + +U_NAMESPACE_END + +U_NAMESPACE_USE + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + LocalPointer trie( + new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast(trie.orphan()); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (other == nullptr) { + return nullptr; + } + LocalPointer clone( + new MutableCodePointTrie(*reinterpret_cast(other), *pErrorCode), *pErrorCode); + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast(clone.orphan()); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie) { + delete reinterpret_cast(trie); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (map == nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return reinterpret_cast(MutableCodePointTrie::fromUCPMap(map, *pErrorCode)); +} + +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + if (trie == nullptr) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return nullptr; + } + return reinterpret_cast(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode)); +} + +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { + return reinterpret_cast(trie)->get(c); +} + +namespace { + +UChar32 getRange(const void *trie, UChar32 start, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return reinterpret_cast(trie)-> + getRange(start, filter, context, pValue); +} + +} // namespace + +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { + return ucptrie_internalGetRange(getRange, trie, start, + option, surrogateValue, + filter, context, pValue); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast(trie)->set(c, value, *pErrorCode); +} + +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return; + } + reinterpret_cast(trie)->setRange(start, end, value, *pErrorCode); +} + +/* Compact and internally serialize the trie. */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { + return nullptr; + } + return reinterpret_cast(trie)->build(type, valueWidth, *pErrorCode); +} + +#ifdef UCPTRIE_DEBUG +U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { + reinterpret_cast(trie)->name = name; +} +#endif diff --git a/deps/icu-small/source/common/umutex.h b/deps/icu-small/source/common/umutex.h index 8f2f6123541f79..37e49871049b93 100644 --- a/deps/icu-small/source/common/umutex.h +++ b/deps/icu-small/source/common/umutex.h @@ -56,6 +56,13 @@ U_NAMESPACE_END U_NAMESPACE_BEGIN +// Export an explicit template instantiation of std::atomic. +// When building DLLs for Windows this is required as it is used as a data member of the exported SharedObject class. +// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +template struct U_COMMON_API std::atomic; +#endif + typedef std::atomic u_atomic_int32_t; #define ATOMIC_INT32_T_INITIALIZER(val) ATOMIC_VAR_INIT(val) @@ -205,7 +212,7 @@ umtx_atomic_dec(u_atomic_int32_t *p); U_NAMESPACE_END -#endif /* Low Level Atomic Ops Platfrom Chain */ +#endif /* Low Level Atomic Ops Platform Chain */ @@ -319,7 +326,7 @@ U_NAMESPACE_END *************************************************************************************************/ #if defined(U_USER_MUTEX_H) -// #inlcude "U_USER_MUTEX_H" +// #include "U_USER_MUTEX_H" #include U_MUTEX_XSTR(U_USER_MUTEX_H) #elif U_PLATFORM_USES_ONLY_WIN32_API @@ -389,7 +396,7 @@ struct UConditionVar { #else /* - * Unknow platform type. + * Unknown platform type. * This is an error condition. ICU requires mutexes. */ @@ -401,7 +408,7 @@ struct UConditionVar { /************************************************************************************** * - * Mutex Implementation function declaratations. + * Mutex Implementation function declarations. * Declarations are platform neutral. * Implementations, in umutex.cpp, are platform specific. * diff --git a/deps/icu-small/source/common/unames.cpp b/deps/icu-small/source/common/unames.cpp index 13a4572e1c38a1..5f752b0d1725cb 100644 --- a/deps/icu-small/source/common/unames.cpp +++ b/deps/icu-small/source/common/unames.cpp @@ -466,7 +466,7 @@ static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); } buffer += ndigits; - length += ndigits; + length += static_cast(ndigits); WRITE_CHAR(buffer, bufferLength, length, '>'); return length; diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h index 9df23f79c54c0c..61d1e8aca651d7 100644 --- a/deps/icu-small/source/common/unicode/bytestream.h +++ b/deps/icu-small/source/common/unicode/bytestream.h @@ -237,13 +237,12 @@ class StringByteSink : public ByteSink { * @stable ICU 4.2 */ StringByteSink(StringClass* dest) : dest_(dest) { } -#ifndef U_HIDE_DRAFT_API /** * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. * * @param dest pointer to string object to append to * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d - * @draft ICU 60 + * @stable ICU 60 */ StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { if (initialAppendCapacity > 0 && @@ -251,7 +250,6 @@ class StringByteSink : public ByteSink { dest->reserve(dest->length() + initialAppendCapacity); } } -#endif // U_HIDE_DRAFT_API /** * Append "bytes[0,n-1]" to this. * @param data the pointer to the bytes diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 4b77256d742784..477eb484d136b6 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -194,7 +194,6 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { char16_t *dest, int32_t destCapacity, Edits *edits, UErrorCode &errorCode); -#ifndef U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. @@ -214,7 +213,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToLower - * @draft ICU 60 + * @stable ICU 60 */ static void utf8ToLower( const char *locale, uint32_t options, @@ -240,7 +239,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToUpper - * @draft ICU 60 + * @stable ICU 60 */ static void utf8ToUpper( const char *locale, uint32_t options, @@ -280,7 +279,7 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * which must not indicate a failure before the function call. * * @see ucasemap_utf8ToTitle - * @draft ICU 60 + * @stable ICU 60 */ static void utf8ToTitle( const char *locale, uint32_t options, BreakIterator *iter, @@ -311,13 +310,12 @@ class U_COMMON_API CaseMap U_FINAL : public UMemory { * which must not indicate a failure before the function call. * * @see ucasemap_utf8FoldCase - * @draft ICU 60 + * @stable ICU 60 */ static void utf8Fold( uint32_t options, StringPiece src, ByteSink &sink, Edits *edits, UErrorCode &errorCode); -#endif // U_HIDE_DRAFT_API /** * Lowercases a UTF-8 string and optionally records edits. diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index 49d0e029a93b6e..a7c5f1a0c5ed56 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -28,6 +28,8 @@ U_NAMESPACE_BEGIN // Use the predefined value. #elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT # define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory") +#elif defined(U_IN_DOXYGEN) +# define U_ALIASING_BARRIER(ptr) #endif /** @@ -103,6 +105,7 @@ class U_COMMON_API Char16Ptr U_FINAL { #endif }; +/// \cond #ifdef U_ALIASING_BARRIER Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} @@ -134,6 +137,7 @@ Char16Ptr::~Char16Ptr() {} char16_t *Char16Ptr::get() const { return u_.cp; } #endif +/// \endcond /** * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. @@ -209,6 +213,7 @@ class U_COMMON_API ConstChar16Ptr U_FINAL { #endif }; +/// \cond #ifdef U_ALIASING_BARRIER ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} @@ -240,6 +245,7 @@ ConstChar16Ptr::~ConstChar16Ptr() {} const char16_t *ConstChar16Ptr::get() const { return u_.cp; } #endif +/// \endcond /** * Converts from const char16_t * to const UChar *. diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h index 91e5ae3fa2e2e6..243fa17b87917a 100644 --- a/deps/icu-small/source/common/unicode/docmain.h +++ b/deps/icu-small/source/common/unicode/docmain.h @@ -88,6 +88,11 @@ * icu::UnicodeSet * * + * Maps from Unicode Code Points to Integer Values + * ucptrie.h, umutablecptrie.h + * C API + * + * * Maps from Strings to Integer Values * (no C API) * icu::BytesTrie, icu::UCharsTrie @@ -208,9 +213,9 @@ * C API * * - * Layout Engine/Complex Text Layout - * loengine.h - * icu::LayoutEngine,icu::ParagraphLayout + * Paragraph Layout / Complex Text Layout + * playout.h + * icu::ParagraphLayout * * * ICU I/O diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index f767a8d3b494c3..79e98b0cc27ac6 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -24,8 +24,8 @@ class UnicodeString; * in linear progression. Does not support moving/reordering of text. * * There are two types of edits: change edits and no-change edits. Add edits to - * instances of this class using {@link #addReplace(int, int)} (for change edits) and - * {@link #addUnchanged(int)} (for no-change edits). Change edits are retained with full granularity, + * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and + * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity, * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one * mapping between code points in the source and destination strings. * @@ -62,11 +62,11 @@ class UnicodeString; * * * The "fine changes" and "coarse changes" iterators will step through only the change edits when their - * {@link Edits::Iterator#next()} methods are called. They are identical to the non-change iterators when - * their {@link Edits::Iterator#findSourceIndex(int)} or {@link Edits::Iterator#findDestinationIndex(int)} + * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when + * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()` * methods are used to walk through the string. * - * For examples of how to use this class, see the test TestCaseMapEditsIteratorDocs in + * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in * UCharacterCaseTest.java. * * An Edits object tracks a separate UErrorCode, but ICU string transformation functions @@ -86,7 +86,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * Copy constructor. * @param other source edits - * @draft ICU 60 + * @stable ICU 60 */ Edits(const Edits &other) : array(stackArray), capacity(STACK_CAPACITY), length(other.length), @@ -98,7 +98,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * Move constructor, might leave src empty. * This object will have the same contents that the source object had. * @param src source edits - * @draft ICU 60 + * @stable ICU 60 */ Edits(Edits &&src) U_NOEXCEPT : array(stackArray), capacity(STACK_CAPACITY), length(src.length), @@ -117,7 +117,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * Assignment operator. * @param other source edits * @return *this - * @draft ICU 60 + * @stable ICU 60 */ Edits &operator=(const Edits &other); @@ -127,7 +127,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * The behavior is undefined if *this and src are the same object. * @param src source edits * @return *this - * @draft ICU 60 + * @stable ICU 60 */ Edits &operator=(Edits &&src) U_NOEXCEPT; @@ -173,13 +173,11 @@ class U_COMMON_API Edits U_FINAL : public UMemory { */ UBool hasChanges() const { return numChanges != 0; } -#ifndef U_HIDE_DRAFT_API /** * @return the number of change edits - * @draft ICU 60 + * @stable ICU 60 */ int32_t numberOfChanges() const { return numChanges; } -#endif // U_HIDE_DRAFT_API /** * Access to the list of edits. @@ -189,9 +187,9 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars. * - * The iterator can be moved between edits using the {@link #next()}, {@link #findSourceIndex(int)}, - * and {@link #findDestinationIndex(int)} methods. Calling any of these methods mutates the iterator - * to make it point to the corresponding edit. + * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`, + * and `findDestinationIndex(int32_t, UErrorCode &)` methods. + * Calling any of these methods mutates the iterator to make it point to the corresponding edit. * * For more information, see the documentation for {@link Edits}. * @@ -202,7 +200,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { struct U_COMMON_API Iterator U_FINAL : public UMemory { /** * Default constructor, empty iterator. - * @draft ICU 60 + * @stable ICU 60 */ Iterator() : array(nullptr), index(0), length(0), @@ -253,7 +251,6 @@ class U_COMMON_API Edits U_FINAL : public UMemory { return findIndex(i, TRUE, errorCode) == 0; } -#ifndef U_HIDE_DRAFT_API /** * Moves the iterator to the edit that contains the destination index. * The destination index may be found in a no-change edit @@ -271,7 +268,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return TRUE if the edit for the destination index was found - * @draft ICU 60 + * @stable ICU 60 */ UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { return findIndex(i, FALSE, errorCode) == 0; @@ -297,7 +294,7 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return destination index; undefined if i is not 0..string length - * @draft ICU 60 + * @stable ICU 60 */ int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); @@ -321,10 +318,9 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return source index; undefined if i is not 0..string length - * @draft ICU 60 + * @stable ICU 60 */ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); -#endif // U_HIDE_DRAFT_API /** * Returns whether the edit currently represented by the iterator is a change edit. @@ -366,13 +362,13 @@ class U_COMMON_API Edits U_FINAL : public UMemory { /** * The start index of the current span in the replacement string; the span has length * {@link #newLength}. Well-defined only if the current edit is a change edit. - *

- * The replacement string is the concatenation of all substrings of the destination + * + * The *replacement string* is the concatenation of all substrings of the destination * string corresponding to change edits. - *

+ * * This method is intended to be used together with operations that write only replacement - * characters (e.g., {@link CaseMap#omitUnchangedText()}). The source string can then be modified - * in-place. + * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option). + * The source string can then be modified in-place. * * @return the current index into the replacement-characters-only string, * not counting unchanged spans @@ -475,7 +471,6 @@ class U_COMMON_API Edits U_FINAL : public UMemory { return Iterator(array, length, FALSE, FALSE); } -#ifndef U_HIDE_DRAFT_API /** * Merges the two input Edits and appends the result to this object. * @@ -501,10 +496,9 @@ class U_COMMON_API Edits U_FINAL : public UMemory { * or else the function returns immediately. Check for U_FAILURE() * on output or use with function chaining. (See User Guide for details.) * @return *this, with the merged edits appended - * @draft ICU 60 + * @stable ICU 60 */ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); -#endif // U_HIDE_DRAFT_API private: void releaseArray() U_NOEXCEPT; diff --git a/deps/icu-small/source/common/unicode/enumset.h b/deps/icu-small/source/common/unicode/enumset.h index 82b2074ec35875..82d633ed016b5e 100644 --- a/deps/icu-small/source/common/unicode/enumset.h +++ b/deps/icu-small/source/common/unicode/enumset.h @@ -28,6 +28,7 @@ U_NAMESPACE_BEGIN * enum bitset for boolean fields. Similar to Java EnumSet<>. * Needs to range check. Used for private instance variables. * @internal + * \cond */ template class EnumSet { @@ -60,6 +61,8 @@ class EnumSet { uint32_t fBools; }; +/** \endcond */ + U_NAMESPACE_END #endif /* U_SHOW_CPLUSPLUS_API */ diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index 751d1faf40454f..2444114e9a14bf 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -67,16 +67,14 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); #endif /* U_HIDE_DEPRECATED_API */ -#ifndef U_HIDE_DRAFT_API /** * Construct an empty FilteredBreakIteratorBuilder. * In this state, it will not suppress any segment boundaries. * @param status The error code. * @return the new builder - * @draft ICU 60 + * @stable ICU 60 */ static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); -#endif /* U_HIDE_DRAFT_API */ /** * Suppress a certain string from being the end of a segment. @@ -95,7 +93,7 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * This function does not create any new segment boundaries, but only serves to un-do * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of * locale data which may be suppressing certain strings. - * @param exception the exception to remove + * @param string the exception to remove * @param status error code * @return returns TRUE if the string was present and now removed, * FALSE if the call was a no-op because the string was not being suppressed. @@ -114,7 +112,6 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; -#ifndef U_HIDE_DRAFT_API /** * Wrap (adopt) an existing break iterator in a new filtered instance. * The resulting BreakIterator is owned by the caller. @@ -126,12 +123,11 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * @param adoptBreakIterator the break iterator to adopt * @param status error code * @return the new BreakIterator, owned by the caller. - * @draft ICU 60 + * @stable ICU 60 */ inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { return build(adoptBreakIterator, status); } -#endif /* U_HIDE_DRAFT_API */ protected: /** diff --git a/deps/icu-small/source/common/unicode/icuplug.h b/deps/icu-small/source/common/unicode/icuplug.h index 2a11b96be69c53..827cbe94b61cf8 100644 --- a/deps/icu-small/source/common/unicode/icuplug.h +++ b/deps/icu-small/source/common/unicode/icuplug.h @@ -110,7 +110,7 @@ #include "unicode/utypes.h" -#if UCONFIG_ENABLE_PLUGINS +#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN) diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index c84774e07fd395..415bced82299e3 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -31,6 +31,10 @@ #ifndef LOCID_H #define LOCID_H +#include "unicode/bytestream.h" +#include "unicode/localpointer.h" +#include "unicode/strenum.h" +#include "unicode/stringpiece.h" #include "unicode/utypes.h" #include "unicode/uobject.h" #include "unicode/putil.h" @@ -280,6 +284,16 @@ class U_COMMON_API Locale : public UObject { */ Locale(const Locale& other); +#ifndef U_HIDE_DRAFT_API + /** + * Move constructor; might leave source in bogus state. + * This locale will have the same contents that the source locale had. + * + * @param other The Locale object being moved in. + * @draft ICU 63 + */ + Locale(Locale&& other) U_NOEXCEPT; +#endif // U_HIDE_DRAFT_API /** * Destructor @@ -296,6 +310,19 @@ class U_COMMON_API Locale : public UObject { */ Locale& operator=(const Locale& other); +#ifndef U_HIDE_DRAFT_API + /** + * Move assignment operator; might leave source in bogus state. + * This locale will have the same contents that the source locale had. + * The behavior is undefined if *this and the source are the same object. + * + * @param other The Locale object being moved in. + * @return *this + * @draft ICU 63 + */ + Locale& operator=(Locale&& other) U_NOEXCEPT; +#endif // U_HIDE_DRAFT_API + /** * Checks if two locale keys are the same. * @@ -362,6 +389,55 @@ class U_COMMON_API Locale : public UObject { UErrorCode& success); #endif /* U_HIDE_SYSTEM_API */ +#ifndef U_HIDE_DRAFT_API + /** + * Returns a Locale for the specified BCP47 language tag string. + * If the specified language tag contains any ill-formed subtags, + * the first such subtag and all following subtags are ignored. + *

+ * This implements the 'Language-Tag' production of BCP47, and so + * supports grandfathered (regular and irregular) as well as private + * use language tags. Private use tags are represented as 'x-whatever', + * and grandfathered tags are converted to their canonical replacements + * where they exist. Note that a few grandfathered tags have no modern + * replacement, these will be converted using the fallback described in + * the first paragraph, so some information might be lost. + * @param tag the input BCP47 language tag. + * @param status error information if creating the Locale failed. + * @return the Locale for the specified BCP47 language tag. + * @draft ICU 63 + */ + static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status); + + /** + * Returns a well-formed language tag for this Locale. + *

+ * Note: Any locale fields which do not satisfy the BCP47 syntax + * requirement will be silently omitted from the result. + * + * If this function fails, partial output may have been written to the sink. + * + * @param sink the output sink receiving the BCP47 language + * tag for this Locale. + * @param status error information if creating the language tag failed. + * @draft ICU 63 + */ + void toLanguageTag(ByteSink& sink, UErrorCode& status) const; + + /** + * Returns a well-formed language tag for this Locale. + *

+ * Note: Any locale fields which do not satisfy the BCP47 syntax + * requirement will be silently omitted from the result. + * + * @param status error information if creating the language tag failed. + * @return the BCP47 language tag for this Locale. + * @draft ICU 63 + */ + template + inline StringClass toLanguageTag(UErrorCode& status) const; +#endif // U_HIDE_DRAFT_API + /** * Creates a locale which has had minimal canonicalization * as per uloc_getName(). @@ -432,6 +508,69 @@ class U_COMMON_API Locale : public UObject { */ const char * getBaseName() const; +#ifndef U_HIDE_DRAFT_API + /** + * Add the likely subtags for this Locale, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If this Locale is already in the maximal form, or not valid, or there is + * no data available for maximization, the Locale will be unchanged. + * + * For example, "und-Zzzz" cannot be maximized, since there is no + * reasonable maximization. + * + * Examples: + * + * "en" maximizes to "en_Latn_US" + * + * "de" maximizes to "de_Latn_US" + * + * "sr" maximizes to "sr_Cyrl_RS" + * + * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) + * + * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) + * + * @param status error information if maximizing this Locale failed. + * If this Locale is not well-formed, the error code is + * U_ILLEGAL_ARGUMENT_ERROR. + * @draft ICU 63 + */ + void addLikelySubtags(UErrorCode& status); + + /** + * Minimize the subtags for this Locale, per the algorithm described + * in the following CLDR technical report: + * + * http://www.unicode.org/reports/tr35/#Likely_Subtags + * + * If this Locale is already in the minimal form, or not valid, or there is + * no data available for minimization, the Locale will be unchanged. + * + * Since the minimization algorithm relies on proper maximization, see the + * comments for addLikelySubtags for reasons why there might not be any + * data. + * + * Examples: + * + * "en_Latn_US" minimizes to "en" + * + * "de_Latn_US" minimizes to "de" + * + * "sr_Cyrl_RS" minimizes to "sr" + * + * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the + * script, and minimizing to "zh" would imply "zh_Hans_CN".) + * + * @param status error information if maximizing this Locale failed. + * If this Locale is not well-formed, the error code is + * U_ILLEGAL_ARGUMENT_ERROR. + * @draft ICU 63 + */ + void minimizeSubtags(UErrorCode& status); +#endif // U_HIDE_DRAFT_API /** * Gets the list of keywords for the specified locale. @@ -439,13 +578,62 @@ class U_COMMON_API Locale : public UObject { * @param status the status code * @return pointer to StringEnumeration class, or NULL if there are no keywords. * Client must dispose of it by calling delete. + * @see getKeywords * @stable ICU 2.8 */ StringEnumeration * createKeywords(UErrorCode &status) const; +#ifndef U_HIDE_DRAFT_API + + /** + * Gets the list of Unicode keywords for the specified locale. + * + * @param status the status code + * @return pointer to StringEnumeration class, or NULL if there are no keywords. + * Client must dispose of it by calling delete. + * @see getUnicodeKeywords + * @draft ICU 63 + */ + StringEnumeration * createUnicodeKeywords(UErrorCode &status) const; + + /** + * Gets the set of keywords for this Locale. + * + * A wrapper to call createKeywords() and write the resulting + * keywords as standard strings (or compatible objects) into any kind of + * container that can be written to by an STL style output iterator. + * + * @param iterator an STL style output iterator to write the keywords to. + * @param status error information if creating set of keywords failed. + * @draft ICU 63 + */ + template + inline void getKeywords(OutputIterator iterator, UErrorCode& status) const; + + /** + * Gets the set of Unicode keywords for this Locale. + * + * A wrapper to call createUnicodeKeywords() and write the resulting + * keywords as standard strings (or compatible objects) into any kind of + * container that can be written to by an STL style output iterator. + * + * @param iterator an STL style output iterator to write the keywords to. + * @param status error information if creating set of keywords failed. + * @draft ICU 63 + */ + template + inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const; + +#endif // U_HIDE_DRAFT_API + /** * Gets the value for a keyword. * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * * @param keywordName name of the keyword for which we want the value. Case insensitive. * @param buffer The buffer to receive the keyword value. * @param bufferCapacity The capacity of receiving buffer @@ -456,12 +644,81 @@ class U_COMMON_API Locale : public UObject { */ int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const; +#ifndef U_HIDE_DRAFT_API + /** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param sink the sink to receive the keyword value. + * @param status error information if getting the value failed. + * @draft ICU 63 + */ + void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; + + /** + * Gets the value for a keyword. + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param status error information if getting the value failed. + * @return the keyword value. + * @draft ICU 63 + */ + template + inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const; + + /** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param sink the sink to receive the keyword value. + * @param status error information if getting the value failed. + * @draft ICU 63 + */ + void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const; + + /** + * Gets the Unicode value for a Unicode keyword. + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword for which we want the value. + * @param status error information if getting the value failed. + * @return the keyword value. + * @draft ICU 63 + */ + template + inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const; +#endif // U_HIDE_DRAFT_API + /** * Sets or removes the value for a keyword. * * For removing all keywords, use getBaseName(), * and construct a new Locale if it differs from getName(). * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * * @param keywordName name of the keyword to be set. Case insensitive. * @param keywordValue value of the keyword to be set. If 0-length or * NULL, will result in the keyword being removed. No error is given if @@ -472,6 +729,48 @@ class U_COMMON_API Locale : public UObject { */ void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status); +#ifndef U_HIDE_DRAFT_API + /** + * Sets or removes the value for a keyword. + * + * For removing all keywords, use getBaseName(), + * and construct a new Locale if it differs from getName(). + * + * This uses legacy keyword=value pairs, like "collation=phonebook". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword to be set. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param status Returns any error information while performing this operation. + * @draft ICU 63 + */ + void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); + + /** + * Sets or removes the Unicode value for a Unicode keyword. + * + * For removing all keywords, use getBaseName(), + * and construct a new Locale if it differs from getName(). + * + * This uses Unicode key-value pairs, like "co-phonebk". + * + * ICU4C doesn't do automatic conversion between legacy and Unicode + * keywords and values in getters and setters (as opposed to ICU4J). + * + * @param keywordName name of the keyword to be set. + * @param keywordValue value of the keyword to be set. If 0-length or + * NULL, will result in the keyword being removed. No error is given if + * that keyword does not exist. + * @param status Returns any error information while performing this operation. + * @draft ICU 63 + */ + void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status); +#endif // U_HIDE_DRAFT_API + /** * returns the locale's three-letter language code, as specified * in ISO draft standard ISO-639-2. @@ -759,12 +1058,12 @@ class U_COMMON_API Locale : public UObject { /** * A friend to allow the default locale to be set by either the C or C++ API. - * @internal + * @internal (private) */ friend Locale *locale_set_default_internal(const char *, UErrorCode& status); /** - * @internal + * @internal (private) */ friend void U_CALLCONV locale_available_init(); }; @@ -775,6 +1074,17 @@ Locale::operator!=(const Locale& other) const return !operator==(other); } +#ifndef U_HIDE_DRAFT_API +template inline StringClass +Locale::toLanguageTag(UErrorCode& status) const +{ + StringClass result; + StringByteSink sink(&result); + toLanguageTag(sink, status); + return result; +} +#endif // U_HIDE_DRAFT_API + inline const char * Locale::getCountry() const { @@ -805,6 +1115,62 @@ Locale::getName() const return fullName; } +#ifndef U_HIDE_DRAFT_API + +template inline void +Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const +{ + LocalPointer keys(createKeywords(status)); + if (U_FAILURE(status)) { + return; + } + for (;;) { + int32_t resultLength; + const char* buffer = keys->next(&resultLength, status); + if (U_FAILURE(status) || buffer == nullptr) { + return; + } + *iterator++ = StringClass(buffer, resultLength); + } +} + +template inline void +Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const +{ + LocalPointer keys(createUnicodeKeywords(status)); + if (U_FAILURE(status)) { + return; + } + for (;;) { + int32_t resultLength; + const char* buffer = keys->next(&resultLength, status); + if (U_FAILURE(status) || buffer == nullptr) { + return; + } + *iterator++ = StringClass(buffer, resultLength); + } +} + +template inline StringClass +Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const +{ + StringClass result; + StringByteSink sink(&result); + getKeywordValue(keywordName, sink, status); + return result; +} + +template inline StringClass +Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const +{ + StringClass result; + StringByteSink sink(&result); + getUnicodeKeywordValue(keywordName, sink, status); + return result; +} + +#endif // U_HIDE_DRAFT_API + inline UBool Locale::isBogus(void) const { return fIsBogus; diff --git a/deps/icu-small/source/common/unicode/messagepattern.h b/deps/icu-small/source/common/unicode/messagepattern.h index f28adafee0d2ac..9f2a86551ca417 100644 --- a/deps/icu-small/source/common/unicode/messagepattern.h +++ b/deps/icu-small/source/common/unicode/messagepattern.h @@ -771,8 +771,8 @@ class U_COMMON_API MessagePattern : public UObject { * @stable ICU 4.8 */ UMessagePatternArgType getArgType() const { - UMessagePatternPartType type=getType(); - if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { + UMessagePatternPartType msgType=getType(); + if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) { return (UMessagePatternArgType)value; } else { return UMSGPAT_ARG_TYPE_NONE; diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h index 8a6d7138021b56..4caa0e31034b2d 100644 --- a/deps/icu-small/source/common/unicode/normalizer2.h +++ b/deps/icu-small/source/common/unicode/normalizer2.h @@ -241,7 +241,7 @@ class U_COMMON_API Normalizer2 : public UObject { * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) - * @draft ICU 60 + * @stable ICU 60 */ virtual void normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, @@ -391,7 +391,7 @@ class U_COMMON_API Normalizer2 : public UObject { * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) * @return TRUE if s is normalized - * @draft ICU 60 + * @stable ICU 60 */ virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const; @@ -559,7 +559,7 @@ class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { * pass the U_SUCCESS() test, or else the function returns * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) - * @draft ICU 60 + * @stable ICU 60 */ virtual void normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, @@ -686,7 +686,7 @@ class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { * immediately. Check for U_FAILURE() on output or use with * function chaining. (See User Guide for details.) * @return TRUE if s is normalized - * @draft ICU 60 + * @stable ICU 60 */ virtual UBool isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE; diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index d9636580c309b5..ee0d8b7a000303 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -38,7 +38,7 @@ * and/or from other macros that are predefined by the compiler * or defined in standard (POSIX or platform or compiler) headers. * - * As a temporary workaround, you can add an explicit #define for some macros + * As a temporary workaround, you can add an explicit \#define for some macros * before it is first tested, or add an equivalent -D macro definition * to the compiler's command line. * @@ -207,6 +207,9 @@ # define CYGWINMSVC #endif */ +#ifdef U_IN_DOXYGEN +# define CYGWINMSVC +#endif /** * \def U_PLATFORM_USES_ONLY_WIN32_API @@ -417,6 +420,9 @@ #ifndef __has_cpp_attribute # define __has_cpp_attribute(x) 0 #endif +#ifndef __has_declspec_attribute +# define __has_declspec_attribute(x) 0 +#endif #ifndef __has_builtin # define __has_builtin(x) 0 #endif @@ -493,13 +499,8 @@ namespace std { */ #ifdef U_NOEXCEPT /* Use the predefined value. */ -#elif defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS /* Visual Studio */ -# define U_NOEXCEPT -#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_noexcept) || __has_extension(cxx_noexcept) \ - || (defined(_MSC_VER) && _MSC_VER >= 1900) /* Visual Studio 2015 */ -# define U_NOEXCEPT noexcept #else -# define U_NOEXCEPT +# define U_NOEXCEPT noexcept #endif /** @@ -519,6 +520,8 @@ namespace std { (__has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough")) # define U_FALLTHROUGH [[clang::fallthrough]] # endif +#elif defined(__GNUC__) && (__GNUC__ >= 7) +# define U_FALLTHROUGH __attribute__((fallthrough)) #endif #ifndef U_FALLTHROUGH @@ -763,7 +766,8 @@ namespace std { #elif U_HAVE_CHAR16_T \ || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \ || (defined(__HP_aCC) && __HP_aCC >= 035000) \ - || (defined(__HP_cc) && __HP_cc >= 111106) + || (defined(__HP_cc) && __HP_cc >= 111106) \ + || (defined(U_IN_DOXYGEN)) # define U_DECLARE_UTF16(string) u ## string #elif U_SIZEOF_WCHAR_T == 2 \ && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__))) @@ -782,6 +786,8 @@ namespace std { /* Use the predefined value. */ #elif defined(U_STATIC_IMPLEMENTATION) # define U_EXPORT +#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) +# define U_EXPORT __declspec(dllexport) #elif defined(__GNUC__) # define U_EXPORT __attribute__((visibility("default"))) #elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \ @@ -789,8 +795,6 @@ namespace std { # define U_EXPORT __global /*#elif defined(__HP_aCC) || defined(__HP_cc) # define U_EXPORT __declspec(dllexport)*/ -#elif defined(_MSC_VER) -# define U_EXPORT __declspec(dllexport) #else # define U_EXPORT #endif @@ -806,7 +810,7 @@ namespace std { #ifdef U_IMPORT /* Use the predefined value. */ -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) || (__has_declspec_attribute(dllexport) && __has_declspec_attribute(dllimport)) /* Windows needs to export/import data. */ # define U_IMPORT __declspec(dllimport) #else diff --git a/deps/icu-small/source/common/unicode/ptypes.h b/deps/icu-small/source/common/unicode/ptypes.h index 6eaf2dbf035725..70324ffee3b9c4 100644 --- a/deps/icu-small/source/common/unicode/ptypes.h +++ b/deps/icu-small/source/common/unicode/ptypes.h @@ -83,6 +83,7 @@ typedef unsigned char uint8_t; #else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */ +/// \cond #if ! U_HAVE_INT8_T typedef signed char int8_t; #endif @@ -122,6 +123,7 @@ typedef unsigned int uint32_t; typedef unsigned long long uint64_t; #endif #endif +/// \endcond #endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */ diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index e9b82cd520736b..47abd554eaf0f1 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -99,7 +99,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * If present, UStack of LanguageBreakEngine objects that might handle * dictionary characters. Searched from top to bottom to find an object to * handle a given character. - * @internal + * @internal (private) */ UStack *fLanguageBreakEngines; @@ -108,14 +108,14 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * If present, the special LanguageBreakEngine used for handling * characters that are in the dictionary set, but not handled by any * LangugageBreakEngine. - * @internal + * @internal (private) */ UnhandledEngine *fUnhandledBreakEngine; /** * Counter for the number of characters encountered with the "dictionary" * flag set. - * @internal + * @internal (private) */ uint32_t fDictionaryCharCount; @@ -150,7 +150,7 @@ class U_COMMON_API RuleBasedBreakIterator /*U_FINAL*/ : public BreakIterator { * * The break iterator adopts the memory, and will * free it when done. - * @internal + * @internal (private) */ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); diff --git a/deps/icu-small/source/common/unicode/stringoptions.h b/deps/icu-small/source/common/unicode/stringoptions.h index f2de96e9634a02..7b9f70944f62db 100644 --- a/deps/icu-small/source/common/unicode/stringoptions.h +++ b/deps/icu-small/source/common/unicode/stringoptions.h @@ -39,8 +39,6 @@ */ #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 -#ifndef U_HIDE_DRAFT_API - /** * Titlecase the string as a whole rather than each word. * (Titlecase only the character at index 0, possibly adjusted.) @@ -50,7 +48,7 @@ * including both an options bit and an explicit BreakIterator. * * @see U_TITLECASE_ADJUST_TO_CASED - * @draft ICU 60 + * @stable ICU 60 */ #define U_TITLECASE_WHOLE_STRING 0x20 @@ -63,12 +61,10 @@ * including both an options bit and an explicit BreakIterator. * * @see U_TITLECASE_ADJUST_TO_CASED - * @draft ICU 60 + * @stable ICU 60 */ #define U_TITLECASE_SENTENCES 0x40 -#endif // U_HIDE_DRAFT_API - /** * Do not lowercase non-initial parts of words when titlecasing. * Option bit for titlecasing APIs that take an options bit set. @@ -112,8 +108,6 @@ */ #define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 -#ifndef U_HIDE_DRAFT_API - /** * Adjust each titlecasing BreakIterator index to the next cased character. * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) @@ -130,7 +124,7 @@ * It is an error to specify multiple titlecasing adjustment options together. * * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @draft ICU 60 + * @stable ICU 60 */ #define U_TITLECASE_ADJUST_TO_CASED 0x400 @@ -141,7 +135,7 @@ * @see CaseMap * @see Edits * @see Normalizer2 - * @draft ICU 60 + * @stable ICU 60 */ #define U_EDITS_NO_RESET 0x2000 @@ -153,12 +147,10 @@ * @see CaseMap * @see Edits * @see Normalizer2 - * @draft ICU 60 + * @stable ICU 60 */ #define U_OMIT_UNCHANGED_TEXT 0x4000 -#endif // U_HIDE_DRAFT_API - /** * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: * Compare strings in code point order instead of code unit order. diff --git a/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/icu-small/source/common/unicode/stringtriebuilder.h index 8d2b229413cd6d..c27fbd67960d70 100644 --- a/deps/icu-small/source/common/unicode/stringtriebuilder.h +++ b/deps/icu-small/source/common/unicode/stringtriebuilder.h @@ -26,8 +26,10 @@ */ // Forward declaration. +/// \cond struct UHashtable; typedef struct UHashtable UHashtable; +/// \endcond /** * Build options for BytesTrieBuilder and CharsTrieBuilder. @@ -64,7 +66,7 @@ class U_COMMON_API StringTrieBuilder : public UObject { public: #ifndef U_HIDE_INTERNAL_API /** @internal */ - static UBool hashNode(const void *node); + static int32_t hashNode(const void *node); /** @internal */ static UBool equalNodes(const void *left, const void *right); #endif /* U_HIDE_INTERNAL_API */ @@ -188,7 +190,10 @@ class U_COMMON_API StringTrieBuilder : public UObject { // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API, // it is needed for layout of other objects. - /** @internal */ + /** + * @internal + * \cond + */ class Node : public UObject { public: Node(int32_t initialHash) : hash(initialHash), offset(0) {} @@ -391,7 +396,9 @@ class U_COMMON_API StringTrieBuilder : public UObject { int32_t length; Node *next; // A branch sub-node. }; + #endif /* U_HIDE_INTERNAL_API */ + /// \endcond /** @internal */ virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, diff --git a/deps/icu-small/source/common/unicode/ubidi.h b/deps/icu-small/source/common/unicode/ubidi.h index 254a5bf9ef469f..f4875c8801efd6 100644 --- a/deps/icu-small/source/common/unicode/ubidi.h +++ b/deps/icu-small/source/common/unicode/ubidi.h @@ -323,6 +323,10 @@ * these special values are designed that way. Also, the implementation * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. * + * Note: The numeric values of the related constants will not change: + * They are tied to the use of 7-bit byte values (plus the override bit) + * and of the UBiDiLevel=uint8_t data type in this API. + * * @see UBIDI_DEFAULT_LTR * @see UBIDI_DEFAULT_RTL * @see UBIDI_LEVEL_OVERRIDE @@ -386,6 +390,8 @@ typedef uint8_t UBiDiLevel; /** * Maximum explicit embedding level. + * Same as the max_depth value in the + * Unicode Bidirectional Algorithm. * (The maximum resolved level can be up to UBIDI_MAX_EXPLICIT_LEVEL+1). * @stable ICU 2.0 */ @@ -1996,7 +2002,7 @@ U_CDECL_BEGIN * * @return The directional property / Bidi class for the given code point * c if the default class has been overridden, or - * #U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 + * u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 * if the standard Bidi class value for c is to be used. * @see ubidi_setClassCallback * @see ubidi_getClassCallback @@ -2010,7 +2016,7 @@ U_CDECL_END /** * Retrieve the Bidi class for a given code point. *

If a #UBiDiClassCallback callback is defined and returns a - * value other than #U_BIDI_CLASS_DEFAULT=u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1, + * value other than u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1, * that value is used; otherwise the default class determination mechanism is invoked.

* * @param pBiDi is the paragraph UBiDi object. diff --git a/deps/icu-small/source/common/unicode/ubiditransform.h b/deps/icu-small/source/common/unicode/ubiditransform.h index 627b005ed45f8d..5c08ed5df0fd90 100644 --- a/deps/icu-small/source/common/unicode/ubiditransform.h +++ b/deps/icu-small/source/common/unicode/ubiditransform.h @@ -26,33 +26,38 @@ /** * \file * \brief Bidi Transformations + */ + +/** + * `UBiDiOrder` indicates the order of text. * - * UBiDiOrder indicates the order of text.

* This bidi transformation engine supports all possible combinations (4 in * total) of input and output text order: - *

    - *
  • : unless the output direction is RTL, this - * corresponds to a normal operation of the Bidi algorithm as described in the - * Unicode Technical Report and implemented by UBiDi when the - * reordering mode is set to UBIDI_REORDER_DEFAULT. Visual RTL - * mode is not supported by UBiDi and is accomplished through - * reversing a visual LTR string,
  • - *
  • : unless the input direction is RTL, this - * corresponds to an "inverse bidi algorithm" in UBiDi with the - * reordering mode set to UBIDI_REORDER_INVERSE_LIKE_DIRECT. - * Visual RTL mode is not not supported by UBiDi and is - * accomplished through reversing a visual LTR string,
  • - *
  • : if the input and output base directions - * mismatch, this corresponds to the UBiDi implementation with the - * reordering mode set to UBIDI_REORDER_RUNS_ONLY; and if the - * input and output base directions are identical, the transformation engine - * will only handle character mirroring and Arabic shaping operations without - * reordering,
  • - *
  • : this reordering mode is not supported by - * the UBiDi engine; it implies character mirroring, Arabic - * shaping, and - if the input/output base directions mismatch - string - * reverse operations.
  • - *
+ * + * - : unless the output direction is RTL, this + * corresponds to a normal operation of the Bidi algorithm as described in the + * Unicode Technical Report and implemented by `UBiDi` when the + * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL + * mode is not supported by `UBiDi` and is accomplished through + * reversing a visual LTR string, + * + * - : unless the input direction is RTL, this + * corresponds to an "inverse bidi algorithm" in `UBiDi` with the + * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. + * Visual RTL mode is not not supported by `UBiDi` and is + * accomplished through reversing a visual LTR string, + * + * - : if the input and output base directions + * mismatch, this corresponds to the `UBiDi` implementation with the + * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the + * input and output base directions are identical, the transformation engine + * will only handle character mirroring and Arabic shaping operations without + * reordering, + * + * - : this reordering mode is not supported by + * the `UBiDi` engine; it implies character mirroring, Arabic + * shaping, and - if the input/output base directions mismatch - string + * reverse operations. * @see ubidi_setInverse * @see ubidi_setReorderingMode * @see UBIDI_REORDER_DEFAULT diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 6d31083e66ee0d..9e180db53b665a 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -27,6 +27,24 @@ #include "unicode/utypes.h" #include "unicode/stringoptions.h" +#include "unicode/ucpmap.h" + +#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) + +#define USET_DEFINED + +/** + * USet is the C API type corresponding to C++ class UnicodeSet. + * It is forward-declared here to avoid including unicode/uset.h file if related + * APIs are not used. + * + * @see ucnv_getUnicodeSet + * @stable ICU 2.4 + */ +typedef struct USet USet; + +#endif + U_CDECL_BEGIN @@ -61,6 +79,18 @@ U_CDECL_BEGIN * "About the Unicode Character Database" (http://www.unicode.org/ucd/) * and the ICU User Guide chapter on Properties (http://icu-project.org/userguide/properties.html). * + * Many properties are accessible via generic functions that take a UProperty selector. + * - u_hasBinaryProperty() returns a binary value (TRUE/FALSE) per property and code point. + * - u_getIntPropertyValue() returns an integer value per property and code point. + * For each supported enumerated or catalog property, there is + * an enum type for all of the property's values, and + * u_getIntPropertyValue() returns the numeric values of those constants. + * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with + * all code points for which the property is true. + * - u_getIntPropertyMap() returns a map for each + * ICU-supported enumerated/catalog/int-valued property which + * maps all Unicode code points to their values for that property. + * * Many functions are designed to match java.lang.Character functions. * See the individual function documentation, * and see the JDK 1.4 java.lang.Character documentation @@ -546,12 +576,34 @@ typedef enum UProperty { (http://www.unicode.org/reports/tr9/) Returns UBidiPairedBracketType values. @stable ICU 52 */ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015, + /** + * Enumerated property Indic_Positional_Category. + * New in Unicode 6.0 as provisional property Indic_Matra_Category; + * renamed and changed to informative in Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt + * @stable ICU 63 + */ + UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016, + /** + * Enumerated property Indic_Syllabic_Category. + * New in Unicode 6.0 as provisional; informative since Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt + * @stable ICU 63 + */ + UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017, + /** + * Enumerated property Vertical_Orientation. + * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). + * New as a UCD property in Unicode 10.0. + * @stable ICU 63 + */ + UCHAR_VERTICAL_ORIENTATION=0x1018, #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for enumerated/integer Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_INT_LIMIT=0x1016, + UCHAR_INT_LIMIT=0x1019, #endif // U_HIDE_DEPRECATED_API /** Bitmask property General_Category_Mask. @@ -2320,6 +2372,161 @@ typedef enum UHangulSyllableType { #endif // U_HIDE_DEPRECATED_API } UHangulSyllableType; +/** + * Indic Positional Category constants. + * + * @see UCHAR_INDIC_POSITIONAL_CATEGORY + * @stable ICU 63 + */ +typedef enum UIndicPositionalCategory { + /* + * Note: UIndicPositionalCategory constants are parsed by preparseucd.py. + * It matches lines like + * U_INPC_ + */ + + /** @stable ICU 63 */ + U_INPC_NA, + /** @stable ICU 63 */ + U_INPC_BOTTOM, + /** @stable ICU 63 */ + U_INPC_BOTTOM_AND_LEFT, + /** @stable ICU 63 */ + U_INPC_BOTTOM_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_LEFT, + /** @stable ICU 63 */ + U_INPC_LEFT_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_OVERSTRUCK, + /** @stable ICU 63 */ + U_INPC_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP, + /** @stable ICU 63 */ + U_INPC_TOP_AND_BOTTOM, + /** @stable ICU 63 */ + U_INPC_TOP_AND_BOTTOM_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_LEFT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_LEFT_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_TOP_AND_RIGHT, + /** @stable ICU 63 */ + U_INPC_VISUAL_ORDER_LEFT, +} UIndicPositionalCategory; + +/** + * Indic Syllabic Category constants. + * + * @see UCHAR_INDIC_SYLLABIC_CATEGORY + * @stable ICU 63 + */ +typedef enum UIndicSyllabicCategory { + /* + * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py. + * It matches lines like + * U_INSC_ + */ + + /** @stable ICU 63 */ + U_INSC_OTHER, + /** @stable ICU 63 */ + U_INSC_AVAGRAHA, + /** @stable ICU 63 */ + U_INSC_BINDU, + /** @stable ICU 63 */ + U_INSC_BRAHMI_JOINING_NUMBER, + /** @stable ICU 63 */ + U_INSC_CANTILLATION_MARK, + /** @stable ICU 63 */ + U_INSC_CONSONANT, + /** @stable ICU 63 */ + U_INSC_CONSONANT_DEAD, + /** @stable ICU 63 */ + U_INSC_CONSONANT_FINAL, + /** @stable ICU 63 */ + U_INSC_CONSONANT_HEAD_LETTER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_INITIAL_POSTFIXED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_KILLER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_MEDIAL, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PLACEHOLDER, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PRECEDING_REPHA, + /** @stable ICU 63 */ + U_INSC_CONSONANT_PREFIXED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_SUBJOINED, + /** @stable ICU 63 */ + U_INSC_CONSONANT_SUCCEEDING_REPHA, + /** @stable ICU 63 */ + U_INSC_CONSONANT_WITH_STACKER, + /** @stable ICU 63 */ + U_INSC_GEMINATION_MARK, + /** @stable ICU 63 */ + U_INSC_INVISIBLE_STACKER, + /** @stable ICU 63 */ + U_INSC_JOINER, + /** @stable ICU 63 */ + U_INSC_MODIFYING_LETTER, + /** @stable ICU 63 */ + U_INSC_NON_JOINER, + /** @stable ICU 63 */ + U_INSC_NUKTA, + /** @stable ICU 63 */ + U_INSC_NUMBER, + /** @stable ICU 63 */ + U_INSC_NUMBER_JOINER, + /** @stable ICU 63 */ + U_INSC_PURE_KILLER, + /** @stable ICU 63 */ + U_INSC_REGISTER_SHIFTER, + /** @stable ICU 63 */ + U_INSC_SYLLABLE_MODIFIER, + /** @stable ICU 63 */ + U_INSC_TONE_LETTER, + /** @stable ICU 63 */ + U_INSC_TONE_MARK, + /** @stable ICU 63 */ + U_INSC_VIRAMA, + /** @stable ICU 63 */ + U_INSC_VISARGA, + /** @stable ICU 63 */ + U_INSC_VOWEL, + /** @stable ICU 63 */ + U_INSC_VOWEL_DEPENDENT, + /** @stable ICU 63 */ + U_INSC_VOWEL_INDEPENDENT, +} UIndicSyllabicCategory; + +/** + * Vertical Orientation constants. + * + * @see UCHAR_VERTICAL_ORIENTATION + * @stable ICU 63 + */ +typedef enum UVerticalOrientation { + /* + * Note: UVerticalOrientation constants are parsed by preparseucd.py. + * It matches lines like + * U_VO_ + */ + + /** @stable ICU 63 */ + U_VO_ROTATED, + /** @stable ICU 63 */ + U_VO_TRANSFORMED_ROTATED, + /** @stable ICU 63 */ + U_VO_TRANSFORMED_UPRIGHT, + /** @stable ICU 63 */ + U_VO_UPRIGHT, +} UVerticalOrientation; + /** * Check a binary Unicode property for a code point. * @@ -2342,6 +2549,7 @@ typedef enum UHangulSyllableType { * does not have data for the property at all, or not for this code point. * * @see UProperty + * @see u_getBinaryPropertySet * @see u_getIntPropertyValue * @see u_getUnicodeVersion * @stable ICU 2.1 @@ -2349,6 +2557,28 @@ typedef enum UHangulSyllableType { U_STABLE UBool U_EXPORT2 u_hasBinaryProperty(UChar32 c, UProperty which); +#ifndef U_HIDE_DRAFT_API + +/** + * Returns a frozen USet for a binary property. + * The library retains ownership over the returned object. + * Sets an error code if the property number is not one for a binary property. + * + * The returned set contains all code points for which the property is true. + * + * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1 + * @param pErrorCode an in/out ICU UErrorCode + * @return the property as a set + * @see UProperty + * @see u_hasBinaryProperty + * @see Unicode::fromUSet + * @draft ICU 63 + */ +U_CAPI const USet * U_EXPORT2 +u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode); + +#endif // U_HIDE_DRAFT_API + /** * Check if a code point has the Alphabetic Unicode property. * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC). @@ -2449,6 +2679,7 @@ u_isUWhiteSpace(UChar32 c); * @see u_hasBinaryProperty * @see u_getIntPropertyMinValue * @see u_getIntPropertyMaxValue + * @see u_getIntPropertyMap * @see u_getUnicodeVersion * @stable ICU 2.2 */ @@ -2505,6 +2736,28 @@ u_getIntPropertyMinValue(UProperty which); U_STABLE int32_t U_EXPORT2 u_getIntPropertyMaxValue(UProperty which); +#ifndef U_HIDE_DRAFT_API + +/** + * Returns an immutable UCPMap for an enumerated/catalog/int-valued property. + * The library retains ownership over the returned object. + * Sets an error code if the property number is not one for an "int property". + * + * The returned object maps all Unicode code points to their values for that property. + * For documentation of the integer values see u_getIntPropertyValue(). + * + * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1 + * @param pErrorCode an in/out ICU UErrorCode + * @return the property as a map + * @see UProperty + * @see u_getIntPropertyValue + * @draft ICU 63 + */ +U_CAPI const UCPMap * U_EXPORT2 +u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode); + +#endif // U_HIDE_DRAFT_API + /** * Get the numeric value for a Unicode code point as defined in the * Unicode Character Database. diff --git a/deps/icu-small/source/common/unicode/ucnv.h b/deps/icu-small/source/common/unicode/ucnv.h index 53b4c6f0733aca..ec7c5f350b4973 100644 --- a/deps/icu-small/source/common/unicode/ucnv.h +++ b/deps/icu-small/source/common/unicode/ucnv.h @@ -53,19 +53,18 @@ #include "unicode/uenum.h" #include "unicode/localpointer.h" -#ifndef __USET_H__ +#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN) + +#define USET_DEFINED /** - * USet is the C API type for Unicode sets. - * It is forward-declared here to avoid including the header file if related + * USet is the C API type corresponding to C++ class UnicodeSet. + * It is forward-declared here to avoid including unicode/uset.h file if related * conversion APIs are not used. - * See unicode/uset.h * * @see ucnv_getUnicodeSet - * @stable ICU 2.6 + * @stable ICU 2.4 */ -struct USet; -/** @stable ICU 2.6 */ typedef struct USet USet; #endif diff --git a/deps/icu-small/source/common/unicode/uconfig.h b/deps/icu-small/source/common/unicode/uconfig.h index 5e28a146de3fff..3a7d2db9aa07c8 100644 --- a/deps/icu-small/source/common/unicode/uconfig.h +++ b/deps/icu-small/source/common/unicode/uconfig.h @@ -183,7 +183,7 @@ */ #ifdef U_HAVE_LIB_SUFFIX /* Use the predefined value. */ -#elif defined(U_LIB_SUFFIX_C_NAME) +#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN) # define U_HAVE_LIB_SUFFIX 1 #endif @@ -431,17 +431,6 @@ # define UCONFIG_HAVE_PARSEALLINPUT 1 #endif - -/** - * \def UCONFIG_FORMAT_FASTPATHS_49 - * This switch turns on other formatting fastpaths. Binary incompatible in object DecimalFormat and DecimalFormatSymbols - * - * @internal - */ -#ifndef UCONFIG_FORMAT_FASTPATHS_49 -# define UCONFIG_FORMAT_FASTPATHS_49 1 -#endif - /** * \def UCONFIG_NO_FILTERED_BREAK_ITERATION * This switch turns off filtered break iteration code. diff --git a/deps/icu-small/source/common/unicode/ucpmap.h b/deps/icu-small/source/common/unicode/ucpmap.h new file mode 100644 index 00000000000000..f2c42b6b7f4bee --- /dev/null +++ b/deps/icu-small/source/common/unicode/ucpmap.h @@ -0,0 +1,162 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucpmap.h +// created: 2018sep03 Markus W. Scherer + +#ifndef __UCPMAP_H__ +#define __UCPMAP_H__ + +#include "unicode/utypes.h" + +#ifndef U_HIDE_DRAFT_API + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines an abstract map from Unicode code points to integer values. + * + * @see UCPMap + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values. + * + * @see UCPTrie + * @see UMutableCPTrie + * @draft ICU 63 + */ +typedef struct UCPMap UCPMap; + +/** + * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. + * Most users should use UCPMAP_RANGE_NORMAL. + * + * @see ucpmap_getRange + * @see ucptrie_getRange + * @see umutablecptrie_getRange + * @draft ICU 63 + */ +enum UCPMapRangeOption { + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map. + * Most users should use this option. + * @draft ICU 63 + */ + UCPMAP_RANGE_NORMAL, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_LEAD(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @draft ICU 63 + */ + UCPMAP_RANGE_FIXED_LEAD_SURROGATES, + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @draft ICU 63 + */ + UCPMAP_RANGE_FIXED_ALL_SURROGATES +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPMapRangeOption UCPMapRangeOption; +#endif + +/** + * Returns the value for a code point as stored in the map, with range checking. + * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF. + * + * @param map the map + * @param c the code point + * @return the map value, + * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF + * @draft ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +ucpmap_get(const UCPMap *map, UChar32 c); + +/** + * Callback function type: Modifies a map value. + * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the map value, etc. + * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the map + * @return the modified value + * @draft ICU 63 + */ +typedef uint32_t U_CALLCONV +UCPMapValueFilter(const void *context, uint32_t value); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a map. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param map the map + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the map data value, + * or NULL if the values from the map are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, map value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @draft ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +ucpmap_getRange(const UCPMap *map, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +U_CDECL_END + +#endif // U_HIDE_DRAFT_API +#endif diff --git a/deps/icu-small/source/common/unicode/ucptrie.h b/deps/icu-small/source/common/unicode/ucptrie.h new file mode 100644 index 00000000000000..2718c984e43197 --- /dev/null +++ b/deps/icu-small/source/common/unicode/ucptrie.h @@ -0,0 +1,646 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// ucptrie.h (modified from utrie2.h) +// created: 2017dec29 Markus W. Scherer + +#ifndef __UCPTRIE_H__ +#define __UCPTRIE_H__ + +#include "unicode/utypes.h" + +#ifndef U_HIDE_DRAFT_API + +#include "unicode/localpointer.h" +#include "unicode/ucpmap.h" +#include "unicode/utf8.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines an immutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +#ifndef U_IN_DOXYGEN +/** @internal */ +typedef union UCPTrieData { + /** @internal */ + const void *ptr0; + /** @internal */ + const uint16_t *ptr16; + /** @internal */ + const uint32_t *ptr32; + /** @internal */ + const uint8_t *ptr8; +} UCPTrieData; +#endif + +/** + * Immutable Unicode code point trie structure. + * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Do not access UCPTrie fields directly; use public functions and macros. + * Functions are easy to use: They support all trie types and value widths. + * + * When performance is really important, macros provide faster access. + * Most macros are specific to either "fast" or "small" tries, see UCPTrieType. + * There are "fast" macros for special optimized use cases. + * + * The macros will return bogus values, or may crash, if used on the wrong type or value width. + * + * @see UMutableCPTrie + * @draft ICU 63 + */ +struct UCPTrie { +#ifndef U_IN_DOXYGEN + /** @internal */ + const uint16_t *index; + /** @internal */ + UCPTrieData data; + + /** @internal */ + int32_t indexLength; + /** @internal */ + int32_t dataLength; + /** Start of the last range which ends at U+10FFFF. @internal */ + UChar32 highStart; + /** highStart>>12 @internal */ + uint16_t shifted12HighStart; + + /** @internal */ + int8_t type; // UCPTrieType + /** @internal */ + int8_t valueWidth; // UCPTrieValueWidth + + /** padding/reserved @internal */ + uint32_t reserved32; + /** padding/reserved @internal */ + uint16_t reserved16; + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + uint16_t index3NullOffset; + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + int32_t dataNullOffset; + /** @internal */ + uint32_t nullValue; + +#ifdef UCPTRIE_DEBUG + /** @internal */ + const char *name; +#endif +#endif +}; +#ifndef U_IN_DOXYGEN +typedef struct UCPTrie UCPTrie; +#endif + +/** + * Selectors for the type of a UCPTrie. + * Different trade-offs for size vs. speed. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getType + * @draft ICU 63 + */ +enum UCPTrieType { + /** + * For ucptrie_openFromBinary() to accept any type. + * ucptrie_getType() will return the actual type. + * @draft ICU 63 + */ + UCPTRIE_TYPE_ANY = -1, + /** + * Fast/simple/larger BMP data structure. Use functions and "fast" macros. + * @draft ICU 63 + */ + UCPTRIE_TYPE_FAST, + /** + * Small/slower BMP data structure. Use functions and "small" macros. + * @draft ICU 63 + */ + UCPTRIE_TYPE_SMALL +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPTrieType UCPTrieType; +#endif + +/** + * Selectors for the number of bits in a UCPTrie data value. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getValueWidth + * @draft ICU 63 + */ +enum UCPTrieValueWidth { + /** + * For ucptrie_openFromBinary() to accept any data value width. + * ucptrie_getValueWidth() will return the actual data value width. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_ANY = -1, + /** + * The trie stores 16 bits per data value. + * It returns them as unsigned values 0..0xffff=65535. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_16, + /** + * The trie stores 32 bits per data value. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_32, + /** + * The trie stores 8 bits per data value. + * It returns them as unsigned values 0..0xff=255. + * @draft ICU 63 + */ + UCPTRIE_VALUE_BITS_8 +}; +#ifndef U_IN_DOXYGEN +typedef enum UCPTrieValueWidth UCPTrieValueWidth; +#endif + +/** + * Opens a trie from its binary form, stored in 32-bit-aligned memory. + * Inverse of ucptrie_toBinary(). + * + * The memory must remain valid and unchanged as long as the trie is used. + * You must ucptrie_close() the trie once you are done using it. + * + * @param type selects the trie type; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_TYPE_ANY to accept any type + * @param valueWidth selects the number of bits in a data value; results in an + * U_INVALID_FORMAT_ERROR if it does not match the binary data; + * use UCPTRIE_VALUE_BITS_ANY to accept any data value width + * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie + * @param length the number of bytes available at data; + * can be more than necessary + * @param pActualLength receives the actual number of bytes at data taken up by the trie data; + * can be NULL + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * + * @see umutablecptrie_open + * @see umutablecptrie_buildImmutable + * @see ucptrie_toBinary + * @draft ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, + const void *data, int32_t length, int32_t *pActualLength, + UErrorCode *pErrorCode); + +/** + * Closes a trie and releases associated memory. + * + * @param trie the trie + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +ucptrie_close(UCPTrie *trie); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUCPTriePointer + * "Smart pointer" class, closes a UCPTrie via ucptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close); + +U_NAMESPACE_END + +#endif + +/** + * Returns the trie type. + * + * @param trie the trie + * @return the trie type + * @see ucptrie_openFromBinary + * @see UCPTRIE_TYPE_ANY + * @draft ICU 63 + */ +U_CAPI UCPTrieType U_EXPORT2 +ucptrie_getType(const UCPTrie *trie); + +/** + * Returns the number of bits in a trie data value. + * + * @param trie the trie + * @return the number of bits in a trie data value + * @see ucptrie_openFromBinary + * @see UCPTRIE_VALUE_BITS_ANY + * @draft ICU 63 + */ +U_CAPI UCPTrieValueWidth U_EXPORT2 +ucptrie_getValueWidth(const UCPTrie *trie); + +/** + * Returns the value for a code point as stored in the trie, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower. + * Easier to use because, unlike the macros, this function works on all UCPTrie + * objects, for all types and value widths. + * + * @param trie the trie + * @param c the code point + * @return the trie value, + * or the trie error value if the code point is not in the range 0..U+10FFFF + * @draft ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +ucptrie_get(const UCPTrie *trie, UChar32 c); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @draft ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +ucptrie_getRange(const UCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Writes a memory-mappable form of the trie into 32-bit aligned memory. + * Inverse of ucptrie_openFromBinary(). + * + * @param trie the trie + * @param data a pointer to 32-bit-aligned memory to be filled with the trie data; + * can be NULL if capacity==0 + * @param capacity the number of bytes available at data, or 0 for pure preflighting + * @param pErrorCode an in/out ICU UErrorCode; + * U_BUFFER_OVERFLOW_ERROR if the capacity is too small + * @return the number of bytes written or (if buffer overflow) needed for the trie + * + * @see ucptrie_openFromBinary() + * @draft ICU 63 + */ +U_CAPI int32_t U_EXPORT2 +ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode); + +/** + * Macro parameter value for a trie with 16-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_16 + * @draft ICU 63 + */ +#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i]) + +/** + * Macro parameter value for a trie with 32-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_32 + * @draft ICU 63 + */ +#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i]) + +/** + * Macro parameter value for a trie with 8-bit data values. + * Use the name of this macro as a "dataAccess" parameter in other macros. + * Do not use this macro in any other way. + * + * @see UCPTRIE_VALUE_BITS_8 + * @draft ICU 63 + */ +#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i]) + +/** + * Returns a trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range 0..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c)) + +/** + * Returns a 16-bit trie value for a code point, with range checking. + * Returns the trie error value if c is not in the range U+0000..U+10FFFF. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point + * @return The code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \ + dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c)) + +/** + * UTF-16: Reads the next code point (UChar32 c, out), post-increments src, + * and gets a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const UChar *, in/out) the source text pointer + * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) { \ + (c) = *(src)++; \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \ + ++(src); \ + (c) = U16_GET_SUPPLEMENTARY((c), __c2); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src, + * and gets a value from the trie. + * Sets the trie error value if c is an unpaired surrogate. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const UChar *, in) the start pointer for the text + * @param src (const UChar *, in/out) the source text pointer + * @param c (UChar32, out) variable for the code point + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) { \ + (c) = *--(src); \ + int32_t __index; \ + if (!U16_IS_SURROGATE(c)) { \ + __index = _UCPTRIE_FAST_INDEX(trie, c); \ + } else { \ + uint16_t __c2; \ + if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \ + --(src); \ + (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \ + __index = _UCPTRIE_SMALL_INDEX(trie, c); \ + } else { \ + __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \ + } \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * UTF-8: Post-increments src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. + * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param src (const char *, in/out) the source text pointer + * @param limit (const char *, in) the limit pointer for the text (must not be NULL) + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) { \ + int32_t __lead = (uint8_t)*(src)++; \ + if (!U8_IS_SINGLE(__lead)) { \ + uint8_t __t1, __t2, __t3; \ + if ((src) != (limit) && \ + (__lead >= 0xe0 ? \ + __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \ + U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \ + ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \ + (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \ + : /* U+10000..U+10FFFF */ \ + (__lead -= 0xf0) <= 4 && \ + U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \ + (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \ + (__t2 = *(src) - 0x80) <= 0x3f && \ + ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \ + (__lead = __lead >= (trie)->shifted12HighStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \ + : /* U+0080..U+07FF */ \ + __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \ + (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \ + ++(src); \ + } else { \ + __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \ + } \ + } \ + (result) = dataAccess(trie, __lead); \ +} + +/** + * UTF-8: Pre-decrements src and gets a value from the trie. + * Sets the trie error value for an ill-formed byte sequence. + * + * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point + * because it would be more work to do so and is often not needed. + * If the trie value differs from the error value, then the byte sequence is well-formed, + * and the code point can be assembled without revalidation. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param start (const char *, in) the start pointer for the text + * @param src (const char *, in/out) the source text pointer + * @param result (out) variable for the trie lookup result + * @draft ICU 63 + */ +#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) { \ + int32_t __index = (uint8_t)*--(src); \ + if (!U8_IS_SINGLE(__index)) { \ + __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \ + (const uint8_t *)(src)); \ + (src) -= __index & 7; \ + __index >>= 3; \ + } \ + (result) = dataAccess(trie, __index); \ +} + +/** + * Returns a trie value for an ASCII code point, without range checking. + * + * @param trie (const UCPTrie *, in) the trie (of either fast or small type) + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point; must be U+0000..U+007F + * @return The ASCII code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c) + +/** + * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. + * Can be used to look up a value for a UTF-16 code unit if other parts of + * the string processing check for surrogates. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF + * @return The BMP code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c)) + +/** + * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), + * without range checking. + * + * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST + * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width + * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF + * @return The supplementary code point's trie value. + * @draft ICU 63 + */ +#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c)) + +/* Internal definitions ----------------------------------------------------- */ + +#ifndef U_IN_DOXYGEN + +/** + * Internal implementation constants. + * These are needed for the API macros, but users should not use these directly. + * @internal + */ +enum { + /** @internal */ + UCPTRIE_FAST_SHIFT = 6, + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT, + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1, + + /** @internal */ + UCPTRIE_SMALL_MAX = 0xfff, + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1, + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2 +}; + +/* Internal functions and macros -------------------------------------------- */ +// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API + +/** @internal */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c); + +/** @internal */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3); + +/** + * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations. + * Do not call directly. + * @internal + */ +U_INTERNAL int32_t U_EXPORT2 +ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, + const uint8_t *start, const uint8_t *src); + +/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ +#define _UCPTRIE_FAST_INDEX(trie, c) \ + ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK)) + +/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */ +#define _UCPTRIE_SMALL_INDEX(trie, c) \ + ((c) >= (trie)->highStart ? \ + (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \ + ucptrie_internalSmallIndex(trie, c)) + +/** + * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. + * Returns the data index. + * @internal + */ +#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \ + ((uint32_t)(c) <= (uint32_t)(fastMax) ? \ + _UCPTRIE_FAST_INDEX(trie, c) : \ + (uint32_t)(c) <= 0x10ffff ? \ + _UCPTRIE_SMALL_INDEX(trie, c) : \ + (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET) + +U_CDECL_END + +#endif // U_IN_DOXYGEN +#endif // U_HIDE_DRAFT_API +#endif diff --git a/deps/icu-small/source/common/unicode/ucurr.h b/deps/icu-small/source/common/unicode/ucurr.h index adfaf0023bb7cf..1021adc83e7ee2 100644 --- a/deps/icu-small/source/common/unicode/ucurr.h +++ b/deps/icu-small/source/common/unicode/ucurr.h @@ -60,6 +60,7 @@ enum UCurrencyUsage { UCURR_USAGE_COUNT=2 #endif // U_HIDE_DEPRECATED_API }; +/** Currency Usage used for Decimal Format */ typedef enum UCurrencyUsage UCurrencyUsage; /** diff --git a/deps/icu-small/source/common/unicode/uenum.h b/deps/icu-small/source/common/unicode/uenum.h index 56faae895279b6..eb8ecdf88b6ec5 100644 --- a/deps/icu-small/source/common/unicode/uenum.h +++ b/deps/icu-small/source/common/unicode/uenum.h @@ -190,8 +190,6 @@ U_STABLE UEnumeration* U_EXPORT2 uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count, UErrorCode* ec); -/* Note: next function is not hidden as draft, as it is used internally (it was formerly an internal function). */ - /** * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null. * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close. diff --git a/deps/icu-small/source/common/unicode/umachine.h b/deps/icu-small/source/common/unicode/umachine.h index a9dc1631b00e70..6d932cfcfb2ae4 100644 --- a/deps/icu-small/source/common/unicode/umachine.h +++ b/deps/icu-small/source/common/unicode/umachine.h @@ -125,6 +125,9 @@ * May result in an error if it applied to something not an override. * @internal */ +#ifndef U_OVERRIDE +#define U_OVERRIDE override +#endif /** * \def U_FINAL @@ -133,24 +136,10 @@ * May result in an error if subclasses attempt to override. * @internal */ - -#if U_CPLUSPLUS_VERSION >= 11 -/* C++11 */ -#ifndef U_OVERRIDE -#define U_OVERRIDE override -#endif -#ifndef U_FINAL +#if !defined(U_FINAL) || defined(U_IN_DOXYGEN) #define U_FINAL final #endif -#else -/* not C++11 - define to nothing */ -#ifndef U_OVERRIDE -#define U_OVERRIDE -#endif -#ifndef U_FINAL -#define U_FINAL -#endif -#endif + /*==========================================================================*/ /* limits for int32_t etc., like in POSIX inttypes.h */ @@ -318,7 +307,7 @@ typedef int8_t UBool; * UChar is configurable by defining the macro UCHAR_TYPE * on the preprocessor or compiler command line: * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc. - * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.) + * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.) * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16. * * The default is UChar=char16_t. diff --git a/deps/icu-small/source/common/unicode/umutablecptrie.h b/deps/icu-small/source/common/unicode/umutablecptrie.h new file mode 100644 index 00000000000000..e75191a4495209 --- /dev/null +++ b/deps/icu-small/source/common/unicode/umutablecptrie.h @@ -0,0 +1,241 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// umutablecptrie.h (split out of ucptrie.h) +// created: 2018jan24 Markus W. Scherer + +#ifndef __UMUTABLECPTRIE_H__ +#define __UMUTABLECPTRIE_H__ + +#include "unicode/utypes.h" + +#ifndef U_HIDE_DRAFT_API + +#include "unicode/localpointer.h" +#include "unicode/ucpmap.h" +#include "unicode/ucptrie.h" +#include "unicode/utf8.h" + +U_CDECL_BEGIN + +/** + * \file + * + * This file defines a mutable Unicode code point trie. + * + * @see UCPTrie + * @see UMutableCPTrie + */ + +/** + * Mutable Unicode code point trie. + * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values. + * For details see http://site.icu-project.org/design/struct/utrie + * + * Setting values (especially ranges) and lookup is fast. + * The mutable trie is only somewhat space-efficient. + * It builds a compacted, immutable UCPTrie. + * + * This trie can be modified while iterating over its contents. + * For example, it is possible to merge its values with those from another + * set of ranges (e.g., another mutable or immutable trie): + * Iterate over those source ranges; for each of them iterate over this trie; + * add the source value into the value of each trie range. + * + * @see UCPTrie + * @see umutablecptrie_buildImmutable + * @draft ICU 63 + */ +typedef struct UMutableCPTrie UMutableCPTrie; + +/** + * Creates a mutable trie that initially maps each Unicode code point to the same value. + * It uses 32-bit data values until umutablecptrie_buildImmutable() is called. + * umutablecptrie_buildImmutable() takes a valueWidth parameter which + * determines the number of bits in the data value in the resulting UCPTrie. + * You must umutablecptrie_close() the trie once you are done using it. + * + * @param initialValue the initial value that is set for all code points + * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16 + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode); + +/** + * Clones a mutable trie. + * You must umutablecptrie_close() the clone once you are done using it. + * + * @param other the trie to clone + * @param pErrorCode an in/out ICU UErrorCode + * @return the trie clone + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode); + +/** + * Closes a mutable trie and releases associated memory. + * + * @param trie the trie + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_close(UMutableCPTrie *trie); + +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + +/** + * \class LocalUMutableCPTriePointer + * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close(). + * For most methods see the LocalPointerBase base class. + * + * @see LocalPointerBase + * @see LocalPointer + * @draft ICU 63 + */ +U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close); + +U_NAMESPACE_END + +#endif + +/** + * Creates a mutable trie with the same contents as the UCPMap. + * You must umutablecptrie_close() the mutable trie once you are done using it. + * + * @param map the source map + * @param pErrorCode an in/out ICU UErrorCode + * @return the mutable trie + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode); + +/** + * Creates a mutable trie with the same contents as the immutable one. + * You must umutablecptrie_close() the mutable trie once you are done using it. + * + * @param trie the immutable trie + * @param pErrorCode an in/out ICU UErrorCode + * @return the mutable trie + * @draft ICU 63 + */ +U_CAPI UMutableCPTrie * U_EXPORT2 +umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode); + +/** + * Returns the value for a code point as stored in the trie. + * + * @param trie the trie + * @param c the code point + * @return the value + * @draft ICU 63 + */ +U_CAPI uint32_t U_EXPORT2 +umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c); + +/** + * Returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. + * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * The trie can be modified between calls to this function. + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * See the same-signature ucptrie_getRange() for a code sample. + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @draft ICU 63 + */ +U_CAPI UChar32 U_EXPORT2 +umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start, + UCPMapRangeOption option, uint32_t surrogateValue, + UCPMapValueFilter *filter, const void *context, uint32_t *pValue); + +/** + * Sets a value for a code point. + * + * @param trie the trie + * @param c the code point + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode); + +/** + * Sets a value for each code point [start..end]. + * Faster and more space-efficient than setting the value for each code point separately. + * + * @param trie the trie + * @param start the first code point to get the value + * @param end the last code point to get the value (inclusive) + * @param value the value + * @param pErrorCode an in/out ICU UErrorCode + * @draft ICU 63 + */ +U_CAPI void U_EXPORT2 +umutablecptrie_setRange(UMutableCPTrie *trie, + UChar32 start, UChar32 end, + uint32_t value, UErrorCode *pErrorCode); + +/** + * Compacts the data and builds an immutable UCPTrie according to the parameters. + * After this, the mutable trie will be empty. + * + * The mutable trie stores 32-bit values until buildImmutable() is called. + * If values shorter than 32 bits are to be stored in the immutable trie, + * then the upper bits are discarded. + * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581, + * and the value width is 8 bits, then each of these is stored as 0x81 + * and the immutable trie will return that as an unsigned value. + * (Some implementations may want to make productive temporary use of the upper bits + * until buildImmutable() discards them.) + * + * Not every possible set of mappings can be built into a UCPTrie, + * because of limitations resulting from speed and space optimizations. + * Every Unicode assigned character can be mapped to a unique value. + * Typical data yields data structures far smaller than the limitations. + * + * It is possible to construct extremely unusual mappings that exceed the data structure limits. + * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR. + * + * @param trie the trie trie + * @param type selects the trie type + * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits, + * then the values stored in the trie will be truncated first + * @param pErrorCode an in/out ICU UErrorCode + * + * @see umutablecptrie_fromUCPTrie + * @draft ICU 63 + */ +U_CAPI UCPTrie * U_EXPORT2 +umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth, + UErrorCode *pErrorCode); + +U_CDECL_END + +#endif // U_HIDE_DRAFT_API +#endif diff --git a/deps/icu-small/source/common/unicode/uniset.h b/deps/icu-small/source/common/unicode/uniset.h index c2e0ad48bd7b2c..2ab2695a8780d7 100644 --- a/deps/icu-small/source/common/unicode/uniset.h +++ b/deps/icu-small/source/common/unicode/uniset.h @@ -13,6 +13,7 @@ #ifndef UNICODESET_H #define UNICODESET_H +#include "unicode/ucpmap.h" #include "unicode/unifilt.h" #include "unicode/unistr.h" #include "unicode/uset.h" @@ -25,9 +26,8 @@ U_NAMESPACE_BEGIN // Forward Declarations. -void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); /**< @internal */ - class BMPSet; +class CharacterProperties; class ParsePosition; class RBBIRuleScanner; class SymbolTable; @@ -584,9 +584,8 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { //---------------------------------------------------------------- /** - * Make this object represent the range start - end. - * If end > start then this object is set to an - * an empty range. + * Make this object represent the range `start - end`. + * If `end > start` then this object is set to an empty range. * A frozen set will not be modified. * * @param start first character in the set, inclusive @@ -1506,6 +1505,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { //---------------------------------------------------------------- UnicodeSet(const UnicodeSet& o, UBool /* asThawed */); + UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed); //---------------------------------------------------------------- // Implementation: Pattern parsing @@ -1614,7 +1614,7 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { UnicodeString& rebuiltPat, UErrorCode& ec); - friend void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status); + friend class CharacterProperties; static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status); /** @@ -1634,9 +1634,15 @@ class U_COMMON_API UnicodeSet U_FINAL : public UnicodeFilter { */ void applyFilter(Filter filter, void* context, - int32_t src, + const UnicodeSet* inclusions, UErrorCode &status); +#ifndef U_HIDE_DRAFT_API // Skipped: ucpmap.h is draft only. + void applyIntPropertyValue(const UCPMap *map, + UCPMapValueFilter *filter, const void *context, + UErrorCode &errorCode); +#endif /* U_HIDE_DRAFT_API */ + /** * Set the new pattern to cache. */ diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index b84f40bd449ced..bf954b5f1d8232 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -243,6 +243,9 @@ class UnicodeStringAppendable; // unicode/appendable.h * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object * (<0 or >length()) then they are "pinned" to the nearest boundary. + * - If the buffer passed to an insert/append/replace operation is owned by the + * target object, e.g., calling str.append(str), an extra copy may take place + * to ensure safety. * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. diff --git a/deps/icu-small/source/common/unicode/uobject.h b/deps/icu-small/source/common/unicode/uobject.h index 080600e52650d5..f7a7b6eddbc96b 100644 --- a/deps/icu-small/source/common/unicode/uobject.h +++ b/deps/icu-small/source/common/unicode/uobject.h @@ -27,7 +27,6 @@ */ /** - * @{ * \def U_NO_THROW * Define this to define the throw() specification so * certain functions do not throw any exceptions @@ -44,8 +43,6 @@ #define U_NO_THROW throw() #endif -/** @} */ - /*===========================================================================*/ /* UClassID-based RTTI */ /*===========================================================================*/ diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 4175e527f404a7..5812173e39cfd2 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -33,6 +33,9 @@ #if !U_DISABLE_RENAMING +// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work. +#if !(defined(_MSC_VER) && defined(__INTELLISENSE__)) + /* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give the platform a chance to define it first. Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined. @@ -107,6 +110,7 @@ #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) +#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) @@ -253,12 +257,14 @@ #define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen) #define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc) #define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket) +#define u_getBinaryPropertySet U_ICU_ENTRY_POINT_RENAME(u_getBinaryPropertySet) #define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass) #define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory) #define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion) #define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter) #define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure) #define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment) +#define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap) #define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue) #define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue) #define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue) @@ -763,6 +769,20 @@ #define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap) #define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA) #define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder) +#define ucpmap_get U_ICU_ENTRY_POINT_RENAME(ucpmap_get) +#define ucpmap_getRange U_ICU_ENTRY_POINT_RENAME(ucpmap_getRange) +#define ucptrie_close U_ICU_ENTRY_POINT_RENAME(ucptrie_close) +#define ucptrie_get U_ICU_ENTRY_POINT_RENAME(ucptrie_get) +#define ucptrie_getRange U_ICU_ENTRY_POINT_RENAME(ucptrie_getRange) +#define ucptrie_getType U_ICU_ENTRY_POINT_RENAME(ucptrie_getType) +#define ucptrie_getValueWidth U_ICU_ENTRY_POINT_RENAME(ucptrie_getValueWidth) +#define ucptrie_internalGetRange U_ICU_ENTRY_POINT_RENAME(ucptrie_internalGetRange) +#define ucptrie_internalSmallIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallIndex) +#define ucptrie_internalSmallU8Index U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallU8Index) +#define ucptrie_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalU8PrevIndex) +#define ucptrie_openFromBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_openFromBinary) +#define ucptrie_swap U_ICU_ENTRY_POINT_RENAME(ucptrie_swap) +#define ucptrie_toBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_toBinary) #define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close) #define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect) #define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll) @@ -1079,6 +1099,7 @@ #define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize) #define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open) #define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute) +#define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag) #define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry) #define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage) #define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData) @@ -1106,6 +1127,16 @@ #define umtx_condWait U_ICU_ENTRY_POINT_RENAME(umtx_condWait) #define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock) #define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock) +#define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable) +#define umutablecptrie_clone U_ICU_ENTRY_POINT_RENAME(umutablecptrie_clone) +#define umutablecptrie_close U_ICU_ENTRY_POINT_RENAME(umutablecptrie_close) +#define umutablecptrie_fromUCPMap U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPMap) +#define umutablecptrie_fromUCPTrie U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPTrie) +#define umutablecptrie_get U_ICU_ENTRY_POINT_RENAME(umutablecptrie_get) +#define umutablecptrie_getRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_getRange) +#define umutablecptrie_open U_ICU_ENTRY_POINT_RENAME(umutablecptrie_open) +#define umutablecptrie_set U_ICU_ENTRY_POINT_RENAME(umutablecptrie_set) +#define umutablecptrie_setRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_setRange) #define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance) #define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append) #define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close) @@ -1218,6 +1249,7 @@ #define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel) #define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName) #define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload) +#define uprops_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uprops_addPropertyStarts) #define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource) #define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts) #define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow) @@ -1236,6 +1268,7 @@ #define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix) #define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii) #define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic) +#define uprv_currencyLeads U_ICU_ENTRY_POINT_RENAME(uprv_currencyLeads) #define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus) #define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault) #define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding) @@ -1329,7 +1362,6 @@ #define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod) #define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free) #define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters) -#define uprv_getDefaultCodepage U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultCodepage) #define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID) #define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity) #define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength) @@ -1754,7 +1786,6 @@ #define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie) #define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32) #define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit) -#define utrie2_getVersion U_ICU_ENTRY_POINT_RENAME(utrie2_getVersion) #define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex) #define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex) #define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen) @@ -1766,7 +1797,6 @@ #define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit) #define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32) #define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap) -#define utrie2_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie2_swapAnyVersion) #define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone) #define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close) #define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset) @@ -1778,6 +1808,7 @@ #define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32) #define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32) #define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap) +#define utrie_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie_swapAnyVersion) #define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize) #define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy) #define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone) @@ -1827,6 +1858,7 @@ #define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime) #define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo) -#endif +#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */ +#endif /* U_DISABLE_RENAMING */ +#endif /* URENAME_H */ -#endif diff --git a/deps/icu-small/source/common/unicode/uset.h b/deps/icu-small/source/common/unicode/uset.h index 5b7c5db9ec03e5..ef6bbb5c38f3fb 100644 --- a/deps/icu-small/source/common/unicode/uset.h +++ b/deps/icu-small/source/common/unicode/uset.h @@ -33,10 +33,14 @@ #include "unicode/uchar.h" #include "unicode/localpointer.h" -#ifndef UCNV_H -struct USet; +#ifndef USET_DEFINED + +#ifndef U_IN_DOXYGEN +#define USET_DEFINED +#endif /** - * A UnicodeSet. Use the uset_* API to manipulate. Create with + * USet is the C API type corresponding to C++ class UnicodeSet. + * Use the uset_* API to manipulate. Create with * uset_open*, and destroy with uset_close. * @stable ICU 2.4 */ diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index cf6ec0b6b4c9d2..0d2274a0cabb51 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -895,34 +895,31 @@ u_memrchr32(const UChar *s, UChar32 c, int32_t count); * parameters. * The string parameter must be a C string literal. * The length of the string, not including the terminating - * NUL, must be specified as a constant. + * `NUL`, must be specified as a constant. * The U_STRING_DECL macro should be invoked exactly once for one * such string variable before it is used. * * Usage: - *
- *    U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
- *    U_STRING_DECL(ustringVar2, "jumps 5%", 8);
- *    static UBool didInit=FALSE;
- *
- *    int32_t function() {
- *        if(!didInit) {
- *            U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
- *            U_STRING_INIT(ustringVar2, "jumps 5%", 8);
- *            didInit=TRUE;
- *        }
- *        return u_strcmp(ustringVar1, ustringVar2);
- *    }
- * 
- * - * Note that the macros will NOT consistently work if their argument is another #define. - * The following will not work on all platforms, don't use it. - * - *
+ *
+ *     U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ *     U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ *     static UBool didInit=FALSE;
+ *
+ *     int32_t function() {
+ *         if(!didInit) {
+ *             U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ *             U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ *             didInit=TRUE;
+ *         }
+ *         return u_strcmp(ustringVar1, ustringVar2);
+ *     }
+ *
+ * Note that the macros will NOT consistently work if their argument is another #`define`.
+ * The following will not work on all platforms, don't use it.
+ *
  *     #define GLUCK "Mr. Gluck"
  *     U_STRING_DECL(var, GLUCK, 9)
  *     U_STRING_INIT(var, GLUCK, 9)
- * 
* * Instead, use the string literal "Mr. Gluck" as the argument to both macro * calls. diff --git a/deps/icu-small/source/common/unicode/utf16.h b/deps/icu-small/source/common/unicode/utf16.h index b9b9c59d3cb21f..0908b4f00e9bf1 100644 --- a/deps/icu-small/source/common/unicode/utf16.h +++ b/deps/icu-small/source/common/unicode/utf16.h @@ -213,8 +213,6 @@ } \ } -#ifndef U_HIDE_DRAFT_API - /** * Get a code point from a string at a random-access offset, * without changing the offset. @@ -236,7 +234,7 @@ * @param length string length * @param c output UChar32 variable * @see U16_GET_UNSAFE - * @draft ICU 60 + * @stable ICU 60 */ #define U16_GET_OR_FFFD(s, start, i, length, c) { \ (c)=(s)[i]; \ @@ -258,8 +256,6 @@ } \ } -#endif // U_HIDE_DRAFT_API - /* definitions with forward iteration --------------------------------------- */ /** @@ -320,8 +316,6 @@ } \ } -#ifndef U_HIDE_DRAFT_API - /** * Get a code point from a string at a code point boundary offset, * and advance the offset to the next code point boundary. @@ -341,7 +335,7 @@ * @param length string length * @param c output UChar32 variable * @see U16_NEXT_UNSAFE - * @draft ICU 60 + * @stable ICU 60 */ #define U16_NEXT_OR_FFFD(s, i, length, c) { \ (c)=(s)[(i)++]; \ @@ -356,8 +350,6 @@ } \ } -#endif // U_HIDE_DRAFT_API - /** * Append a code point to a string, overwriting 1 or 2 code units. * The offset points to the current end of the string contents @@ -585,8 +577,6 @@ } \ } -#ifndef U_HIDE_DRAFT_API - /** * Move the string offset from one code point boundary to the previous one * and get the code point between them. @@ -605,7 +595,7 @@ * @param i string offset, must be startvalue.pointer; - U_ASSERT(sharedObject->cachePtr = this); + U_ASSERT(sharedObject->cachePtr == this); uhash_removeElement(fHashtable, element); removeSoftRef(sharedObject); // Deletes the sharedObject when softRefCount goes to zero. result = TRUE; diff --git a/deps/icu-small/source/common/uniset.cpp b/deps/icu-small/source/common/uniset.cpp index d828660796f743..7d2e3cd619fc1d 100644 --- a/deps/icu-small/source/common/uniset.cpp +++ b/deps/icu-small/source/common/uniset.cpp @@ -152,6 +152,7 @@ UnicodeSet::UnicodeSet() : UErrorCode status = U_ZERO_ERROR; allocateStrings(status); if (U_FAILURE(status)) { + setToBogus(); // If memory allocation failed, set to bogus state. return; } list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -179,6 +180,7 @@ UnicodeSet::UnicodeSet(UChar32 start, UChar32 end) : UErrorCode status = U_ZERO_ERROR; allocateStrings(status); if (U_FAILURE(status)) { + setToBogus(); // If memory allocation failed, set to bogus state. return; } list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -206,6 +208,7 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o) : UErrorCode status = U_ZERO_ERROR; allocateStrings(status); if (U_FAILURE(status)) { + setToBogus(); // If memory allocation failed, set to bogus state. return; } list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -230,6 +233,7 @@ UnicodeSet::UnicodeSet(const UnicodeSet& o, UBool /* asThawed */) : UErrorCode status = U_ZERO_ERROR; allocateStrings(status); if (U_FAILURE(status)) { + setToBogus(); // If memory allocation failed, set to bogus state. return; } list = (UChar32*) uprv_malloc(sizeof(UChar32) * capacity); @@ -272,6 +276,10 @@ UnicodeSet::~UnicodeSet() { * Assigns this object to be a copy of another. */ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { + return copyFrom(o, FALSE); +} + +UnicodeSet& UnicodeSet::copyFrom(const UnicodeSet& o, UBool asThawed) { if (this == &o) { return *this; } @@ -285,11 +293,12 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { UErrorCode ec = U_ZERO_ERROR; ensureCapacity(o.len, ec); if (U_FAILURE(ec)) { - return *this; // There is no way to report this error :-( + // ensureCapacity will mark the UnicodeSet as Bogus if OOM failure happens. + return *this; } len = o.len; uprv_memcpy(list, o.list, (size_t)len*sizeof(UChar32)); - if (o.bmpSet == NULL) { + if (o.bmpSet == NULL || asThawed) { bmpSet = NULL; } else { bmpSet = new BMPSet(*o.bmpSet, list, len); @@ -304,7 +313,7 @@ UnicodeSet& UnicodeSet::operator=(const UnicodeSet& o) { setToBogus(); return *this; } - if (o.stringSpan == NULL) { + if (o.stringSpan == NULL || asThawed) { stringSpan = NULL; } else { stringSpan = new UnicodeSetStringSpan(*o.stringSpan, *strings); @@ -359,12 +368,12 @@ UBool UnicodeSet::operator==(const UnicodeSet& o) const { * @see Object#hashCode() */ int32_t UnicodeSet::hashCode(void) const { - int32_t result = len; + uint32_t result = static_cast(len); for (int32_t i = 0; i < len; ++i) { - result *= 1000003; + result *= 1000003u; result += list[i]; } - return result; + return static_cast(result); } //---------------------------------------------------------------- @@ -912,7 +921,8 @@ UnicodeSet& UnicodeSet::add(UChar32 c) { UErrorCode status = U_ZERO_ERROR; ensureCapacity(len+1, status); if (U_FAILURE(status)) { - return *this; // There is no way to report this error :-( + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; } list[len++] = UNICODESET_HIGH; } @@ -957,7 +967,8 @@ UnicodeSet& UnicodeSet::add(UChar32 c) { UErrorCode status = U_ZERO_ERROR; ensureCapacity(len+2, status); if (U_FAILURE(status)) { - return *this; // There is no way to report this error :-( + // ensureCapacity will mark the object as Bogus if OOM failure happens. + return *this; } //for (int32_t k=len-1; k>=i; --k) { @@ -1654,12 +1665,13 @@ UBool UnicodeSet::allocateStrings(UErrorCode &status) { } void UnicodeSet::ensureCapacity(int32_t newLen, UErrorCode& ec) { - if (newLen <= capacity) + if (newLen <= capacity) { return; + } UChar32* temp = (UChar32*) uprv_realloc(list, sizeof(UChar32) * (newLen + GROW_EXTRA)); if (temp == NULL) { ec = U_MEMORY_ALLOCATION_ERROR; - setToBogus(); + setToBogus(); // set the object to bogus state if an OOM failure occurred. return; } list = temp; diff --git a/deps/icu-small/source/common/uniset_props.cpp b/deps/icu-small/source/common/uniset_props.cpp index ef5d6a32b2d10b..1312de209802b5 100644 --- a/deps/icu-small/source/common/uniset_props.cpp +++ b/deps/icu-small/source/common/uniset_props.cpp @@ -36,8 +36,6 @@ #include "uprops.h" #include "propname.h" #include "normalizer2impl.h" -#include "ucase.h" -#include "ubidi_props.h" #include "uinvchar.h" #include "uprops.h" #include "charstr.h" @@ -98,47 +96,13 @@ static const char ASSIGNED[] = "Assigned"; // [:^Cn:] U_CDECL_BEGIN static UBool U_CALLCONV uset_cleanup(); -struct Inclusion { - UnicodeSet *fSet; - UInitOnce fInitOnce; -}; -static Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions() - static UnicodeSet *uni32Singleton; static icu::UInitOnce uni32InitOnce = U_INITONCE_INITIALIZER; -//---------------------------------------------------------------- -// Inclusions list -//---------------------------------------------------------------- - -// USetAdder implementation -// Does not use uset.h to reduce code dependencies -static void U_CALLCONV -_set_add(USet *set, UChar32 c) { - ((UnicodeSet *)set)->add(c); -} - -static void U_CALLCONV -_set_addRange(USet *set, UChar32 start, UChar32 end) { - ((UnicodeSet *)set)->add(start, end); -} - -static void U_CALLCONV -_set_addString(USet *set, const UChar *str, int32_t length) { - ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); -} - /** * Cleanup function for UnicodeSet */ static UBool U_CALLCONV uset_cleanup(void) { - for(int32_t i = UPROPS_SRC_NONE; i < UPROPS_SRC_COUNT; ++i) { - Inclusion &in = gInclusions[i]; - delete in.fSet; - in.fSet = NULL; - in.fInitOnce.reset(); - } - delete uni32Singleton; uni32Singleton = NULL; uni32InitOnce.reset(); @@ -149,114 +113,6 @@ U_CDECL_END U_NAMESPACE_BEGIN -/* -Reduce excessive reallocation, and make it easier to detect initialization problems. -Usually you don't see smaller sets than this for Unicode 5.0. -*/ -#define DEFAULT_INCLUSION_CAPACITY 3072 - -void U_CALLCONV UnicodeSet_initInclusion(int32_t src, UErrorCode &status) { - // This function is invoked only via umtx_initOnce(). - // This function is a friend of class UnicodeSet. - - U_ASSERT(src >=0 && srcensureCapacity(DEFAULT_INCLUSION_CAPACITY, status); - switch(src) { - case UPROPS_SRC_CHAR: - uchar_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_PROPSVEC: - upropsvec_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_CHAR_AND_PROPSVEC: - uchar_addPropertyStarts(&sa, &status); - upropsvec_addPropertyStarts(&sa, &status); - break; -#if !UCONFIG_NO_NORMALIZATION - case UPROPS_SRC_CASE_AND_NORM: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - ucase_addPropertyStarts(&sa, &status); - break; - } - case UPROPS_SRC_NFC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFKC: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKCImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFKC_CF: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFKC_CFImpl(status); - if(U_SUCCESS(status)) { - impl->addPropertyStarts(&sa, status); - } - break; - } - case UPROPS_SRC_NFC_CANON_ITER: { - const Normalizer2Impl *impl=Normalizer2Factory::getNFCImpl(status); - if(U_SUCCESS(status)) { - impl->addCanonIterPropertyStarts(&sa, status); - } - break; - } -#endif - case UPROPS_SRC_CASE: - ucase_addPropertyStarts(&sa, &status); - break; - case UPROPS_SRC_BIDI: - ubidi_addPropertyStarts(&sa, &status); - break; - default: - status = U_INTERNAL_PROGRAM_ERROR; - break; - } - - if (U_FAILURE(status)) { - delete incl; - incl = NULL; - return; - } - // Compact for caching - incl->compact(); - ucln_common_registerCleanup(UCLN_COMMON_USET, uset_cleanup); -} - - - -const UnicodeSet* UnicodeSet::getInclusions(int32_t src, UErrorCode &status) { - U_ASSERT(src >=0 && src 0 && uprv_memcmp(&v, version, sizeof(v)) <= 0; } -typedef struct { - UProperty prop; - int32_t value; -} IntPropertyContext; - -static UBool intPropertyFilter(UChar32 ch, void* context) { - IntPropertyContext* c = (IntPropertyContext*)context; - return u_getIntPropertyValue((UChar32) ch, c->prop) == c->value; -} - static UBool scriptExtensionsFilter(UChar32 ch, void* context) { return uscript_hasScript(ch, *(UScriptCode*)context); } @@ -891,7 +732,7 @@ static UBool scriptExtensionsFilter(UChar32 ch, void* context) { */ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, void* context, - int32_t src, + const UnicodeSet* inclusions, UErrorCode &status) { if (U_FAILURE(status)) return; @@ -902,12 +743,8 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, // To improve performance, use an inclusions set which // encodes information about character ranges that are known // to have identical properties. - // getInclusions(src) contains exactly the first characters of - // same-value ranges for the given properties "source". - const UnicodeSet* inclusions = getInclusions(src, status); - if (U_FAILURE(status)) { - return; - } + // inclusions contains the first characters of + // same-value ranges for the given property. clear(); @@ -944,6 +781,43 @@ void UnicodeSet::applyFilter(UnicodeSet::Filter filter, namespace { +/** Maps map values to 1 if the mask contains their value'th bit, all others to 0. */ +uint32_t U_CALLCONV generalCategoryMaskFilter(const void *context, uint32_t value) { + uint32_t mask = *(const uint32_t *)context; + value = U_MASK(value) & mask; + if (value != 0) { value = 1; } + return value; +} + +/** Maps one map value to 1, all others to 0. */ +uint32_t U_CALLCONV intValueFilter(const void *context, uint32_t value) { + uint32_t v = *(const uint32_t *)context; + return value == v ? 1 : 0; +} + +} // namespace + +void UnicodeSet::applyIntPropertyValue(const UCPMap *map, + UCPMapValueFilter *filter, const void *context, + UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { return; } + clear(); + UChar32 start = 0, end; + uint32_t value; + while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0, + filter, context, &value)) >= 0) { + if (value != 0) { + add(start, end); + } + start = end + 1; + } + if (isBogus()) { + errorCode = U_MEMORY_ALLOCATION_ERROR; + } +} + +namespace { + static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { /* Note: we use ' ' in compiler code page */ int32_t j = 0; @@ -971,16 +845,35 @@ static UBool mungeCharName(char* dst, const char* src, int32_t dstCapacity) { UnicodeSet& UnicodeSet::applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode& ec) { - if (U_FAILURE(ec) || isFrozen()) return *this; - + if (U_FAILURE(ec)) { return *this; } + // All of the following check isFrozen() before modifying this set. if (prop == UCHAR_GENERAL_CATEGORY_MASK) { - applyFilter(generalCategoryMaskFilter, &value, UPROPS_SRC_CHAR, ec); + const UCPMap *map = u_getIntPropertyMap(UCHAR_GENERAL_CATEGORY, &ec); + applyIntPropertyValue(map, generalCategoryMaskFilter, &value, ec); } else if (prop == UCHAR_SCRIPT_EXTENSIONS) { + const UnicodeSet* inclusions = CharacterProperties::getInclusionsForProperty(prop, ec); UScriptCode script = (UScriptCode)value; - applyFilter(scriptExtensionsFilter, &script, UPROPS_SRC_PROPSVEC, ec); + applyFilter(scriptExtensionsFilter, &script, inclusions, ec); + } else if (0 <= prop && prop < UCHAR_BINARY_LIMIT) { + if (value == 0 || value == 1) { + const USet *set = u_getBinaryPropertySet(prop, &ec); + if (U_FAILURE(ec)) { return *this; } + copyFrom(*UnicodeSet::fromUSet(set), TRUE); + if (value == 0) { + complement(); + } + } else { + clear(); + } + } else if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) { + const UCPMap *map = u_getIntPropertyMap(prop, &ec); + applyIntPropertyValue(map, intValueFilter, &value, ec); } else { - IntPropertyContext c = {prop, value}; - applyFilter(intPropertyFilter, &c, uprops_getSource(prop), ec); + // This code used to always call getInclusions(property source) + // which sets an error for an unsupported property. + ec = U_ILLEGAL_ARGUMENT_ERROR; + // Otherwise we would just clear() this set because + // getIntPropertyValue(c, prop) returns 0 for all code points. } return *this; } @@ -1030,13 +923,13 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, p == UCHAR_TRAIL_CANONICAL_COMBINING_CLASS || p == UCHAR_LEAD_CANONICAL_COMBINING_CLASS) { char* end; - double value = uprv_strtod(vname.data(), &end); + double val = uprv_strtod(vname.data(), &end); // Anything between 0 and 255 is valid even if unused. // Cast double->int only after range check. // We catch NaN here because comparing it with both 0 and 255 will be false // (as are all comparisons with NaN). - if (*end != 0 || !(0 <= value && value <= 255) || - (v = (int32_t)value) != value) { + if (*end != 0 || !(0 <= val && val <= 255) || + (v = (int32_t)val) != val) { // non-integral value or outside 0..255, or trailing junk FAIL(ec); } @@ -1052,11 +945,12 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, case UCHAR_NUMERIC_VALUE: { char* end; - double value = uprv_strtod(vname.data(), &end); + double val = uprv_strtod(vname.data(), &end); if (*end != 0) { FAIL(ec); } - applyFilter(numericValueFilter, &value, UPROPS_SRC_CHAR, ec); + applyFilter(numericValueFilter, &val, + CharacterProperties::getInclusionsForProperty(p, ec), ec); return *this; } case UCHAR_NAME: @@ -1085,7 +979,8 @@ UnicodeSet::applyPropertyAlias(const UnicodeString& prop, if (!mungeCharName(buf, vname.data(), sizeof(buf))) FAIL(ec); UVersionInfo version; u_versionFromString(version, buf); - applyFilter(versionFilter, &version, UPROPS_SRC_PROPSVEC, ec); + applyFilter(versionFilter, &version, + CharacterProperties::getInclusionsForProperty(p, ec), ec); return *this; } case UCHAR_SCRIPT_EXTENSIONS: diff --git a/deps/icu-small/source/common/unistr.cpp b/deps/icu-small/source/common/unistr.cpp index 48ad929e85a729..ff85734d615efa 100644 --- a/deps/icu-small/source/common/unistr.cpp +++ b/deps/icu-small/source/common/unistr.cpp @@ -1447,10 +1447,15 @@ UnicodeString::doReplace(int32_t start, } if(srcChars == 0) { - srcStart = srcLength = 0; - } else if(srcLength < 0) { - // get the srcLength if necessary - srcLength = u_strlen(srcChars + srcStart); + srcLength = 0; + } else { + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + if (srcLength < 0) { + // get the srcLength if necessary + srcLength = u_strlen(srcChars); + } } // pin the indices to legal values @@ -1465,17 +1470,28 @@ UnicodeString::doReplace(int32_t start, } newLength += srcLength; + // Check for insertion into ourself + const UChar *oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doReplace(start, length, copy.getArrayStart(), 0, srcLength); + } + // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents; // therefore we need to keep the current fArray UChar oldStackBuffer[US_STACKBUF_SIZE]; - UChar *oldArray; if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) { // copy the stack buffer contents because it will be overwritten with // fUnion.fFields values - u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength); + u_memcpy(oldStackBuffer, oldArray, oldLength); oldArray = oldStackBuffer; - } else { - oldArray = getArrayStart(); } // clone our array and allocate a bigger array if needed @@ -1503,7 +1519,7 @@ UnicodeString::doReplace(int32_t start, } // now fill in the hole with the new string - us_arrayCopy(srcChars, srcStart, newArray, start, srcLength); + us_arrayCopy(srcChars, 0, newArray, start, srcLength); setLength(newLength); @@ -1536,15 +1552,34 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng return *this; } + // Perform all remaining operations relative to srcChars + srcStart. + // From this point forward, do not use srcStart. + srcChars += srcStart; + if(srcLength < 0) { // get the srcLength if necessary - if((srcLength = u_strlen(srcChars + srcStart)) == 0) { + if((srcLength = u_strlen(srcChars)) == 0) { return *this; } } int32_t oldLength = length(); int32_t newLength = oldLength + srcLength; + + // Check for append onto ourself + const UChar* oldArray = getArrayStart(); + if (isBufferWritable() && + oldArray < srcChars + srcLength && + srcChars < oldArray + oldLength) { + // Copy into a new UnicodeString and start over + UnicodeString copy(srcChars, srcLength); + if (copy.isBogus()) { + setToBogus(); + return *this; + } + return doAppend(copy.getArrayStart(), 0, srcLength); + } + // optimize append() onto a large-enough, owned string if((newLength <= getCapacity() && isBufferWritable()) || cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) { @@ -1556,8 +1591,8 @@ UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLeng // or // str.appendString(buffer, length) // or similar. - if(srcChars + srcStart != newArray + oldLength) { - us_arrayCopy(srcChars, srcStart, newArray, oldLength, srcLength); + if(srcChars != newArray + oldLength) { + us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength); } setLength(newLength); } diff --git a/deps/icu-small/source/common/uprops.cpp b/deps/icu-small/source/common/uprops.cpp index 21723b32aa7c8d..2421c15d2bd0b6 100644 --- a/deps/icu-small/source/common/uprops.cpp +++ b/deps/icu-small/source/common/uprops.cpp @@ -25,6 +25,7 @@ #include "unicode/utypes.h" #include "unicode/uchar.h" +#include "unicode/ucptrie.h" #include "unicode/unorm2.h" #include "unicode/uscript.h" #include "unicode/ustring.h" @@ -36,6 +37,10 @@ #include "ucase.h" #include "ustr_imp.h" +// ulayout_props_data.h is machine-generated by genprops +#define INCLUDED_FROM_UPROPS_CPP +#include "ulayout_props_data.h" + U_NAMESPACE_USE /* general properties API functions ----------------------------------------- */ @@ -56,7 +61,7 @@ static UBool defaultContains(const BinaryProperty &prop, UChar32 c, UProperty /* } static UBool caseBinaryPropertyContains(const BinaryProperty &/*prop*/, UChar32 c, UProperty which) { - return ucase_hasBinaryProperty(c, which); + return static_cast(ucase_hasBinaryProperty(c, which)); } static UBool isBidiControl(const BinaryProperty &/*prop*/, UChar32 c, UProperty /*which*/) { @@ -428,6 +433,18 @@ static int32_t getTrailCombiningClass(const IntProperty &/*prop*/, UChar32 c, UP } #endif +static int32_t getInPC(const IntProperty &, UChar32 c, UProperty) { + return ucptrie_get(&inpc_trie, c); +} + +static int32_t getInSC(const IntProperty &, UChar32 c, UProperty) { + return ucptrie_get(&insc_trie, c); +} + +static int32_t getVo(const IntProperty &, UChar32 c, UProperty) { + return ucptrie_get(&vo_trie, c); +} + static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ /* * column, mask and shift values for int-value properties from u_getUnicodeProperties(). @@ -463,6 +480,9 @@ static const IntProperty intProps[UCHAR_INT_LIMIT-UCHAR_INT_START]={ { 2, UPROPS_SB_MASK, UPROPS_SB_SHIFT, defaultGetValue, defaultGetMaxValue }, { 2, UPROPS_WB_MASK, UPROPS_WB_SHIFT, defaultGetValue, defaultGetMaxValue }, { UPROPS_SRC_BIDI, 0, 0, getBiDiPairedBracketType, biDiGetMaxValue }, + { UPROPS_SRC_INPC, 0, maxInPCValue, getInPC, getMaxValueFromShift }, + { UPROPS_SRC_INSC, 0, maxInSCValue, getInSC, getMaxValueFromShift }, + { UPROPS_SRC_VO, 0, maxVoValue, getVo, getMaxValueFromShift }, }; U_CAPI int32_t U_EXPORT2 @@ -564,6 +584,34 @@ uprops_getSource(UProperty which) { } } +U_CFUNC void U_EXPORT2 +uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode) { + if (U_FAILURE(*pErrorCode)) { return; } + const UCPTrie *trie; + switch (src) { + case UPROPS_SRC_INPC: + trie = &inpc_trie; + break; + case UPROPS_SRC_INSC: + trie = &insc_trie; + break; + case UPROPS_SRC_VO: + trie = &vo_trie; + break; + default: + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + // Add the start code point of each same-value range of the trie. + UChar32 start = 0, end; + while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + nullptr, nullptr, nullptr)) >= 0) { + sa->add(sa->set, start); + start = end + 1; + } +} + #if !UCONFIG_NO_NORMALIZATION U_CAPI int32_t U_EXPORT2 diff --git a/deps/icu-small/source/common/uprops.h b/deps/icu-small/source/common/uprops.h index 2078384c3e47dd..1a8e4e84f7445d 100644 --- a/deps/icu-small/source/common/uprops.h +++ b/deps/icu-small/source/common/uprops.h @@ -397,6 +397,10 @@ enum UPropertySource { UPROPS_SRC_NFKC_CF, /** From normalizer2impl.cpp/nfc.nrm canonical iterator data */ UPROPS_SRC_NFC_CANON_ITER, + // Text layout properties. + UPROPS_SRC_INPC, + UPROPS_SRC_INSC, + UPROPS_SRC_VO, /** One more than the highest UPropertySource (UPROPS_SRC_) constant. */ UPROPS_SRC_COUNT }; @@ -425,6 +429,9 @@ uchar_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); U_CFUNC void U_EXPORT2 upropsvec_addPropertyStarts(const USetAdder *sa, UErrorCode *pErrorCode); +U_CFUNC void U_EXPORT2 +uprops_addPropertyStarts(UPropertySource src, const USetAdder *sa, UErrorCode *pErrorCode); + /** * Return a set of characters for property enumeration. * For each two consecutive characters (start, limit) in the set, @@ -452,6 +459,13 @@ U_NAMESPACE_BEGIN class UnicodeSet; +class CharacterProperties { +public: + CharacterProperties() = delete; + static void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode); + static const UnicodeSet *getInclusionsForProperty(UProperty prop, UErrorCode &errorCode); +}; + // implemented in uniset_props.cpp U_CFUNC UnicodeSet * uniset_getUnicode32Instance(UErrorCode &errorCode); diff --git a/deps/icu-small/source/common/uresbund.cpp b/deps/icu-small/source/common/uresbund.cpp index c88d9014ec23b7..3da73421c0cfb7 100644 --- a/deps/icu-small/source/common/uresbund.cpp +++ b/deps/icu-small/source/common/uresbund.cpp @@ -368,6 +368,11 @@ static UResourceDataEntry *init_entry(const char *localeID, const char *path, UE res_load(&(r->fData), r->fPath, r->fName, status); if (U_FAILURE(*status)) { + /* if we failed to load due to an out-of-memory error, exit early. */ + if (*status == U_MEMORY_ALLOCATION_ERROR) { + uprv_free(r); + return NULL; + } /* we have no such entry in dll, so it will always use fallback */ *status = U_USING_FALLBACK_WARNING; r->fBogus = U_USING_FALLBACK_WARNING; @@ -537,6 +542,11 @@ loadParentsExceptRoot(UResourceDataEntry *&t1, UErrorCode usrStatus = U_ZERO_ERROR; if (usingUSRData) { // This code inserts user override data into the inheritance chain. u2 = init_entry(name, usrDataPath, &usrStatus); + // If we failed due to out-of-memory, report that to the caller and exit early. + if (usrStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = usrStatus; + return FALSE; + } } if (usingUSRData && U_SUCCESS(usrStatus) && u2->fBogus == U_ZERO_ERROR) { @@ -642,21 +652,32 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, /* We're going to skip all the locales that do not have any data */ r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finishUnlock; + } + if(r != NULL) { /* if there is one real locale, we can look for parents. */ t1 = r; hasRealData = TRUE; if ( usingUSRData ) { /* This code inserts user override data into the inheritance chain */ UErrorCode usrStatus = U_ZERO_ERROR; UResourceDataEntry *u1 = init_entry(t1->fName, usrDataPath, &usrStatus); - if ( u1 != NULL ) { - if(u1->fBogus == U_ZERO_ERROR) { - u1->fParent = t1; - r = u1; - } else { - /* the USR override data wasn't found, set it to be deleted */ - u1->fCountExisting = 0; - } - } + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finishUnlock; + } + if ( u1 != NULL ) { + if(u1->fBogus == U_ZERO_ERROR) { + u1->fParent = t1; + r = u1; + } else { + /* the USR override data wasn't found, set it to be deleted */ + u1->fCountExisting = 0; + } + } } if (hasChopped && !isRoot) { if (!loadParentsExceptRoot(t1, name, UPRV_LENGTHOF(name), usingUSRData, usrDataPath, status)) { @@ -671,6 +692,11 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, /* insert default locale */ uprv_strcpy(name, uloc_getDefault()); r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finishUnlock; + } intStatus = U_USING_DEFAULT_WARNING; if(r != NULL) { /* the default locale exists */ t1 = r; @@ -690,6 +716,11 @@ static UResourceDataEntry *entryOpen(const char* path, const char* localeID, if(r == NULL) { uprv_strcpy(name, kRootLocaleName); r = findFirstExisting(path, name, &isRoot, &hasChopped, &isDefault, &intStatus); + // If we failed due to out-of-memory, report the failure and exit early. + if (intStatus == U_MEMORY_ALLOCATION_ERROR) { + *status = intStatus; + goto finishUnlock; + } if(r != NULL) { t1 = r; intStatus = U_USING_DEFAULT_WARNING; @@ -2421,7 +2452,7 @@ ures_loc_nextLocale(UEnumeration* en, UResourceBundle *k = NULL; const char *result = NULL; int32_t len = 0; - if(ures_hasNext(res) && (k = ures_getNextResource(res, &ctx->curr, status))) { + if(ures_hasNext(res) && (k = ures_getNextResource(res, &ctx->curr, status)) != 0) { result = ures_getKey(k); len = (int32_t)uprv_strlen(result); } @@ -2843,7 +2874,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status) valuesBuf[0]=0; valuesBuf[1]=0; - while((locale = uenum_next(locs, &locLen, status))) { + while((locale = uenum_next(locs, &locLen, status)) != 0) { UResourceBundle *bund = NULL; UResourceBundle *subPtr = NULL; UErrorCode subStatus = U_ZERO_ERROR; /* don't fail if a bundle is unopenable */ @@ -2868,7 +2899,7 @@ ures_getKeywordValues(const char *path, const char *keyword, UErrorCode *status) continue; } - while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) + while((subPtr = ures_getNextResource(&item,&subItem,&subStatus)) != 0 && U_SUCCESS(subStatus)) { const char *k; int32_t i; diff --git a/deps/icu-small/source/common/uresdata.h b/deps/icu-small/source/common/uresdata.h index 8d845e3dfcfc83..4e28ddccf63199 100644 --- a/deps/icu-small/source/common/uresdata.h +++ b/deps/icu-small/source/common/uresdata.h @@ -475,7 +475,7 @@ U_NAMESPACE_BEGIN class ResourceDataValue : public ResourceValue { public: - ResourceDataValue() : pResData(NULL), res(URES_NONE) {} + ResourceDataValue() : pResData(NULL), res(static_cast(URES_NONE)) {} virtual ~ResourceDataValue(); void setData(const ResourceData *data) { pResData = data; } diff --git a/deps/icu-small/source/common/ushape.cpp b/deps/icu-small/source/common/ushape.cpp index c3f3ef9e2078f0..792de50bbcc736 100644 --- a/deps/icu-small/source/common/ushape.cpp +++ b/deps/icu-small/source/common/ushape.cpp @@ -1323,7 +1323,7 @@ shapeUnicode(UChar *dest, int32_t sourceLength, /* to ensure the array index is within the range */ U_ASSERT(dest[i] >= 0x064Bu && dest[i]-0x064Bu < UPRV_LENGTHOF(IrrelevantPos)); - dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape; + dest[i] = 0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + static_cast(Shape); } }else if ((currLink & APRESENT) > 0) { dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape); diff --git a/deps/icu-small/source/common/usprep.cpp b/deps/icu-small/source/common/usprep.cpp index 54a77172fe1b09..1e54e6cab59eb2 100644 --- a/deps/icu-small/source/common/usprep.cpp +++ b/deps/icu-small/source/common/usprep.cpp @@ -112,7 +112,9 @@ hashEntry(const UHashTok parm) { UHashTok namekey, pathkey; namekey.pointer = b->name; pathkey.pointer = b->path; - return uhash_hashChars(namekey)+37*uhash_hashChars(pathkey); + uint32_t unsignedHash = static_cast(uhash_hashChars(namekey)) + + 37u * static_cast(uhash_hashChars(pathkey)); + return static_cast(unsignedHash); } /* compares two entries */ @@ -351,9 +353,9 @@ usprep_getProfile(const char* path, LocalMemory keyName; LocalMemory keyPath; if( key.allocateInsteadAndReset() == NULL || - keyName.allocateInsteadAndCopy(uprv_strlen(name)+1) == NULL || + keyName.allocateInsteadAndCopy(static_cast(uprv_strlen(name)+1)) == NULL || (path != NULL && - keyPath.allocateInsteadAndCopy(uprv_strlen(path)+1) == NULL) + keyPath.allocateInsteadAndCopy(static_cast(uprv_strlen(path)+1)) == NULL) ) { *status = U_MEMORY_ALLOCATION_ERROR; usprep_unload(newProfile.getAlias()); @@ -726,7 +728,7 @@ usprep_prepare( const UStringPrepProfile* profile, ((result < _SPREP_TYPE_THRESHOLD) && (result & 0x01) /* first bit says it the code point is prohibited*/) ){ *status = U_STRINGPREP_PROHIBITED_ERROR; - uprv_syntaxError(b1, b2Index-U16_LENGTH(ch), b2Len, parseError); + uprv_syntaxError(b2, b2Index-U16_LENGTH(ch), b2Len, parseError); return 0; } diff --git a/deps/icu-small/source/common/ustrcase.cpp b/deps/icu-small/source/common/ustrcase.cpp index 978bd3b7b8618e..618e847c65d5a6 100644 --- a/deps/icu-small/source/common/ustrcase.cpp +++ b/deps/icu-small/source/common/ustrcase.cpp @@ -218,7 +218,7 @@ int32_t toLower(int32_t caseLocale, uint32_t options, int32_t srcIndex = srcStart; for (;;) { // fast path for simple cases - UChar lead; + UChar lead = 0; while (srcIndex < srcLimit) { lead = src[srcIndex]; int32_t delta; @@ -238,7 +238,7 @@ int32_t toLower(int32_t caseLocale, uint32_t options, continue; } } - lead += delta; + lead += static_cast(delta); destIndex = appendUnchanged(dest, destIndex, destCapacity, src + prev, srcIndex - 1 - prev, options, edits); if (destIndex >= 0) { @@ -313,7 +313,7 @@ int32_t toUpper(int32_t caseLocale, uint32_t options, int32_t srcIndex = 0; for (;;) { // fast path for simple cases - UChar lead; + UChar lead = 0; while (srcIndex < srcLength) { lead = src[srcIndex]; int32_t delta; @@ -333,7 +333,7 @@ int32_t toUpper(int32_t caseLocale, uint32_t options, continue; } } - lead += delta; + lead += static_cast(delta); destIndex = appendUnchanged(dest, destIndex, destCapacity, src + prev, srcIndex - 1 - prev, options, edits); if (destIndex >= 0) { @@ -1747,8 +1747,8 @@ static int32_t _cmpFold( } if(matchLen1) { - *matchLen1=m1-org1; - *matchLen2=m2-org2; + *matchLen1=static_cast(m1-org1); + *matchLen2=static_cast(m2-org2); } return cmpRes; } diff --git a/deps/icu-small/source/common/utext.cpp b/deps/icu-small/source/common/utext.cpp index 6f3806f27db3f0..5e3a005626e6b8 100644 --- a/deps/icu-small/source/common/utext.cpp +++ b/deps/icu-small/source/common/utext.cpp @@ -1196,9 +1196,9 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { // Swap the UText buffers. // We want to fill what was previously the alternate buffer, // and make what was the current buffer be the new alternate. - UTF8Buf *u8b = (UTF8Buf *)ut->q; + UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; ut->q = ut->p; - ut->p = u8b; + ut->p = u8b_swap; int32_t strLen = ut->b; UBool nulTerminated = FALSE; @@ -1207,9 +1207,9 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { nulTerminated = TRUE; } - UChar *buf = u8b->buf; - uint8_t *mapToNative = u8b->mapToNative; - uint8_t *mapToUChars = u8b->mapToUChars; + UChar *buf = u8b_swap->buf; + uint8_t *mapToNative = u8b_swap->mapToNative; + uint8_t *mapToUChars = u8b_swap->mapToUChars; int32_t destIx = 0; int32_t srcIx = ix; UBool seenNonAscii = FALSE; @@ -1230,7 +1230,7 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { // General case, handle everything. if (seenNonAscii == FALSE) { seenNonAscii = TRUE; - u8b->bufNILimit = destIx; + u8b_swap->bufNILimit = destIx; } int32_t cIx = srcIx; @@ -1263,22 +1263,22 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { mapToUChars[srcIx - ix] = (uint8_t)destIx; // fill in Buffer descriptor - u8b->bufNativeStart = ix; - u8b->bufNativeLimit = srcIx; - u8b->bufStartIdx = 0; - u8b->bufLimitIdx = destIx; + u8b_swap->bufNativeStart = ix; + u8b_swap->bufNativeLimit = srcIx; + u8b_swap->bufStartIdx = 0; + u8b_swap->bufLimitIdx = destIx; if (seenNonAscii == FALSE) { - u8b->bufNILimit = destIx; + u8b_swap->bufNILimit = destIx; } - u8b->toUCharsMapStart = u8b->bufNativeStart; + u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart; // Set UText chunk to refer to this buffer. ut->chunkContents = buf; ut->chunkOffset = 0; - ut->chunkLength = u8b->bufLimitIdx; - ut->chunkNativeStart = u8b->bufNativeStart; - ut->chunkNativeLimit = u8b->bufNativeLimit; - ut->nativeIndexingLimit = u8b->bufNILimit; + ut->chunkLength = u8b_swap->bufLimitIdx; + ut->chunkNativeStart = u8b_swap->bufNativeStart; + ut->chunkNativeLimit = u8b_swap->bufNativeLimit; + ut->nativeIndexingLimit = u8b_swap->bufNILimit; // For zero terminated strings, keep track of the maximum point // scanned so far. @@ -1311,13 +1311,13 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { // Swap the UText buffers. // We want to fill what was previously the alternate buffer, // and make what was the current buffer be the new alternate. - UTF8Buf *u8b = (UTF8Buf *)ut->q; + UTF8Buf *u8b_swap = (UTF8Buf *)ut->q; ut->q = ut->p; - ut->p = u8b; + ut->p = u8b_swap; - UChar *buf = u8b->buf; - uint8_t *mapToNative = u8b->mapToNative; - uint8_t *mapToUChars = u8b->mapToUChars; + UChar *buf = u8b_swap->buf; + uint8_t *mapToNative = u8b_swap->mapToNative; + uint8_t *mapToUChars = u8b_swap->mapToUChars; int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1; // Note that toUCharsMapStart can be negative. Happens when the remaining // text from current position to the beginning is less than the buffer size. @@ -1387,19 +1387,19 @@ utf8TextAccess(UText *ut, int64_t index, UBool forward) { bufNILimit = destIx; } } - u8b->bufNativeStart = srcIx; - u8b->bufNativeLimit = ix; - u8b->bufStartIdx = destIx; - u8b->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2; - u8b->bufNILimit = bufNILimit - u8b->bufStartIdx; - u8b->toUCharsMapStart = toUCharsMapStart; - - ut->chunkContents = &buf[u8b->bufStartIdx]; - ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx; + u8b_swap->bufNativeStart = srcIx; + u8b_swap->bufNativeLimit = ix; + u8b_swap->bufStartIdx = destIx; + u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2; + u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx; + u8b_swap->toUCharsMapStart = toUCharsMapStart; + + ut->chunkContents = &buf[u8b_swap->bufStartIdx]; + ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx; ut->chunkOffset = ut->chunkLength; - ut->chunkNativeStart = u8b->bufNativeStart; - ut->chunkNativeLimit = u8b->bufNativeLimit; - ut->nativeIndexingLimit = u8b->bufNILimit; + ut->chunkNativeStart = u8b_swap->bufNativeStart; + ut->chunkNativeLimit = u8b_swap->bufNativeLimit; + ut->nativeIndexingLimit = u8b_swap->bufNILimit; return TRUE; } diff --git a/deps/icu-small/source/common/utrie.h b/deps/icu-small/source/common/utrie.h index 641027a1a3f448..3e2197eda6c261 100644 --- a/deps/icu-small/source/common/utrie.h +++ b/deps/icu-small/source/common/utrie.h @@ -21,7 +21,6 @@ #include "unicode/utypes.h" #include "unicode/utf16.h" -#include "udataswp.h" U_CDECL_BEGIN @@ -732,17 +731,13 @@ utrie_serialize(UNewTrie *trie, void *data, int32_t capacity, UBool reduceTo16Bits, UErrorCode *pErrorCode); -/** - * Swap a serialized UTrie. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /* serialization ------------------------------------------------------------ */ +// UTrie signature values, in platform endianness and opposite endianness. +// The UTrie signature ASCII byte values spell "Trie". +#define UTRIE_SIG 0x54726965 +#define UTRIE_OE_SIG 0x65697254 + /** * Trie data structure in serialized form: * diff --git a/deps/icu-small/source/common/utrie2.cpp b/deps/icu-small/source/common/utrie2.cpp index 8f9183bafad71f..24ef5782c90565 100644 --- a/deps/icu-small/source/common/utrie2.cpp +++ b/deps/icu-small/source/common/utrie2.cpp @@ -24,11 +24,10 @@ * This file contains only the runtime and enumeration code, for read-only access. * See utrie2_builder.c for the builder code. */ -#ifdef UTRIE2_DEBUG -# include -#endif - #include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif #include "unicode/utf.h" #include "unicode/utf8.h" #include "unicode/utf16.h" @@ -202,6 +201,9 @@ utrie2_openFromSerialized(UTrie2ValueBits valueBits, trie->memory=(uint32_t *)data; trie->length=actualLength; trie->isMemoryOwned=FALSE; +#ifdef UTRIE2_DEBUG + trie->name="fromSerialized"; +#endif /* set the pointers to its index and data arrays */ p16=(const uint16_t *)(header+1); @@ -294,6 +296,9 @@ utrie2_openDummy(UTrie2ValueBits valueBits, trie->errorValue=errorValue; trie->highStart=0; trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET; +#ifdef UTRIE2_DEBUG + trie->name="dummy"; +#endif /* set the header fields */ header=(UTrie2Header *)trie->memory; @@ -373,34 +378,15 @@ utrie2_close(UTrie2 *trie) { } if(trie->newTrie!=NULL) { uprv_free(trie->newTrie->data); +#ifdef UCPTRIE_DEBUG + umutablecptrie_close(trie->newTrie->t3); +#endif uprv_free(trie->newTrie); } uprv_free(trie); } } -U_CAPI int32_t U_EXPORT2 -utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) { - uint32_t signature; - if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) { - return 0; - } - signature=*(const uint32_t *)data; - if(signature==UTRIE2_SIG) { - return 2; - } - if(anyEndianOk && signature==UTRIE2_OE_SIG) { - return 2; - } - if(signature==UTRIE_SIG) { - return 1; - } - if(anyEndianOk && signature==UTRIE_OE_SIG) { - return 1; - } - return 0; -} - U_CAPI UBool U_EXPORT2 utrie2_isFrozen(const UTrie2 *trie) { return (UBool)(trie->newTrie==NULL); @@ -430,96 +416,6 @@ utrie2_serialize(const UTrie2 *trie, return trie->length; } -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - const UTrie2Header *inTrie; - UTrie2Header trie; - int32_t dataLength, size; - UTrie2ValueBits valueBits; - - if(U_FAILURE(*pErrorCode)) { - return 0; - } - if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* setup and swapping */ - if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { - *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - inTrie=(const UTrie2Header *)inData; - trie.signature=ds->readUInt32(inTrie->signature); - trie.options=ds->readUInt16(inTrie->options); - trie.indexLength=ds->readUInt16(inTrie->indexLength); - trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); - - valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); - dataLength=(int32_t)trie.shiftedDataLength<=0) { - UTrie2Header *outTrie; - - if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); - ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); - - /* swap the index and the data */ - switch(valueBits) { - case UTRIE2_16_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); - break; - case UTRIE2_32_VALUE_BITS: - ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); - ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, - (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); - break; - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - - return size; -} - -// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c -// to avoid a dependency from utrie2.cpp on utrie.c. - /* enumeration -------------------------------------------------------------- */ #define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b)) @@ -746,7 +642,7 @@ uint16_t BackwardUTrie2StringIterator::previous16() { codePointLimit=codePointStart; if(start>=codePointStart) { codePoint=U_SENTINEL; - return trie->errorValue; + return static_cast(trie->errorValue); } uint16_t result; UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result); @@ -757,7 +653,7 @@ uint16_t ForwardUTrie2StringIterator::next16() { codePointStart=codePointLimit; if(codePointLimit==limit) { codePoint=U_SENTINEL; - return trie->errorValue; + return static_cast(trie->errorValue); } uint16_t result; UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result); diff --git a/deps/icu-small/source/common/utrie2.h b/deps/icu-small/source/common/utrie2.h index 8e1caa5e90bde2..75028ee23ac1e9 100644 --- a/deps/icu-small/source/common/utrie2.h +++ b/deps/icu-small/source/common/utrie2.h @@ -22,7 +22,6 @@ #include "unicode/utypes.h" #include "unicode/utf8.h" #include "putilimp.h" -#include "udataswp.h" U_CDECL_BEGIN @@ -330,40 +329,6 @@ utrie2_serialize(const UTrie2 *trie, /* Public UTrie2 API: miscellaneous functions ------------------------------- */ -/** - * Get the UTrie version from 32-bit-aligned memory containing the serialized form - * of either a UTrie (version 1) or a UTrie2 (version 2). - * - * @param data a pointer to 32-bit-aligned memory containing the serialized form - * of a UTrie, version 1 or 2 - * @param length the number of bytes available at data; - * can be more than necessary (see return value) - * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. - * If TRUE, opposite-endian serialized forms are recognized as well. - * @return the UTrie version of the serialized form, or 0 if it is not - * recognized as a serialized UTrie - */ -U_CAPI int32_t U_EXPORT2 -utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk); - -/** - * Swap a serialized UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swap(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - -/** - * Swap a serialized UTrie or UTrie2. - * @internal - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode); - /** * Build a UTrie2 (version 2) from a UTrie (version 1). * Enumerates all values in the UTrie and builds a UTrie2 with the same values. @@ -709,6 +674,10 @@ struct UTrie2 { UBool padding1; int16_t padding2; UNewTrie2 *newTrie; /* builder object; NULL when frozen */ + +#ifdef UTRIE2_DEBUG + const char *name; +#endif }; /** diff --git a/deps/icu-small/source/common/utrie2_builder.cpp b/deps/icu-small/source/common/utrie2_builder.cpp index d8a3a06757370d..80e09c9c26b3e1 100644 --- a/deps/icu-small/source/common/utrie2_builder.cpp +++ b/deps/icu-small/source/common/utrie2_builder.cpp @@ -24,16 +24,23 @@ * This file contains only the builder code. * See utrie2.c for the runtime and enumeration code. */ +// #define UTRIE2_DEBUG #ifdef UTRIE2_DEBUG # include #endif +// #define UCPTRIE_DEBUG #include "unicode/utypes.h" +#ifdef UCPTRIE_DEBUG +#include "unicode/ucptrie.h" +#include "unicode/umutablecptrie.h" +#include "ucptrie_impl.h" +#endif #include "cmemory.h" #include "utrie2.h" #include "utrie2_impl.h" -#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */ +#include "utrie.h" // for utrie2_fromUTrie() /* Implementation notes ----------------------------------------------------- */ @@ -132,8 +139,14 @@ utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) trie->errorValue=errorValue; trie->highStart=0x110000; trie->newTrie=newTrie; +#ifdef UTRIE2_DEBUG + trie->name="open"; +#endif newTrie->data=data; +#ifdef UCPTRIE_DEBUG + newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); +#endif newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; newTrie->initialValue=initialValue; newTrie->errorValue=errorValue; @@ -246,6 +259,14 @@ cloneBuilder(const UNewTrie2 *other) { uprv_free(trie); return NULL; } +#ifdef UCPTRIE_DEBUG + if(other->t3==nullptr) { + trie->t3=nullptr; + } else { + UErrorCode errorCode=U_ZERO_ERROR; + trie->t3=umutablecptrie_clone(other->t3, &errorCode); + } +#endif trie->dataCapacity=other->dataCapacity; /* clone data */ @@ -343,6 +364,22 @@ copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { } #ifdef UTRIE2_DEBUG +static long countInitial(const UTrie2 *trie) { + uint32_t initialValue=trie->initialValue; + int32_t length=trie->dataLength; + long count=0; + if(trie->data16!=nullptr) { + for(int32_t i=0; idata16[i]==initialValue) { ++count; } + } + } else { + for(int32_t i=0; idata32[i]==initialValue) { ++count; } + } + } + return count; +} + static void utrie_printLengths(const UTrie *trie) { long indexLength=trie->indexLength; @@ -357,8 +394,8 @@ utrie2_printLengths(const UTrie2 *trie, const char *which) { long indexLength=trie->indexLength; long dataLength=(long)trie->dataLength; long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); - printf("**UTrie2Lengths(%s)** index:%6ld data:%6ld serialized:%6ld\n", - which, indexLength, dataLength, totalLength); + printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", + which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); } #endif @@ -622,6 +659,9 @@ set32(UNewTrie2 *trie, *pErrorCode=U_NO_WRITE_PERMISSION; return; } +#ifdef UCPTRIE_DEBUG + umutablecptrie_set(trie->t3, c, value, pErrorCode); +#endif block=getDataBlock(trie, c, forLSCP); if(block<0) { @@ -717,6 +757,9 @@ utrie2_setRange32(UTrie2 *trie, *pErrorCode=U_NO_WRITE_PERMISSION; return; } +#ifdef UCPTRIE_DEBUG + umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); +#endif if(!overwrite && value==newTrie->initialValue) { return; /* nothing to do */ } @@ -732,7 +775,7 @@ utrie2_setRange32(UTrie2 *trie, return; } - nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK; + nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; if(nextStart<=limit) { fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, value, newTrie->initialValue, overwrite); @@ -983,6 +1026,10 @@ findHighStart(UNewTrie2 *trie, uint32_t highValue) { */ static void compactData(UNewTrie2 *trie) { +#ifdef UTRIE2_DEBUG + int32_t countSame=0, sumOverlaps=0; +#endif + int32_t start, newStart, movedStart; int32_t blockLength, overlap; int32_t i, mapIndex, blockCount; @@ -1023,6 +1070,9 @@ compactData(UNewTrie2 *trie) { if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) >=0 ) { +#ifdef UTRIE2_DEBUG + ++countSame; +#endif /* found an identical block, set the other block's index value for the current block */ for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { trie->map[mapIndex++]=movedStart; @@ -1042,6 +1092,9 @@ compactData(UNewTrie2 *trie) { overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); overlap-=UTRIE2_DATA_GRANULARITY) {} +#ifdef UTRIE2_DEBUG + sumOverlaps+=overlap; +#endif if(overlap>0 || newStart%lu\n", - (long)trie->dataLength, (long)newStart); + printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", + (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); #endif trie->dataLength=newStart; @@ -1163,7 +1216,7 @@ compactIndex2(UNewTrie2 *trie) { #ifdef UTRIE2_DEBUG /* we saved some space */ - printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n", + printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", (long)trie->index2Length, (long)newStart); #endif @@ -1193,7 +1246,7 @@ compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { trie->highStart=newTrie->highStart=highStart; #ifdef UTRIE2_DEBUG - printf("UTrie2: highStart U+%04lx highValue 0x%lx initialValue 0x%lx\n", + printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", (long)highStart, (long)highValue, (long)trie->initialValue); #endif @@ -1211,7 +1264,7 @@ compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { compactIndex2(newTrie); #ifdef UTRIE2_DEBUG } else { - printf("UTrie2: highStart U+%04lx count of 16-bit index-2 words %lu->%lu\n", + printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); #endif } @@ -1334,7 +1387,7 @@ utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { if(highStart<=0x10000) { trie->index2NullOffset=0xffff; } else { - trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset; + trie->index2NullOffset=static_cast(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset); } trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; @@ -1411,31 +1464,18 @@ utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { return; } +#ifdef UTRIE2_DEBUG + utrie2_printLengths(trie, ""); +#endif + +#ifdef UCPTRIE_DEBUG + umutablecptrie_setName(newTrie->t3, trie->name); + ucptrie_close( + umutablecptrie_buildImmutable( + newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); +#endif /* Delete the UNewTrie2. */ uprv_free(newTrie->data); uprv_free(newTrie); trie->newTrie=NULL; } - -/* - * This is here to avoid a dependency from utrie2.cpp on utrie.c. - * This file already depends on utrie.c. - * Otherwise, this should be in utrie2.cpp right after utrie2_swap(). - */ -U_CAPI int32_t U_EXPORT2 -utrie2_swapAnyVersion(const UDataSwapper *ds, - const void *inData, int32_t length, void *outData, - UErrorCode *pErrorCode) { - if(U_SUCCESS(*pErrorCode)) { - switch(utrie2_getVersion(inData, length, TRUE)) { - case 1: - return utrie_swap(ds, inData, length, outData, pErrorCode); - case 2: - return utrie2_swap(ds, inData, length, outData, pErrorCode); - default: - *pErrorCode=U_INVALID_FORMAT_ERROR; - return 0; - } - } - return 0; -} diff --git a/deps/icu-small/source/common/utrie2_impl.h b/deps/icu-small/source/common/utrie2_impl.h index b7dc9d3fb45fd4..2a14db3a6bdd1a 100644 --- a/deps/icu-small/source/common/utrie2_impl.h +++ b/deps/icu-small/source/common/utrie2_impl.h @@ -22,22 +22,20 @@ #ifndef __UTRIE2_IMPL_H__ #define __UTRIE2_IMPL_H__ +#ifdef UCPTRIE_DEBUG +#include "unicode/umutablecptrie.h" +#endif #include "utrie2.h" /* Public UTrie2 API implementation ----------------------------------------- */ /* - * These definitions are mostly needed by utrie2.c, + * These definitions are mostly needed by utrie2.cpp, * but also by utrie2_serialize() and utrie2_swap(). */ -/* - * UTrie and UTrie2 signature values, - * in platform endianness and opposite endianness. - */ -#define UTRIE_SIG 0x54726965 -#define UTRIE_OE_SIG 0x65697254 - +// UTrie2 signature values, in platform endianness and opposite endianness. +// The UTrie2 signature ASCII byte values spell "Tri2". #define UTRIE2_SIG 0x54726932 #define UTRIE2_OE_SIG 0x32697254 @@ -145,6 +143,9 @@ struct UNewTrie2 { int32_t index1[UNEWTRIE2_INDEX_1_LENGTH]; int32_t index2[UNEWTRIE2_MAX_INDEX_2_LENGTH]; uint32_t *data; +#ifdef UCPTRIE_DEBUG + UMutableCPTrie *t3; +#endif uint32_t initialValue, errorValue; int32_t index2Length, dataCapacity, dataLength; diff --git a/deps/icu-small/source/common/utrie_swap.cpp b/deps/icu-small/source/common/utrie_swap.cpp new file mode 100644 index 00000000000000..5abe7bd5d77e14 --- /dev/null +++ b/deps/icu-small/source/common/utrie_swap.cpp @@ -0,0 +1,344 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// utrie_swap.cpp +// created: 2018aug08 Markus W. Scherer + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "ucptrie_impl.h" +#include "udataswp.h" +#include "utrie.h" +#include "utrie2_impl.h" + +// These functions for swapping different generations of ICU code point tries are here +// so that their implementation files need not depend on swapper code, +// need not depend on each other, and so that other swapper code +// need not depend on other trie code. + +namespace { + +constexpr int32_t ASCII_LIMIT = 0x80; + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrieHeader *inTrie; + UTrieHeader trie; + int32_t size; + UBool dataIs32; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && (uint32_t)lengthreadUInt32(inTrie->signature); + trie.options=ds->readUInt32(inTrie->options); + trie.indexLength=udata_readInt32(ds, inTrie->indexLength); + trie.dataLength=udata_readInt32(ds, inTrie->dataLength); + + if( trie.signature!=0x54726965 || + (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT || + ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT || + trie.indexLength=0) { + UTrieHeader *outTrie; + + if(lengthswapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode); + + /* swap the index and the data */ + if(dataIs32) { + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + } else { + ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode); + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +utrie2_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UTrie2Header *inTrie; + UTrie2Header trie; + int32_t dataLength, size; + UTrie2ValueBits valueBits; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UTrie2Header *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength); + + valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK); + dataLength=(int32_t)trie.shiftedDataLength<=0) { + UTrie2Header *outTrie; + + if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueBits) { + case UTRIE2_16_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UTRIE2_32_VALUE_BITS: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +U_CAPI int32_t U_EXPORT2 +ucptrie_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UCPTrieHeader *inTrie; + UCPTrieHeader trie; + int32_t dataLength, size; + UCPTrieValueWidth valueWidth; + + if(U_FAILURE(*pErrorCode)) { + return 0; + } + if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* setup and swapping */ + if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inTrie=(const UCPTrieHeader *)inData; + trie.signature=ds->readUInt32(inTrie->signature); + trie.options=ds->readUInt16(inTrie->options); + trie.indexLength=ds->readUInt16(inTrie->indexLength); + trie.dataLength = ds->readUInt16(inTrie->dataLength); + + UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3); + valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK); + dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength; + + int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ? + UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; + if( trie.signature!=UCPTRIE_SIG || + type > UCPTRIE_TYPE_SMALL || + (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 || + valueWidth > UCPTRIE_VALUE_BITS_8 || + trie.indexLength < minIndexLength || + dataLength < ASCII_LIMIT + ) { + *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */ + return 0; + } + + size=sizeof(UCPTrieHeader)+trie.indexLength*2; + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + size+=dataLength*2; + break; + case UCPTRIE_VALUE_BITS_32: + size+=dataLength*4; + break; + case UCPTRIE_VALUE_BITS_8: + size+=dataLength; + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + + if(length>=0) { + UCPTrieHeader *outTrie; + + if(lengthswapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode); + ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode); + + /* swap the index and the data */ + switch(valueWidth) { + case UCPTRIE_VALUE_BITS_16: + ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_32: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4, + (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode); + break; + case UCPTRIE_VALUE_BITS_8: + ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode); + if(inTrie!=outTrie) { + uprv_memmove((outTrie+1)+trie.indexLength, (inTrie+1)+trie.indexLength, dataLength); + } + break; + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } + } + + return size; +} + +namespace { + +/** + * Gets the trie version from 32-bit-aligned memory containing the serialized form + * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3). + * + * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie + * @param length the number of bytes available at data; + * can be more than necessary (see return value) + * @param anyEndianOk If FALSE, only platform-endian serialized forms are recognized. + * If TRUE, opposite-endian serialized forms are recognized as well. + * @return the trie version of the serialized form, or 0 if it is not + * recognized as a serialized trie + */ +int32_t +getVersion(const void *data, int32_t length, UBool anyEndianOk) { + uint32_t signature; + if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) { + return 0; + } + signature=*(const uint32_t *)data; + if(signature==UCPTRIE_SIG) { + return 3; + } + if(anyEndianOk && signature==UCPTRIE_OE_SIG) { + return 3; + } + if(signature==UTRIE2_SIG) { + return 2; + } + if(anyEndianOk && signature==UTRIE2_OE_SIG) { + return 2; + } + if(signature==UTRIE_SIG) { + return 1; + } + if(anyEndianOk && signature==UTRIE_OE_SIG) { + return 1; + } + return 0; +} + +} // namespace + +U_CAPI int32_t U_EXPORT2 +utrie_swapAnyVersion(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return 0; } + switch(getVersion(inData, length, TRUE)) { + case 1: + return utrie_swap(ds, inData, length, outData, pErrorCode); + case 2: + return utrie2_swap(ds, inData, length, outData, pErrorCode); + case 3: + return ucptrie_swap(ds, inData, length, outData, pErrorCode); + default: + *pErrorCode=U_INVALID_FORMAT_ERROR; + return 0; + } +} diff --git a/deps/icu-small/source/common/uts46.cpp b/deps/icu-small/source/common/uts46.cpp index 5a23572eb64af2..b9e6cb023bb379 100644 --- a/deps/icu-small/source/common/uts46.cpp +++ b/deps/icu-small/source/common/uts46.cpp @@ -557,7 +557,10 @@ UTS46::processUnicode(const UnicodeString &src, destArray=dest.getBuffer(); destLength+=newLength-labelLength; labelLimit=labelStart+=newLength+1; - } else if(0xdf<=c && c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { + continue; + } else if(c<0xdf) { + // pass + } else if(c<=0x200d && (c==0xdf || c==0x3c2 || c>=0x200c)) { info.isTransDiff=TRUE; if(doMapDevChars) { destLength=mapDevChars(dest, labelStart, labelLimit, errorCode); @@ -565,15 +568,23 @@ UTS46::processUnicode(const UnicodeString &src, return dest; } destArray=dest.getBuffer(); - // Do not increment labelLimit in case c was removed. // All deviation characters have been mapped, no need to check for them again. doMapDevChars=FALSE; - } else { - ++labelLimit; + // Do not increment labelLimit in case c was removed. + continue; + } + } else if(U16_IS_SURROGATE(c)) { + if(U16_IS_SURROGATE_LEAD(c) ? + (labelLimit+1)==destLength || !U16_IS_TRAIL(destArray[labelLimit+1]) : + labelLimit==labelStart || !U16_IS_LEAD(destArray[labelLimit-1])) { + // Map an unpaired surrogate to U+FFFD before normalization so that when + // that removes characters we do not turn two unpaired ones into a pair. + info.labelErrors|=UIDNA_ERROR_DISALLOWED; + dest.setCharAt(labelLimit, 0xfffd); + destArray=dest.getBuffer(); } - } else { - ++labelLimit; } + ++labelLimit; } // Permit an empty label at the end (0 -#define MAX_LENGTH_ID 40 +U_NAMESPACE_BEGIN -/* The layout of the Tzi value in the registry */ -typedef struct -{ - int32_t bias; - int32_t standardBias; - int32_t daylightBias; - SYSTEMTIME standardDate; - SYSTEMTIME daylightDate; -} TZI; - -/** - * Various registry keys and key fragments. - */ -static const wchar_t CURRENT_ZONE_REGKEY[] = L"SYSTEM\\CurrentControlSet\\Control\\TimeZoneInformation\\"; -static const char STANDARD_TIME_REGKEY[] = " Standard Time"; -static const char TZI_REGKEY[] = "TZI"; -static const char STD_REGKEY[] = "Std"; +// The value of MAX_TIMEZONE_ID_LENGTH is 128, which is defined in DYNAMIC_TIME_ZONE_INFORMATION +#define MAX_TIMEZONE_ID_LENGTH 128 /** - * The time zone root keys (under HKLM) for Win7+ - */ -static const char TZ_REGKEY[] = "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Time Zones\\"; - -static LONG openTZRegKey(HKEY *hkey, const char *winid) -{ - char subKeyName[110]; /* TODO: why 110?? */ - char *name; - LONG result; - - uprv_strcpy(subKeyName, TZ_REGKEY); - name = &subKeyName[strlen(subKeyName)]; - uprv_strcat(subKeyName, winid); - - result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, - subKeyName, - 0, - KEY_QUERY_VALUE, - hkey); - return result; -} - -static LONG getTZI(const char *winid, TZI *tzi) -{ - DWORD cbData = sizeof(TZI); - LONG result; - HKEY hkey; - - result = openTZRegKey(&hkey, winid); - - if (result == ERROR_SUCCESS) - { - result = RegQueryValueExA(hkey, - TZI_REGKEY, - NULL, - NULL, - (LPBYTE)tzi, - &cbData); - RegCloseKey(hkey); - } - - return result; -} - -static LONG getSTDName(const char *winid, char *regStdName, int32_t length) -{ - DWORD cbData = length; - LONG result; - HKEY hkey; - - result = openTZRegKey(&hkey, winid); - - if (result == ERROR_SUCCESS) - { - result = RegQueryValueExA(hkey, - STD_REGKEY, - NULL, - NULL, - (LPBYTE)regStdName, - &cbData); - RegCloseKey(hkey); - } - - return result; -} - -static LONG getTZKeyName(char* tzKeyName, int32_t tzKeyNamelength) -{ - HKEY hkey; - LONG result = FALSE; - WCHAR timeZoneKeyNameData[128]; - DWORD timeZoneKeyNameLength = static_cast(sizeof(timeZoneKeyNameData)); - - if(ERROR_SUCCESS == RegOpenKeyExW( - HKEY_LOCAL_MACHINE, - CURRENT_ZONE_REGKEY, - 0, - KEY_QUERY_VALUE, - &hkey)) - { - if (ERROR_SUCCESS == RegQueryValueExW( - hkey, - L"TimeZoneKeyName", - NULL, - NULL, - (LPBYTE)timeZoneKeyNameData, - &timeZoneKeyNameLength)) - { - // Ensure null termination. - timeZoneKeyNameData[UPRV_LENGTHOF(timeZoneKeyNameData) - 1] = L'\0'; - - // Convert the UTF-16 string to UTF-8. - UErrorCode status = U_ZERO_ERROR; - u_strToUTF8(tzKeyName, tzKeyNamelength, NULL, reinterpret_cast(timeZoneKeyNameData), -1, &status); - if (U_ZERO_ERROR == status) - { - result = ERROR_SUCCESS; - } - } - RegCloseKey(hkey); - } - - return result; -} - -/* - This code attempts to detect the Windows time zone directly, - as set in the Windows Date and Time control panel. It attempts - to work on versions greater than Windows Vista and on localized - installs. It works by directly interrogating the registry and - comparing the data there with the data returned by the - GetTimeZoneInformation API, along with some other strategies. The - registry contains time zone data under this key: - - HKLM\SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones\ - - Under this key are several subkeys, one for each time zone. For - example these subkeys are named "Pacific Standard Time" on Vista+. - There are some other wrinkles; see the code for - details. The subkey name is NOT LOCALIZED, allowing us to support - localized installs. - - Under the subkey are data values. We care about: - - Std Standard time display name, localized - TZI Binary block of data - - The TZI data is of particular interest. It contains the offset, two - more offsets for standard and daylight time, and the start and end - rules. This is the same data returned by the GetTimeZoneInformation - API. The API may modify the data on the way out, so we have to be - careful, but essentially we do a binary comparison against the TZI - blocks of various registry keys. When we find a match, we know what - time zone Windows is set to. Since the registry key is not - localized, we can then translate the key through a simple table - lookup into the corresponding ICU time zone. - - This strategy doesn't always work because there are zones which - share an offset and rules, so more than one TZI block will match. - For example, both Tokyo and Seoul are at GMT+9 with no DST rules; - their TZI blocks are identical. For these cases, we fall back to a - name lookup. We attempt to match the display name as stored in the - registry for the current zone to the display name stored in the - registry for various Windows zones. By comparing the registry data - directly we avoid conversion complications. - - Author: Alan Liu - Since: ICU 2.6 - Based on original code by Carl Brown +* Main Windows time zone detection function. +* Returns the Windows time zone converted to an ICU time zone as a heap-allocated buffer, or nullptr upon failure. +* Note: We use the Win32 API GetDynamicTimeZoneInformation to get the current time zone info. +* This API returns a non-localized time zone name, which we can then map to an ICU time zone name. */ - -/** - * Main Windows time zone detection function. Returns the Windows - * time zone, translated to an ICU time zone, or NULL upon failure. - */ U_CFUNC const char* U_EXPORT2 uprv_detectWindowsTimeZone() { UErrorCode status = U_ZERO_ERROR; - UResourceBundle* bundle = NULL; - char* icuid = NULL; - char apiStdName[MAX_LENGTH_ID]; - char regStdName[MAX_LENGTH_ID]; - char tmpid[MAX_LENGTH_ID]; + char* icuid = nullptr; + char dynamicTZKeyName[MAX_TIMEZONE_ID_LENGTH]; + char tmpid[MAX_TIMEZONE_ID_LENGTH]; int32_t len; - int id; + int id = GEOID_NOT_AVAILABLE; int errorCode; - wchar_t ISOcodeW[3]; /* 2 letter iso code in UTF-16*/ - char ISOcodeA[3]; /* 2 letter iso code in ansi */ + wchar_t ISOcodeW[3] = {}; /* 2 letter ISO code in UTF-16 */ + char ISOcode[3] = {}; /* 2 letter ISO code in UTF-8 */ - LONG result; - TZI tziKey; - TZI tziReg; - TIME_ZONE_INFORMATION apiTZI; + DYNAMIC_TIME_ZONE_INFORMATION dynamicTZI; + uprv_memset(&dynamicTZI, 0, sizeof(dynamicTZI)); + uprv_memset(dynamicTZKeyName, 0, sizeof(dynamicTZKeyName)); + uprv_memset(tmpid, 0, sizeof(tmpid)); - BOOL tryPreVistaFallback; - OSVERSIONINFO osVerInfo; + /* Obtain TIME_ZONE_INFORMATION from the API and get the non-localized time zone name. */ + if (TIME_ZONE_ID_INVALID == GetDynamicTimeZoneInformation(&dynamicTZI)) { + return nullptr; + } - /* Obtain TIME_ZONE_INFORMATION from the API, and then convert it - to TZI. We could also interrogate the registry directly; we do - this below if needed. */ - uprv_memset(&apiTZI, 0, sizeof(apiTZI)); - uprv_memset(&tziKey, 0, sizeof(tziKey)); - uprv_memset(&tziReg, 0, sizeof(tziReg)); - GetTimeZoneInformation(&apiTZI); - tziKey.bias = apiTZI.Bias; - uprv_memcpy((char *)&tziKey.standardDate, (char*)&apiTZI.StandardDate, - sizeof(apiTZI.StandardDate)); - uprv_memcpy((char *)&tziKey.daylightDate, (char*)&apiTZI.DaylightDate, - sizeof(apiTZI.DaylightDate)); + id = GetUserGeoID(GEOCLASS_NATION); + errorCode = GetGeoInfoW(id, GEO_ISO2, ISOcodeW, 3, 0); - /* Convert the wchar_t* standard name to char* */ - uprv_memset(apiStdName, 0, sizeof(apiStdName)); - wcstombs(apiStdName, apiTZI.StandardName, MAX_LENGTH_ID); + // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8). + u_strToUTF8(ISOcode, UPRV_LENGTHOF(ISOcode), nullptr, + reinterpret_cast(ISOcodeW), UPRV_LENGTHOF(ISOcodeW), &status); - tmpid[0] = 0; + LocalUResourceBundlePointer bundle(ures_openDirect(nullptr, "windowsZones", &status)); + ures_getByKey(bundle.getAlias(), "mapTimezones", bundle.getAlias(), &status); - id = GetUserGeoID(GEOCLASS_NATION); - errorCode = GetGeoInfoW(id, GEO_ISO2, ISOcodeW, 3, 0); - u_strToUTF8(ISOcodeA, 3, NULL, (const UChar *)ISOcodeW, 3, &status); + // convert from wchar_t* (UTF-16 on Windows) to char* (UTF-8). + u_strToUTF8(dynamicTZKeyName, UPRV_LENGTHOF(dynamicTZKeyName), nullptr, + reinterpret_cast(dynamicTZI.TimeZoneKeyName), UPRV_LENGTHOF(dynamicTZI.TimeZoneKeyName), &status); + + if (U_FAILURE(status)) { + return nullptr; + } - bundle = ures_openDirect(NULL, "windowsZones", &status); - ures_getByKey(bundle, "mapTimezones", bundle, &status); + if (dynamicTZI.TimeZoneKeyName[0] != 0) { + UResourceBundle winTZ; + ures_initStackObject(&winTZ); + ures_getByKey(bundle.getAlias(), dynamicTZKeyName, &winTZ, &status); - /* - Windows Vista+ provides us with a "TimeZoneKeyName" that is not localized - and can be used to directly map a name in our bundle. Try to use that first - if we're on Vista or higher - */ - uprv_memset(&osVerInfo, 0, sizeof(osVerInfo)); - osVerInfo.dwOSVersionInfoSize = sizeof(osVerInfo); - tryPreVistaFallback = TRUE; - result = getTZKeyName(regStdName, sizeof(regStdName)); - if(ERROR_SUCCESS == result) - { - UResourceBundle* winTZ = ures_getByKey(bundle, regStdName, NULL, &status); - if(U_SUCCESS(status)) - { - const UChar* icuTZ = NULL; - if (errorCode != 0) - { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); + if (U_SUCCESS(status)) { + const UChar* icuTZ = nullptr; + if (errorCode != 0) { + icuTZ = ures_getStringByKey(&winTZ, ISOcode, &len, &status); } - if (errorCode==0 || icuTZ==NULL) - { + if (errorCode == 0 || icuTZ == nullptr) { /* fallback to default "001" and reset status */ status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); + icuTZ = ures_getStringByKey(&winTZ, "001", &len, &status); } - if(U_SUCCESS(status)) - { - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) - { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - tryPreVistaFallback = FALSE; - } - } - ures_close(winTZ); - } + if (U_SUCCESS(status)) { + int index = 0; - if(tryPreVistaFallback) - { - /* Note: We get the winid not from static tables but from resource bundle. */ - while (U_SUCCESS(status) && ures_hasNext(bundle)) - { - UBool idFound = FALSE; - const char* winid; - UResourceBundle* winTZ = ures_getNextResource(bundle, NULL, &status); - if (U_FAILURE(status)) - { - break; - } - winid = ures_getKey(winTZ); - result = getTZI(winid, &tziReg); - - if (result == ERROR_SUCCESS) - { - /* Windows alters the DaylightBias in some situations. - Using the bias and the rules suffices, so overwrite - these unreliable fields. */ - tziKey.standardBias = tziReg.standardBias; - tziKey.daylightBias = tziReg.daylightBias; - - if (uprv_memcmp((char *)&tziKey, (char*)&tziReg, sizeof(tziKey)) == 0) - { - const UChar* icuTZ = NULL; - if (errorCode != 0) - { - icuTZ = ures_getStringByKey(winTZ, ISOcodeA, &len, &status); - } - if (errorCode==0 || icuTZ==NULL) - { - /* fallback to default "001" and reset status */ - status = U_ZERO_ERROR; - icuTZ = ures_getStringByKey(winTZ, "001", &len, &status); - } - - if (U_SUCCESS(status)) - { - /* Get the standard name from the registry key to compare with - the one from Windows API call. */ - uprv_memset(regStdName, 0, sizeof(regStdName)); - result = getSTDName(winid, regStdName, sizeof(regStdName)); - if (result == ERROR_SUCCESS) - { - if (uprv_strcmp(apiStdName, regStdName) == 0) - { - idFound = TRUE; - } - } - - /* tmpid buffer holds the ICU timezone ID corresponding to the timezone ID from Windows. - * If none is found, tmpid buffer will contain a fallback ID (i.e. the time zone ID matching - * the current time zone information) - */ - if (idFound || tmpid[0] == 0) - { - /* if icuTZ has more than one city, take only the first (i.e. terminate icuTZ at first space) */ - int index=0; - while (! (*icuTZ == '\0' || *icuTZ ==' ')) - { - tmpid[index++]=(char)(*icuTZ++); /* safe to assume 'char' is ASCII compatible on windows */ - } - tmpid[index]='\0'; - } - } + while (!(*icuTZ == '\0' || *icuTZ == ' ')) { + // time zone IDs only contain ASCII invariant characters. + tmpid[index++] = (char)(*icuTZ++); } - } - ures_close(winTZ); - if (idFound) - { - break; + tmpid[index] = '\0'; } } + ures_close(&winTZ); } - /* - * Copy the timezone ID to icuid to be returned. - */ - if (tmpid[0] != 0) - { - len = uprv_strlen(tmpid); - icuid = (char*)uprv_calloc(len + 1, sizeof(char)); - if (icuid != NULL) - { - uprv_strcpy(icuid, tmpid); - } + // Copy the timezone ID to icuid to be returned. + if (tmpid[0] != 0) { + icuid = uprv_strdup(tmpid); } - ures_close(bundle); - return icuid; } -#endif /* U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) */ +U_NAMESPACE_END +#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ diff --git a/deps/icu-small/source/common/wintz.h b/deps/icu-small/source/common/wintz.h index 9e8cbbcfaba36f..0625bb204b197c 100644 --- a/deps/icu-small/source/common/wintz.h +++ b/deps/icu-small/source/common/wintz.h @@ -16,9 +16,7 @@ #include "unicode/utypes.h" -// This file contains only desktop windows behavior -// Windows UWP calls Windows::Globalization directly, so this isn't needed there. -#if U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) +#if U_PLATFORM_USES_ONLY_WIN32_API /** * \file @@ -33,6 +31,6 @@ U_CDECL_END U_CFUNC const char* U_EXPORT2 uprv_detectWindowsTimeZone(); -#endif /* U_PLATFORM_USES_ONLY_WIN32_API && (U_PLATFORM_HAS_WINUWP_API == 0) */ +#endif /* U_PLATFORM_USES_ONLY_WIN32_API */ #endif /* __WINTZ */ diff --git a/deps/icu-small/source/data/in/icudt62l.dat b/deps/icu-small/source/data/in/icudt63l.dat similarity index 65% rename from deps/icu-small/source/data/in/icudt62l.dat rename to deps/icu-small/source/data/in/icudt63l.dat index a6ac7ebb374980..60f57612c0d0e3 100644 Binary files a/deps/icu-small/source/data/in/icudt62l.dat and b/deps/icu-small/source/data/in/icudt63l.dat differ diff --git a/deps/icu-small/source/i18n/alphaindex.cpp b/deps/icu-small/source/i18n/alphaindex.cpp index f4a082c5b20e5e..99f70114cbdb15 100644 --- a/deps/icu-small/source/i18n/alphaindex.cpp +++ b/deps/icu-small/source/i18n/alphaindex.cpp @@ -511,8 +511,8 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { ces, errorCode) && current.charAt(current.length() - 1) != 0xFFFF /* !current.endsWith("\uffff") */) { // "AE-ligature" or "Sch" etc. - for (int32_t i = bucketList->size() - 2;; --i) { - Bucket *singleBucket = getBucket(*bucketList, i); + for (int32_t j = bucketList->size() - 2;; --j) { + Bucket *singleBucket = getBucket(*bucketList, j); if (singleBucket->labelType_ != U_ALPHAINDEX_NORMAL) { // There is no single-character bucket since the last // underflow or inflow label. @@ -608,8 +608,8 @@ BucketList *AlphabeticIndex::createBucketList(UErrorCode &errorCode) const { } // Do not call publicBucketList->setDeleter(): // This vector shares its objects with the bucketList. - for (int32_t i = 0; i < bucketList->size(); ++i) { - bucket = getBucket(*bucketList, i); + for (int32_t j = 0; j < bucketList->size(); ++j) { + bucket = getBucket(*bucketList, j); if (bucket->displayBucket_ == NULL) { publicBucketList->addElement(bucket, errorCode); } diff --git a/deps/icu-small/source/i18n/anytrans.cpp b/deps/icu-small/source/i18n/anytrans.cpp index d06469e2ae2746..6e382b824b95b7 100644 --- a/deps/icu-small/source/i18n/anytrans.cpp +++ b/deps/icu-small/source/i18n/anytrans.cpp @@ -391,12 +391,12 @@ void AnyTransliterator::registerIDs() { UnicodeString id; TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), target, variant, id); ec = U_ZERO_ERROR; - AnyTransliterator* t = new AnyTransliterator(id, target, variant, + AnyTransliterator* tl = new AnyTransliterator(id, target, variant, targetScript, ec); if (U_FAILURE(ec)) { - delete t; + delete tl; } else { - Transliterator::_registerInstance(t); + Transliterator::_registerInstance(tl); Transliterator::_registerSpecialInverse(target, UnicodeString(TRUE, NULL_ID, 4), FALSE); } } diff --git a/deps/icu-small/source/i18n/calendar.cpp b/deps/icu-small/source/i18n/calendar.cpp index 61757cb250f77d..24c2fb964e91a1 100644 --- a/deps/icu-small/source/i18n/calendar.cpp +++ b/deps/icu-small/source/i18n/calendar.cpp @@ -3797,7 +3797,7 @@ Calendar::setWeekData(const Locale& desiredLocale, const char *type, UErrorCode& Locale max = Locale::createFromName(maxLocaleID); useLocale = Locale(max.getLanguage(),max.getCountry()); } else { - useLocale = Locale(desiredLocale); + useLocale = desiredLocale; } /* The code here is somewhat of a hack, since week data and weekend data aren't really tied to diff --git a/deps/icu-small/source/i18n/coll.cpp b/deps/icu-small/source/i18n/coll.cpp index b7348a1d16f1e1..2a614fac5dcfbc 100644 --- a/deps/icu-small/source/i18n/coll.cpp +++ b/deps/icu-small/source/i18n/coll.cpp @@ -448,6 +448,13 @@ Collator* U_EXPORT2 Collator::createInstance(const Locale& desiredLocale, #endif { coll = makeInstance(desiredLocale, status); + // Either returns NULL with U_FAILURE(status), or non-NULL with U_SUCCESS(status) + } + // The use of *coll in setAttributesFromKeywords can cause the NULL check to be + // optimized out of the delete even though setAttributesFromKeywords returns + // immediately if U_FAILURE(status), so we add a check here. + if (U_FAILURE(status)) { + return NULL; } setAttributesFromKeywords(desiredLocale, *coll, status); if (U_FAILURE(status)) { @@ -986,8 +993,8 @@ Collator::internalCompareUTF8(const char *left, int32_t leftLength, return UCOL_EQUAL; } return compareUTF8( - StringPiece(left, (leftLength < 0) ? uprv_strlen(left) : leftLength), - StringPiece(right, (rightLength < 0) ? uprv_strlen(right) : rightLength), + StringPiece(left, (leftLength < 0) ? static_cast(uprv_strlen(left)) : leftLength), + StringPiece(right, (rightLength < 0) ? static_cast(uprv_strlen(right)) : rightLength), errorCode); } diff --git a/deps/icu-small/source/i18n/collationkeys.cpp b/deps/icu-small/source/i18n/collationkeys.cpp index 4b9e6b59075e1d..b5c322fb44679b 100644 --- a/deps/icu-small/source/i18n/collationkeys.cpp +++ b/deps/icu-small/source/i18n/collationkeys.cpp @@ -403,13 +403,13 @@ CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter, uint8_t *secs = secondaries.data(); int32_t last = secondaries.length() - 1; if(secSegmentStart < last) { - uint8_t *p = secs + secSegmentStart; - uint8_t *q = secs + last; + uint8_t *q = secs + secSegmentStart; + uint8_t *r = secs + last; do { - uint8_t b = *p; - *p++ = *q; - *q-- = b; - } while(p < q); + uint8_t b = *q; + *q++ = *r; + *r-- = b; + } while(q < r); } secondaries.appendByte(p == Collation::NO_CE_PRIMARY ? Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE); diff --git a/deps/icu-small/source/i18n/csrmbcs.cpp b/deps/icu-small/source/i18n/csrmbcs.cpp index 0c2df594d56a75..46d626bb3f4ffe 100644 --- a/deps/icu-small/source/i18n/csrmbcs.cpp +++ b/deps/icu-small/source/i18n/csrmbcs.cpp @@ -166,7 +166,7 @@ int32_t CharsetRecog_mbcs::match_mbcs(InputText *det, const uint16_t commonChars doubleByteCharCount++; if (commonChars != 0) { - if (binarySearch(commonChars, commonCharsLen, iter.charValue) >= 0){ + if (binarySearch(commonChars, commonCharsLen, static_cast(iter.charValue)) >= 0){ commonCharCount += 1; } } diff --git a/deps/icu-small/source/i18n/currpinf.cpp b/deps/icu-small/source/i18n/currpinf.cpp index 6b1efd5f4da721..f5d27e28d8ebfa 100644 --- a/deps/icu-small/source/i18n/currpinf.cpp +++ b/deps/icu-small/source/i18n/currpinf.cpp @@ -17,7 +17,6 @@ #include #endif - #include "unicode/locid.h" #include "unicode/plurrule.h" #include "unicode/strenum.h" @@ -30,7 +29,6 @@ U_NAMESPACE_BEGIN - static const UChar gNumberPatternSeparator = 0x3B; // ; U_CDECL_BEGIN @@ -65,66 +63,86 @@ static const char gDecimalFormatTag[]="decimalFormat"; static const char gCurrUnitPtnTag[]="CurrencyUnitPatterns"; CurrencyPluralInfo::CurrencyPluralInfo(UErrorCode& status) -: fPluralCountToCurrencyUnitPattern(NULL), - fPluralRules(NULL), - fLocale(NULL) { +: fPluralCountToCurrencyUnitPattern(nullptr), + fPluralRules(nullptr), + fLocale(nullptr), + fInternalStatus(U_ZERO_ERROR) { initialize(Locale::getDefault(), status); } CurrencyPluralInfo::CurrencyPluralInfo(const Locale& locale, UErrorCode& status) -: fPluralCountToCurrencyUnitPattern(NULL), - fPluralRules(NULL), - fLocale(NULL) { +: fPluralCountToCurrencyUnitPattern(nullptr), + fPluralRules(nullptr), + fLocale(nullptr), + fInternalStatus(U_ZERO_ERROR) { initialize(locale, status); } CurrencyPluralInfo::CurrencyPluralInfo(const CurrencyPluralInfo& info) : UObject(info), - fPluralCountToCurrencyUnitPattern(NULL), - fPluralRules(NULL), - fLocale(NULL) { + fPluralCountToCurrencyUnitPattern(nullptr), + fPluralRules(nullptr), + fLocale(nullptr), + fInternalStatus(U_ZERO_ERROR) { *this = info; } - CurrencyPluralInfo& CurrencyPluralInfo::operator=(const CurrencyPluralInfo& info) { if (this == &info) { return *this; } + fInternalStatus = info.fInternalStatus; + if (U_FAILURE(fInternalStatus)) { + // bail out early if the object we were copying from was already 'invalid'. + return *this; + } + deleteHash(fPluralCountToCurrencyUnitPattern); - UErrorCode status = U_ZERO_ERROR; - fPluralCountToCurrencyUnitPattern = initHash(status); + fPluralCountToCurrencyUnitPattern = initHash(fInternalStatus); copyHash(info.fPluralCountToCurrencyUnitPattern, - fPluralCountToCurrencyUnitPattern, status); - if ( U_FAILURE(status) ) { + fPluralCountToCurrencyUnitPattern, fInternalStatus); + if ( U_FAILURE(fInternalStatus) ) { return *this; } delete fPluralRules; + fPluralRules = nullptr; delete fLocale; - if (info.fPluralRules) { + fLocale = nullptr; + + if (info.fPluralRules != nullptr) { fPluralRules = info.fPluralRules->clone(); - } else { - fPluralRules = NULL; + if (fPluralRules == nullptr) { + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + return *this; + } } - if (info.fLocale) { + if (info.fLocale != nullptr) { fLocale = info.fLocale->clone(); - } else { - fLocale = NULL; + if (fLocale == nullptr) { + // Note: If clone had an error parameter, then we could check/set that instead. + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + return *this; + } + // If the other locale wasn't bogus, but our clone'd locale is bogus, then OOM happened + // during the call to clone(). + if (!info.fLocale->isBogus() && fLocale->isBogus()) { + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + return *this; + } } return *this; } - CurrencyPluralInfo::~CurrencyPluralInfo() { deleteHash(fPluralCountToCurrencyUnitPattern); - fPluralCountToCurrencyUnitPattern = NULL; + fPluralCountToCurrencyUnitPattern = nullptr; delete fPluralRules; delete fLocale; - fPluralRules = NULL; - fLocale = NULL; + fPluralRules = nullptr; + fLocale = nullptr; } UBool @@ -148,7 +166,14 @@ CurrencyPluralInfo::operator==(const CurrencyPluralInfo& info) const { CurrencyPluralInfo* CurrencyPluralInfo::clone() const { - return new CurrencyPluralInfo(*this); + CurrencyPluralInfo* newObj = new CurrencyPluralInfo(*this); + // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr + // if the new object was not full constructed properly (an error occurred). + if (newObj != nullptr && U_FAILURE(newObj->fInternalStatus)) { + delete newObj; + newObj = nullptr; + } + return newObj; } const PluralRules* @@ -161,15 +186,15 @@ CurrencyPluralInfo::getCurrencyPluralPattern(const UnicodeString& pluralCount, UnicodeString& result) const { const UnicodeString* currencyPluralPattern = (UnicodeString*)fPluralCountToCurrencyUnitPattern->get(pluralCount); - if (currencyPluralPattern == NULL) { + if (currencyPluralPattern == nullptr) { // fall back to "other" if (pluralCount.compare(gPluralCountOther, 5)) { currencyPluralPattern = (UnicodeString*)fPluralCountToCurrencyUnitPattern->get(UnicodeString(TRUE, gPluralCountOther, 5)); } - if (currencyPluralPattern == NULL) { + if (currencyPluralPattern == nullptr) { // no currencyUnitPatterns defined, - // fallback to predefined defult. + // fallback to predefined default. // This should never happen when ICU resource files are // available, since currencyUnitPattern of "other" is always // defined in root. @@ -190,14 +215,11 @@ void CurrencyPluralInfo::setPluralRules(const UnicodeString& ruleDescription, UErrorCode& status) { if (U_SUCCESS(status)) { - if (fPluralRules) { - delete fPluralRules; - } + delete fPluralRules; fPluralRules = PluralRules::createRules(ruleDescription, status); } } - void CurrencyPluralInfo::setCurrencyPluralPattern(const UnicodeString& pluralCount, const UnicodeString& pattern, @@ -206,63 +228,77 @@ CurrencyPluralInfo::setCurrencyPluralPattern(const UnicodeString& pluralCount, UnicodeString* oldValue = static_cast( fPluralCountToCurrencyUnitPattern->get(pluralCount)); delete oldValue; - fPluralCountToCurrencyUnitPattern->put(pluralCount, new UnicodeString(pattern), status); + LocalPointer p(new UnicodeString(pattern), status); + if (U_SUCCESS(status)) { + // the p object allocated above will be owned by fPluralCountToCurrencyUnitPattern + // after the call to put(), even if the method returns failure. + fPluralCountToCurrencyUnitPattern->put(pluralCount, p.orphan(), status); + } } } - void CurrencyPluralInfo::setLocale(const Locale& loc, UErrorCode& status) { initialize(loc, status); } - void CurrencyPluralInfo::initialize(const Locale& loc, UErrorCode& status) { if (U_FAILURE(status)) { return; } delete fLocale; + fLocale = nullptr; + delete fPluralRules; + fPluralRules = nullptr; + fLocale = loc.clone(); - if (fPluralRules) { - delete fPluralRules; + if (fLocale == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + // If the locale passed in wasn't bogus, but our clone'd locale is bogus, then OOM happened + // during the call to loc.clone(). + if (!loc.isBogus() && fLocale->isBogus()) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } fPluralRules = PluralRules::forLocale(loc, status); setupCurrencyPluralPattern(loc, status); } - void CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& status) { if (U_FAILURE(status)) { return; } - if (fPluralCountToCurrencyUnitPattern) { - deleteHash(fPluralCountToCurrencyUnitPattern); - } + deleteHash(fPluralCountToCurrencyUnitPattern); fPluralCountToCurrencyUnitPattern = initHash(status); if (U_FAILURE(status)) { return; } - NumberingSystem *ns = NumberingSystem::createInstance(loc,status); + LocalPointer ns(NumberingSystem::createInstance(loc, status), status); + if (U_FAILURE(status)) { + return; + } UErrorCode ec = U_ZERO_ERROR; - UResourceBundle *rb = ures_open(NULL, loc.getName(), &ec); - UResourceBundle *numElements = ures_getByKeyWithFallback(rb, gNumberElementsTag, NULL, &ec); - rb = ures_getByKeyWithFallback(numElements, ns->getName(), rb, &ec); - rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec); + LocalUResourceBundlePointer rb(ures_open(nullptr, loc.getName(), &ec)); + LocalUResourceBundlePointer numElements(ures_getByKeyWithFallback(rb.getAlias(), gNumberElementsTag, nullptr, &ec)); + ures_getByKeyWithFallback(numElements.getAlias(), ns->getName(), rb.getAlias(), &ec); + ures_getByKeyWithFallback(rb.getAlias(), gPatternsTag, rb.getAlias(), &ec); int32_t ptnLen; - const UChar* numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec); + const UChar* numberStylePattern = ures_getStringByKeyWithFallback(rb.getAlias(), gDecimalFormatTag, &ptnLen, &ec); // Fall back to "latn" if num sys specific pattern isn't there. - if ( ec == U_MISSING_RESOURCE_ERROR && uprv_strcmp(ns->getName(),gLatnTag)) { + if ( ec == U_MISSING_RESOURCE_ERROR && (uprv_strcmp(ns->getName(), gLatnTag) != 0)) { ec = U_ZERO_ERROR; - rb = ures_getByKeyWithFallback(numElements, gLatnTag, rb, &ec); - rb = ures_getByKeyWithFallback(rb, gPatternsTag, rb, &ec); - numberStylePattern = ures_getStringByKeyWithFallback(rb, gDecimalFormatTag, &ptnLen, &ec); + ures_getByKeyWithFallback(numElements.getAlias(), gLatnTag, rb.getAlias(), &ec); + ures_getByKeyWithFallback(rb.getAlias(), gPatternsTag, rb.getAlias(), &ec); + numberStylePattern = ures_getStringByKeyWithFallback(rb.getAlias(), gDecimalFormatTag, &ptnLen, &ec); } int32_t numberStylePatternLen = ptnLen; - const UChar* negNumberStylePattern = NULL; + const UChar* negNumberStylePattern = nullptr; int32_t negNumberStylePatternLen = 0; // TODO: Java // parse to check whether there is ";" separator in the numberStylePattern @@ -279,127 +315,127 @@ CurrencyPluralInfo::setupCurrencyPluralPattern(const Locale& loc, UErrorCode& st } } - ures_close(numElements); - ures_close(rb); - delete ns; - if (U_FAILURE(ec)) { + // If OOM occurred during the above code, then we want to report that back to the caller. + if (ec == U_MEMORY_ALLOCATION_ERROR) { + status = ec; + } return; } - UResourceBundle *currRb = ures_open(U_ICUDATA_CURR, loc.getName(), &ec); - UResourceBundle *currencyRes = ures_getByKeyWithFallback(currRb, gCurrUnitPtnTag, NULL, &ec); + LocalUResourceBundlePointer currRb(ures_open(U_ICUDATA_CURR, loc.getName(), &ec)); + LocalUResourceBundlePointer currencyRes(ures_getByKeyWithFallback(currRb.getAlias(), gCurrUnitPtnTag, nullptr, &ec)); #ifdef CURRENCY_PLURAL_INFO_DEBUG std::cout << "in set up\n"; #endif - StringEnumeration* keywords = fPluralRules->getKeywords(ec); + LocalPointer keywords(fPluralRules->getKeywords(ec), ec); if (U_SUCCESS(ec)) { const char* pluralCount; - while ((pluralCount = keywords->next(NULL, ec)) != NULL) { - if ( U_SUCCESS(ec) ) { - int32_t ptnLen; - UErrorCode err = U_ZERO_ERROR; - const UChar* patternChars = ures_getStringByKeyWithFallback( - currencyRes, pluralCount, &ptnLen, &err); - if (U_SUCCESS(err) && ptnLen > 0) { - UnicodeString* pattern = new UnicodeString(patternChars, ptnLen); + while (((pluralCount = keywords->next(nullptr, ec)) != nullptr) && U_SUCCESS(ec)) { + int32_t ptnLength; + UErrorCode err = U_ZERO_ERROR; + const UChar* patternChars = ures_getStringByKeyWithFallback(currencyRes.getAlias(), pluralCount, &ptnLength, &err); + if (err == U_MEMORY_ALLOCATION_ERROR || patternChars == nullptr) { + ec = err; + break; + } + if (U_SUCCESS(err) && ptnLength > 0) { + UnicodeString* pattern = new UnicodeString(patternChars, ptnLength); + if (pattern == nullptr) { + ec = U_MEMORY_ALLOCATION_ERROR; + break; + } #ifdef CURRENCY_PLURAL_INFO_DEBUG - char result_1[1000]; - pattern->extract(0, pattern->length(), result_1, "UTF-8"); - std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; + char result_1[1000]; + pattern->extract(0, pattern->length(), result_1, "UTF-8"); + std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif - pattern->findAndReplace(UnicodeString(TRUE, gPart0, 3), - UnicodeString(numberStylePattern, numberStylePatternLen)); - pattern->findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); - - if (hasSeparator) { - UnicodeString negPattern(patternChars, ptnLen); - negPattern.findAndReplace(UnicodeString(TRUE, gPart0, 3), - UnicodeString(negNumberStylePattern, negNumberStylePatternLen)); - negPattern.findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); - pattern->append(gNumberPatternSeparator); - pattern->append(negPattern); - } + pattern->findAndReplace(UnicodeString(TRUE, gPart0, 3), + UnicodeString(numberStylePattern, numberStylePatternLen)); + pattern->findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); + + if (hasSeparator) { + UnicodeString negPattern(patternChars, ptnLength); + negPattern.findAndReplace(UnicodeString(TRUE, gPart0, 3), + UnicodeString(negNumberStylePattern, negNumberStylePatternLen)); + negPattern.findAndReplace(UnicodeString(TRUE, gPart1, 3), UnicodeString(TRUE, gTripleCurrencySign, 3)); + pattern->append(gNumberPatternSeparator); + pattern->append(negPattern); + } #ifdef CURRENCY_PLURAL_INFO_DEBUG - pattern->extract(0, pattern->length(), result_1, "UTF-8"); - std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; + pattern->extract(0, pattern->length(), result_1, "UTF-8"); + std::cout << "pluralCount: " << pluralCount << "; pattern: " << result_1 << "\n"; #endif - - fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status); - } + // the 'pattern' object allocated above will be owned by the fPluralCountToCurrencyUnitPattern after the call to + // put(), even if the method returns failure. + fPluralCountToCurrencyUnitPattern->put(UnicodeString(pluralCount, -1, US_INV), pattern, status); } } } - delete keywords; - ures_close(currencyRes); - ures_close(currRb); + // If OOM occurred during the above code, then we want to report that back to the caller. + if (ec == U_MEMORY_ALLOCATION_ERROR) { + status = ec; + } } - - void -CurrencyPluralInfo::deleteHash(Hashtable* hTable) -{ - if ( hTable == NULL ) { +CurrencyPluralInfo::deleteHash(Hashtable* hTable) { + if ( hTable == nullptr ) { return; } int32_t pos = UHASH_FIRST; - const UHashElement* element = NULL; - while ( (element = hTable->nextElement(pos)) != NULL ) { + const UHashElement* element = nullptr; + while ( (element = hTable->nextElement(pos)) != nullptr ) { const UHashTok valueTok = element->value; const UnicodeString* value = (UnicodeString*)valueTok.pointer; delete value; } delete hTable; - hTable = NULL; + hTable = nullptr; } - Hashtable* CurrencyPluralInfo::initHash(UErrorCode& status) { - if ( U_FAILURE(status) ) { - return NULL; - } - Hashtable* hTable; - if ( (hTable = new Hashtable(TRUE, status)) == NULL ) { - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + if (U_FAILURE(status)) { + return nullptr; } - if ( U_FAILURE(status) ) { - delete hTable; - return NULL; + LocalPointer hTable(new Hashtable(TRUE, status), status); + if (U_FAILURE(status)) { + return nullptr; } hTable->setValueComparator(ValueComparator); - return hTable; + return hTable.orphan(); } - void CurrencyPluralInfo::copyHash(const Hashtable* source, Hashtable* target, UErrorCode& status) { - if ( U_FAILURE(status) ) { + if (U_FAILURE(status)) { return; } int32_t pos = UHASH_FIRST; - const UHashElement* element = NULL; - if ( source ) { - while ( (element = source->nextElement(pos)) != NULL ) { + const UHashElement* element = nullptr; + if (source) { + while ( (element = source->nextElement(pos)) != nullptr ) { const UHashTok keyTok = element->key; const UnicodeString* key = (UnicodeString*)keyTok.pointer; const UHashTok valueTok = element->value; const UnicodeString* value = (UnicodeString*)valueTok.pointer; - UnicodeString* copy = new UnicodeString(*value); - target->put(UnicodeString(*key), copy, status); - if ( U_FAILURE(status) ) { + LocalPointer copy(new UnicodeString(*value), status); + if (U_FAILURE(status)) { + return; + } + // The HashTable owns the 'copy' object after the call to put(). + target->put(UnicodeString(*key), copy.orphan(), status); + if (U_FAILURE(status)) { return; } } } } - U_NAMESPACE_END #endif diff --git a/deps/icu-small/source/i18n/currunit.cpp b/deps/icu-small/source/i18n/currunit.cpp index 6d8d1cd6c6f97f..2ece508751bcec 100644 --- a/deps/icu-small/source/i18n/currunit.cpp +++ b/deps/icu-small/source/i18n/currunit.cpp @@ -27,9 +27,14 @@ CurrencyUnit::CurrencyUnit(ConstChar16Ptr _isoCode, UErrorCode& ec) { // The constructor always leaves the CurrencyUnit in a valid state (with a 3-character currency code). // Note: in ICU4J Currency.getInstance(), we check string length for 3, but in ICU4C we allow a // non-NUL-terminated string to be passed as an argument, so it is not possible to check length. + // However, we allow a NUL-terminated empty string, which should have the same behavior as nullptr. + // Consider NUL-terminated strings of length 1 or 2 as invalid. const char16_t* isoCodeToUse; - if (U_FAILURE(ec) || _isoCode == nullptr) { + if (U_FAILURE(ec) || _isoCode == nullptr || _isoCode[0] == 0) { isoCodeToUse = kDefaultCurrency; + } else if (_isoCode[1] == 0 || _isoCode[2] == 0) { + isoCodeToUse = kDefaultCurrency; + ec = U_ILLEGAL_ARGUMENT_ERROR; } else if (!uprv_isInvariantUString(_isoCode, 3)) { // TODO: Perform a more strict ASCII check like in ICU4J isAlpha3Code? isoCodeToUse = kDefaultCurrency; diff --git a/deps/icu-small/source/i18n/dcfmtsym.cpp b/deps/icu-small/source/i18n/dcfmtsym.cpp index 5a432aec8e4df0..04113785f27308 100644 --- a/deps/icu-small/source/i18n/dcfmtsym.cpp +++ b/deps/icu-small/source/i18n/dcfmtsym.cpp @@ -436,7 +436,7 @@ DecimalFormatSymbols::initialize(const Locale& loc, UErrorCode& status, sink.resolveMissingMonetarySeparators(fSymbols); // Resolve codePointZero - UChar32 tempCodePointZero; + UChar32 tempCodePointZero = -1; for (int32_t i=0; i<=9; i++) { const UnicodeString& stringDigit = getConstDigitSymbol(i); if (stringDigit.countChar32() != 1) { diff --git a/deps/icu-small/source/i18n/decimfmt.cpp b/deps/icu-small/source/i18n/decimfmt.cpp index a2638bb74298e3..edd8910d9d4fd6 100644 --- a/deps/icu-small/source/i18n/decimfmt.cpp +++ b/deps/icu-small/source/i18n/decimfmt.cpp @@ -1057,12 +1057,19 @@ UBool DecimalFormat::areSignificantDigitsUsed() const { void DecimalFormat::setSignificantDigitsUsed(UBool useSignificantDigits) { // These are the default values from the old implementation. + if (useSignificantDigits) { + if (fields->properties->minimumSignificantDigits != -1 || + fields->properties->maximumSignificantDigits != -1) { + return; + } + } else { + if (fields->properties->minimumSignificantDigits == -1 && + fields->properties->maximumSignificantDigits == -1) { + return; + } + } int32_t minSig = useSignificantDigits ? 1 : -1; int32_t maxSig = useSignificantDigits ? 6 : -1; - if (fields->properties->minimumSignificantDigits == minSig && - fields->properties->maximumSignificantDigits == maxSig) { - return; - } fields->properties->minimumSignificantDigits = minSig; fields->properties->maximumSignificantDigits = maxSig; touchNoError(); @@ -1175,7 +1182,12 @@ void DecimalFormat::setPropertiesFromPattern(const UnicodeString& pattern, int32 } const numparse::impl::NumberParserImpl* DecimalFormat::getParser(UErrorCode& status) const { - if (U_FAILURE(status)) { return nullptr; } + // TODO: Move this into umutex.h? (similar logic also in numrange_fluent.cpp) + // See ICU-20146 + + if (U_FAILURE(status)) { + return nullptr; + } // First try to get the pre-computed parser auto* ptr = fields->atomicParser.load(); @@ -1185,13 +1197,17 @@ const numparse::impl::NumberParserImpl* DecimalFormat::getParser(UErrorCode& sta // Try computing the parser on our own auto* temp = NumberParserImpl::createParserFromProperties(*fields->properties, *fields->symbols, false, status); + if (U_FAILURE(status)) { + return nullptr; + } if (temp == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; - // although we may still dereference, call sites should be guarded + return nullptr; } - // Note: ptr starts as nullptr; during compare_exchange, it is set to what is actually stored in the - // atomic if another thread beat us to computing the parser object. + // Note: ptr starts as nullptr; during compare_exchange, + // it is set to what is actually stored in the atomic + // if another thread beat us to computing the parser object. auto* nonConstThis = const_cast(this); if (!nonConstThis->fields->atomicParser.compare_exchange_strong(ptr, temp)) { // Another thread beat us to computing the parser diff --git a/deps/icu-small/source/i18n/dtfmtsym.cpp b/deps/icu-small/source/i18n/dtfmtsym.cpp index af421d41c72e61..c9dfa045831d60 100644 --- a/deps/icu-small/source/i18n/dtfmtsym.cpp +++ b/deps/icu-small/source/i18n/dtfmtsym.cpp @@ -1311,7 +1311,7 @@ DateFormatSymbols::initZoneStringsArray(void) { UDate now = Calendar::getNow(); UnicodeString tzDispName; - while ((tzid = tzids->snext(status))) { + while ((tzid = tzids->snext(status)) != 0) { if (U_FAILURE(status)) { break; } @@ -2224,8 +2224,8 @@ DateFormatSymbols::initializeData(const Locale& locale, const char *type, UError ++typeMapPtr; } if (typeMapPtr->usageTypeName != NULL && compResult == 0) { - fCapitalization[typeMapPtr->usageTypeEnumValue][0] = intVector[0]; - fCapitalization[typeMapPtr->usageTypeEnumValue][1] = intVector[1]; + fCapitalization[typeMapPtr->usageTypeEnumValue][0] = static_cast(intVector[0]); + fCapitalization[typeMapPtr->usageTypeEnumValue][1] = static_cast(intVector[1]); } } } diff --git a/deps/icu-small/source/i18n/dtitvfmt.cpp b/deps/icu-small/source/i18n/dtitvfmt.cpp index 743b534fc8f9cf..d952cbf509dc40 100644 --- a/deps/icu-small/source/i18n/dtitvfmt.cpp +++ b/deps/icu-small/source/i18n/dtitvfmt.cpp @@ -877,8 +877,7 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, if ( MCount < 3 ) { normalizedDateSkeleton.append(CAP_M); } else { - int32_t i; - for ( i = 0; i < MCount && i < MAX_M_COUNT; ++i ) { + for ( int32_t j = 0; j < MCount && j < MAX_M_COUNT; ++j) { normalizedDateSkeleton.append(CAP_M); } } @@ -887,8 +886,7 @@ DateIntervalFormat::getDateTimeSkeleton(const UnicodeString& skeleton, if ( ECount <= 3 ) { normalizedDateSkeleton.append(CAP_E); } else { - int32_t i; - for ( i = 0; i < ECount && i < MAX_E_COUNT; ++i ) { + for ( int32_t j = 0; j < ECount && j < MAX_E_COUNT; ++j ) { normalizedDateSkeleton.append(CAP_E); } } diff --git a/deps/icu-small/source/i18n/dtitvinf.cpp b/deps/icu-small/source/i18n/dtitvinf.cpp index c863a683a5c2f8..a289fc79c8da0e 100644 --- a/deps/icu-small/source/i18n/dtitvinf.cpp +++ b/deps/icu-small/source/i18n/dtitvinf.cpp @@ -594,7 +594,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, const UHashElement* elem = NULL; while ( (elem = fIntervalPatterns->nextElement(pos)) != NULL ) { const UHashTok keyTok = elem->key; - UnicodeString* skeleton = (UnicodeString*)keyTok.pointer; + UnicodeString* newSkeleton = (UnicodeString*)keyTok.pointer; #ifdef DTITVINF_DEBUG skeleton->extract(0, skeleton->length(), result, "UTF-8"); sprintf(mesg, "available skeletons: skeleton: %s; \n", result); @@ -606,7 +606,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, for ( i = 0; i < fieldLength; ++i ) { skeletonFieldWidth[i] = 0; } - parseSkeleton(*skeleton, skeletonFieldWidth); + parseSkeleton(*newSkeleton, skeletonFieldWidth); // calculate distance int32_t distance = 0; int8_t fieldDifference = 1; @@ -632,7 +632,7 @@ DateIntervalInfo::getBestSkeleton(const UnicodeString& skeleton, } } if ( distance < bestDistance ) { - bestSkeleton = skeleton; + bestSkeleton = newSkeleton; bestDistance = distance; bestMatchDistanceInfo = fieldDifference; } diff --git a/deps/icu-small/source/i18n/dtptngen.cpp b/deps/icu-small/source/i18n/dtptngen.cpp index aefd70464eb2d8..b44daf37bf5802 100644 --- a/deps/icu-small/source/i18n/dtptngen.cpp +++ b/deps/icu-small/source/i18n/dtptngen.cpp @@ -18,6 +18,7 @@ #include "unicode/decimfmt.h" #include "unicode/dtfmtsym.h" #include "unicode/dtptngen.h" +#include "unicode/localpointer.h" #include "unicode/simpleformatter.h" #include "unicode/smpdtfmt.h" #include "unicode/udat.h" @@ -88,17 +89,17 @@ static void ures_a_open(UResourceBundleAIterator *aiter, UResourceBundle *bund, aiter->num = ures_getSize(aiter->bund); aiter->cursor = 0; #if !defined(U_SORT_ASCII_BUNDLE_ITERATOR) - aiter->entries = NULL; + aiter->entries = nullptr; #else aiter->entries = (UResAEntry*)uprv_malloc(sizeof(UResAEntry)*aiter->num); for(int i=0;inum;i++) { - aiter->entries[i].item = ures_getByIndex(aiter->bund, i, NULL, status); + aiter->entries[i].item = ures_getByIndex(aiter->bund, i, nullptr, status); const char *akey = ures_getKey(aiter->entries[i].item); int32_t len = uprv_strlen(akey)+1; aiter->entries[i].key = (UChar*)uprv_malloc(len*sizeof(UChar)); u_charsToUChars(akey, aiter->entries[i].key, len); } - uprv_sortArray(aiter->entries, aiter->num, sizeof(UResAEntry), ures_a_codepointSort, NULL, TRUE, status); + uprv_sortArray(aiter->entries, aiter->num, sizeof(UResAEntry), ures_a_codepointSort, nullptr, TRUE, status); #endif } @@ -115,7 +116,7 @@ static const UChar *ures_a_getNextString(UResourceBundleAIterator *aiter, int32_ #if !defined(U_SORT_ASCII_BUNDLE_ITERATOR) return ures_getNextString(aiter->bund, len, key, err); #else - if(U_FAILURE(*err)) return NULL; + if(U_FAILURE(*err)) return nullptr; UResourceBundle *item = aiter->entries[aiter->cursor].item; const UChar* ret = ures_getString(item, len, err); *key = ures_getKey(item); @@ -302,49 +303,48 @@ DateTimePatternGenerator::createInstance(UErrorCode& status) { DateTimePatternGenerator* U_EXPORT2 DateTimePatternGenerator::createInstance(const Locale& locale, UErrorCode& status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } LocalPointer result( new DateTimePatternGenerator(locale, status), status); - return U_SUCCESS(status) ? result.orphan() : NULL; + return U_SUCCESS(status) ? result.orphan() : nullptr; } DateTimePatternGenerator* U_EXPORT2 DateTimePatternGenerator::createEmptyInstance(UErrorCode& status) { - DateTimePatternGenerator *result = new DateTimePatternGenerator(status); - if (result == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - } if (U_FAILURE(status)) { - delete result; - result = NULL; + return nullptr; } - return result; + LocalPointer result( + new DateTimePatternGenerator(status), status); + return U_SUCCESS(status) ? result.orphan() : nullptr; } DateTimePatternGenerator::DateTimePatternGenerator(UErrorCode &status) : - skipMatcher(NULL), - fAvailableFormatKeyHash(NULL) + skipMatcher(nullptr), + fAvailableFormatKeyHash(nullptr), + internalErrorCode(U_ZERO_ERROR) { fp = new FormatParser(); dtMatcher = new DateTimeMatcher(); distanceInfo = new DistanceInfo(); patternMap = new PatternMap(); - if (fp == NULL || dtMatcher == NULL || distanceInfo == NULL || patternMap == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { + internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; } } DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorCode &status) : - skipMatcher(NULL), - fAvailableFormatKeyHash(NULL) + skipMatcher(nullptr), + fAvailableFormatKeyHash(nullptr), + internalErrorCode(U_ZERO_ERROR) { fp = new FormatParser(); dtMatcher = new DateTimeMatcher(); distanceInfo = new DistanceInfo(); patternMap = new PatternMap(); - if (fp == NULL || dtMatcher == NULL || distanceInfo == NULL || patternMap == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { + internalErrorCode = status = U_MEMORY_ALLOCATION_ERROR; } else { initData(locale, status); @@ -353,13 +353,17 @@ DateTimePatternGenerator::DateTimePatternGenerator(const Locale& locale, UErrorC DateTimePatternGenerator::DateTimePatternGenerator(const DateTimePatternGenerator& other) : UObject(), - skipMatcher(NULL), - fAvailableFormatKeyHash(NULL) + skipMatcher(nullptr), + fAvailableFormatKeyHash(nullptr), + internalErrorCode(U_ZERO_ERROR) { fp = new FormatParser(); dtMatcher = new DateTimeMatcher(); distanceInfo = new DistanceInfo(); patternMap = new PatternMap(); + if (fp == nullptr || dtMatcher == nullptr || distanceInfo == nullptr || patternMap == nullptr) { + internalErrorCode = U_MEMORY_ALLOCATION_ERROR; + } *this=other; } @@ -369,6 +373,7 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { if (&other == this) { return *this; } + internalErrorCode = other.internalErrorCode; pLocale = other.pLocale; fDefaultHourFormatChar = other.fDefaultHourFormatChar; *fp = *(other.fp); @@ -380,11 +385,16 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { dateTimeFormat.getTerminatedBuffer(); decimal.getTerminatedBuffer(); delete skipMatcher; - if ( other.skipMatcher == NULL ) { - skipMatcher = NULL; + if ( other.skipMatcher == nullptr ) { + skipMatcher = nullptr; } else { skipMatcher = new DateTimeMatcher(*other.skipMatcher); + if (skipMatcher == nullptr) + { + internalErrorCode = U_MEMORY_ALLOCATION_ERROR; + return *this; + } } for (int32_t i=0; i< UDATPG_FIELD_COUNT; ++i ) { appendItemFormats[i] = other.appendItemFormats[i]; @@ -394,9 +404,8 @@ DateTimePatternGenerator::operator=(const DateTimePatternGenerator& other) { fieldDisplayNames[i][j].getTerminatedBuffer(); // NUL-terminate for the C API. } } - UErrorCode status = U_ZERO_ERROR; - patternMap->copyFrom(*other.patternMap, status); - copyHashtable(other.fAvailableFormatKeyHash, status); + patternMap->copyFrom(*other.patternMap, internalErrorCode); + copyHashtable(other.fAvailableFormatKeyHash, internalErrorCode); return *this; } @@ -431,21 +440,21 @@ DateTimePatternGenerator::operator!=(const DateTimePatternGenerator& other) cons } DateTimePatternGenerator::~DateTimePatternGenerator() { - if (fAvailableFormatKeyHash!=NULL) { + if (fAvailableFormatKeyHash!=nullptr) { delete fAvailableFormatKeyHash; } - if (fp != NULL) delete fp; - if (dtMatcher != NULL) delete dtMatcher; - if (distanceInfo != NULL) delete distanceInfo; - if (patternMap != NULL) delete patternMap; - if (skipMatcher != NULL) delete skipMatcher; + if (fp != nullptr) delete fp; + if (dtMatcher != nullptr) delete dtMatcher; + if (distanceInfo != nullptr) delete distanceInfo; + if (patternMap != nullptr) delete patternMap; + if (skipMatcher != nullptr) delete skipMatcher; } namespace { UInitOnce initOnce = U_INITONCE_INITIALIZER; -UHashtable *localeToAllowedHourFormatsMap = NULL; +UHashtable *localeToAllowedHourFormatsMap = nullptr; // Value deleter for hashmap. U_CFUNC void U_CALLCONV deleteAllowedHourFormats(void *ptr) { @@ -474,8 +483,8 @@ void DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) { //const char *baseLangName = locale.getBaseName(); // unused - skipMatcher = NULL; - fAvailableFormatKeyHash=NULL; + skipMatcher = nullptr; + fAvailableFormatKeyHash=nullptr; addCanonicalItems(status); addICUPatterns(locale, status); addCLDRData(locale, status); @@ -483,6 +492,8 @@ DateTimePatternGenerator::initData(const Locale& locale, UErrorCode &status) { setDecimalSymbols(locale, status); umtx_initOnce(initOnce, loadAllowedHourFormatsData, status); getAllowedHourFormats(locale, status); + // If any of the above methods failed then the object is in an invalid state. + internalErrorCode = status; } // DateTimePatternGenerator::initData namespace { @@ -505,7 +516,7 @@ struct AllowedHourFormatsSink : public ResourceSink { LocalMemory list; int32_t length; if (value.getType() == URES_STRING) { - if (list.allocateInsteadAndReset(2) == NULL) { + if (list.allocateInsteadAndReset(2) == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } @@ -515,7 +526,7 @@ struct AllowedHourFormatsSink : public ResourceSink { else { ResourceArray allowedFormats = value.getArray(errorCode); length = allowedFormats.getSize(); - if (list.allocateInsteadAndReset(length + 1) == NULL) { + if (list.allocateInsteadAndReset(length + 1) == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } @@ -555,9 +566,14 @@ AllowedHourFormatsSink::~AllowedHourFormatsSink() {} U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UErrorCode &status) { if (U_FAILURE(status)) { return; } localeToAllowedHourFormatsMap = uhash_open( - uhash_hashChars, uhash_compareChars, NULL, &status); + uhash_hashChars, uhash_compareChars, nullptr, &status); + if (U_FAILURE(status)) { return; } + uhash_setValueDeleter(localeToAllowedHourFormatsMap, deleteAllowedHourFormats); - LocalUResourceBundlePointer rb(ures_openDirect(NULL, "supplementalData", &status)); + ucln_i18n_registerCleanup(UCLN_I18N_ALLOWED_HOUR_FORMATS, allowedHourFormatsCleanup); + + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status)); + if (U_FAILURE(status)) { return; } AllowedHourFormatsSink sink; // TODO: Currently in the enumeration each table allocates a new array. @@ -567,8 +583,6 @@ U_CFUNC void U_CALLCONV DateTimePatternGenerator::loadAllowedHourFormatsData(UEr // vector (at index enum*2) for easy data sharing, copy sub-arrays into runtime // object. Remember to clean up the vector, too. ures_getAllItemsWithFallback(rb.getAlias(), "timeData", sink, status); - - ucln_i18n_registerCleanup(UCLN_I18N_ALLOWED_HOUR_FORMATS, allowedHourFormatsCleanup); } void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErrorCode &status) { @@ -589,17 +603,17 @@ void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErro const char *language = maxLocale.getLanguage(); CharString langCountry; - langCountry.append(language, uprv_strlen(language), status); + langCountry.append(language, static_cast(uprv_strlen(language)), status); langCountry.append('_', status); - langCountry.append(country, uprv_strlen(country), status); + langCountry.append(country, static_cast(uprv_strlen(country)), status); int32_t *allowedFormats; allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, langCountry.data()); - if (allowedFormats == NULL) { + if (allowedFormats == nullptr) { allowedFormats = (int32_t *)uhash_get(localeToAllowedHourFormatsMap, const_cast(country)); } - if (allowedFormats != NULL) { // Lookup is successful + if (allowedFormats != nullptr) { // Lookup is successful for (int32_t i = 0; i < UPRV_LENGTHOF(fAllowedHourFormats); ++i) { fAllowedHourFormats[i] = allowedFormats[i]; if (allowedFormats[i] == ALLOWED_HOUR_FORMAT_UNKNOWN) { @@ -615,10 +629,10 @@ void DateTimePatternGenerator::getAllowedHourFormats(const Locale &locale, UErro UnicodeString DateTimePatternGenerator::getSkeleton(const UnicodeString& pattern, UErrorCode& /*status*/) { - FormatParser fp; + FormatParser fp2; DateTimeMatcher matcher; PtnSkeleton localSkeleton; - matcher.set(pattern, &fp, localSkeleton); + matcher.set(pattern, &fp2, localSkeleton); return localSkeleton.getSkeleton(); } @@ -634,10 +648,10 @@ DateTimePatternGenerator::staticGetSkeleton( UnicodeString DateTimePatternGenerator::getBaseSkeleton(const UnicodeString& pattern, UErrorCode& /*status*/) { - FormatParser fp; + FormatParser fp2; DateTimeMatcher matcher; PtnSkeleton localSkeleton; - matcher.set(pattern, &fp, localSkeleton); + matcher.set(pattern, &fp2, localSkeleton); return localSkeleton.getBaseSkeleton(); } @@ -663,7 +677,7 @@ DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& statu DateFormat::EStyle style = (DateFormat::EStyle)i; df = DateFormat::createDateInstance(style, locale); SimpleDateFormat* sdf; - if (df != NULL && (sdf = dynamic_cast(df)) != NULL) { + if (df != nullptr && (sdf = dynamic_cast(df)) != nullptr) { sdf->toPattern(dfPattern); addPattern(dfPattern, FALSE, conflictingString, status); } @@ -672,7 +686,7 @@ DateTimePatternGenerator::addICUPatterns(const Locale& locale, UErrorCode& statu if (U_FAILURE(status)) { return; } df = DateFormat::createTimeInstance(style, locale); - if (df != NULL && (sdf = dynamic_cast(df)) != NULL) { + if (df != nullptr && (sdf = dynamic_cast(df)) != nullptr) { sdf->toPattern(dfPattern); addPattern(dfPattern, FALSE, conflictingString, status); @@ -747,13 +761,14 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& ures_getFunctionalEquivalent( localeWithCalendarKey, ULOC_LOCALE_IDENTIFIER_CAPACITY, - NULL, + nullptr, "calendar", "calendar", locale.getName(), - NULL, + nullptr, FALSE, &err); + if (U_FAILURE(err)) { return; } localeWithCalendarKey[ULOC_LOCALE_IDENTIFIER_CAPACITY-1] = 0; // ensure null termination // now get the calendar key value from that locale char calendarType[ULOC_KEYWORDS_CAPACITY]; @@ -763,7 +778,8 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& calendarType, ULOC_KEYWORDS_CAPACITY, &err); - if (U_SUCCESS(err) && calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { + if (U_FAILURE(err)) { return; } + if (calendarTypeLen < ULOC_KEYWORDS_CAPACITY) { destination.clear().append(calendarType, -1, err); if (U_FAILURE(err)) { return; } } @@ -774,7 +790,7 @@ DateTimePatternGenerator::getCalendarTypeToUse(const Locale& locale, CharString& void DateTimePatternGenerator::consumeShortTimePattern(const UnicodeString& shortTimePattern, UErrorCode& status) { - + if (U_FAILURE(status)) { return; } // set fDefaultHourFormatChar to the hour format character from this pattern int32_t tfIdx, tfLen = shortTimePattern.length(); UBool ignoreChars = FALSE; @@ -782,7 +798,7 @@ DateTimePatternGenerator::consumeShortTimePattern(const UnicodeString& shortTime UChar tfChar = shortTimePattern.charAt(tfIdx); if ( tfChar == SINGLE_QUOTE ) { ignoreChars = !ignoreChars; // toggle (handle quoted literals & '' for single quote) - } else if ( !ignoreChars && u_strchr(hourFormatChars, tfChar) != NULL ) { + } else if ( !ignoreChars && u_strchr(hourFormatChars, tfChar) != nullptr ) { fDefaultHourFormatChar = tfChar; break; } @@ -872,9 +888,9 @@ struct DateTimePatternGenerator::AppendItemNamesSink : public ResourceSink { valueStr.getTerminatedBuffer(); } for (int32_t j = 1; j < UDATPG_WIDTH_COUNT; j++) { - UnicodeString& valueStr = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)j); - if (valueStr.isEmpty()) { - valueStr = dtpg.getFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)(j-1)); + UnicodeString& valueStr2 = dtpg.getMutableFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)j); + if (valueStr2.isEmpty()) { + valueStr2 = dtpg.getFieldDisplayName((UDateTimePatternField)i, (UDateTimePGDisplayWidth)(j-1)); } } } @@ -921,7 +937,7 @@ DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCod UnicodeString rbPattern, value, field; CharString path; - LocalUResourceBundlePointer rb(ures_open(NULL, locale.getName(), &errorCode)); + LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &errorCode)); if (U_FAILURE(errorCode)) { return; } CharString calendarTypeToUse; // to be filled in with the type to use, if all goes well @@ -966,12 +982,13 @@ DateTimePatternGenerator::addCLDRData(const Locale& locale, UErrorCode& errorCod void DateTimePatternGenerator::initHashtable(UErrorCode& err) { - if (fAvailableFormatKeyHash!=NULL) { + if (U_FAILURE(err)) { return; } + if (fAvailableFormatKeyHash!=nullptr) { return; } - if ((fAvailableFormatKeyHash = new Hashtable(FALSE, err))==NULL) { - err=U_MEMORY_ALLOCATION_ERROR; - return; + LocalPointer hash(new Hashtable(FALSE, err), err); + if (U_SUCCESS(err)) { + fAvailableFormatKeyHash = hash.orphan(); } } @@ -1028,7 +1045,14 @@ DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UErro UnicodeString DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UDateTimePatternMatchOptions options, UErrorCode& status) { - const UnicodeString *bestPattern=NULL; + if (U_FAILURE(status)) { + return UnicodeString(); + } + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return UnicodeString(); + } + const UnicodeString *bestPattern = nullptr; UnicodeString dtFormat; UnicodeString resultPattern; int32_t flags = kDTPGNoFlags; @@ -1044,16 +1068,23 @@ DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UDate resultPattern.remove(); dtMatcher->set(patternFormMapped, fp); - const PtnSkeleton* specifiedSkeleton=NULL; - bestPattern=getBestRaw(*dtMatcher, -1, distanceInfo, &specifiedSkeleton); + const PtnSkeleton* specifiedSkeleton = nullptr; + bestPattern=getBestRaw(*dtMatcher, -1, distanceInfo, status, &specifiedSkeleton); + if (U_FAILURE(status)) { + return UnicodeString(); + } + if ( distanceInfo->missingFieldMask==0 && distanceInfo->extraFieldMask==0 ) { resultPattern = adjustFieldTypes(*bestPattern, specifiedSkeleton, flags, options); return resultPattern; } int32_t neededFields = dtMatcher->getFieldMask(); - UnicodeString datePattern=getBestAppending(neededFields & dateMask, flags, options); - UnicodeString timePattern=getBestAppending(neededFields & timeMask, flags, options); + UnicodeString datePattern=getBestAppending(neededFields & dateMask, flags, status, options); + UnicodeString timePattern=getBestAppending(neededFields & timeMask, flags, status, options); + if (U_FAILURE(status)) { + return UnicodeString(); + } if (datePattern.length()==0) { if (timePattern.length()==0) { resultPattern.remove(); @@ -1074,7 +1105,7 @@ DateTimePatternGenerator::getBestPattern(const UnicodeString& patternForm, UDate /* * Map a skeleton that may have metacharacters jJC to one without, by replacing - * the metacharacters with locale-appropriate fields of of h/H/k/K and of a/b/B + * the metacharacters with locale-appropriate fields of h/H/k/K and of a/b/B * (depends on fDefaultHourFormatChar and fAllowedHourFormats being set, which in * turn depends on initData having been run). This method also updates the flags * as necessary. Returns the updated skeleton. @@ -1159,9 +1190,16 @@ UnicodeString DateTimePatternGenerator::replaceFieldTypes(const UnicodeString& pattern, const UnicodeString& skeleton, UDateTimePatternMatchOptions options, - UErrorCode& /*status*/) { + UErrorCode& status) { + if (U_FAILURE(status)) { + return UnicodeString(); + } + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return UnicodeString(); + } dtMatcher->set(skeleton, fp); - UnicodeString result = adjustFieldTypes(pattern, NULL, kDTPGNoFlags, options); + UnicodeString result = adjustFieldTypes(pattern, nullptr, kDTPGNoFlags, options); return result; } @@ -1204,20 +1242,24 @@ DateTimePatternGenerator::getDateTimeFormat() const { void DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + const UChar *resStr; int32_t resStrLen = 0; - Calendar* fCalendar = Calendar::createInstance(locale, status); + LocalPointer fCalendar(Calendar::createInstance(locale, status), status); if (U_FAILURE(status)) { return; } - LocalUResourceBundlePointer calData(ures_open(NULL, locale.getBaseName(), &status)); + LocalUResourceBundlePointer calData(ures_open(nullptr, locale.getBaseName(), &status)); + if (U_FAILURE(status)) { return; } ures_getByKey(calData.getAlias(), DT_DateTimeCalendarTag, calData.getAlias(), &status); + if (U_FAILURE(status)) { return; } LocalUResourceBundlePointer dateTimePatterns; - if (fCalendar != NULL && fCalendar->getType() != NULL && *fCalendar->getType() != '\0' + if (fCalendar->getType() != nullptr && *fCalendar->getType() != '\0' && uprv_strcmp(fCalendar->getType(), DT_DateTimeGregorianTag) != 0) { dateTimePatterns.adoptInstead(ures_getByKeyWithFallback(calData.getAlias(), fCalendar->getType(), - NULL, &status)); + nullptr, &status)); ures_getByKeyWithFallback(dateTimePatterns.getAlias(), DT_DateTimePatternsTag, dateTimePatterns.getAlias(), &status); } @@ -1238,8 +1280,6 @@ DateTimePatternGenerator::setDateTimeFromCalendar(const Locale& locale, UErrorCo } resStr = ures_getStringByIndex(dateTimePatterns.getAlias(), (int32_t)DateFormat::kDateTime, &resStrLen, &status); setDateTimeFormat(UnicodeString(TRUE, resStr, resStrLen)); - - delete fCalendar; } void @@ -1259,7 +1299,12 @@ DateTimePatternGenerator::addPattern( UnicodeString &conflictingPattern, UErrorCode& status) { - return addPatternWithSkeleton(pattern, NULL, override, conflictingPattern, status); + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return UDATPG_NO_CONFLICT; + } + + return addPatternWithSkeleton(pattern, nullptr, override, conflictingPattern, status); } // For DateTimePatternGenerator::addPatternWithSkeleton - @@ -1280,13 +1325,17 @@ DateTimePatternGenerator::addPatternWithSkeleton( UnicodeString& conflictingPattern, UErrorCode& status) { + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return UDATPG_NO_CONFLICT; + } UnicodeString basePattern; PtnSkeleton skeleton; UDateTimePatternConflict conflictingStatus = UDATPG_NO_CONFLICT; DateTimeMatcher matcher; - if ( skeletonToUse == NULL ) { + if ( skeletonToUse == nullptr ) { matcher.set(pattern, fp, skeleton); matcher.getBasePattern(basePattern); } else { @@ -1302,7 +1351,7 @@ DateTimePatternGenerator::addPatternWithSkeleton( // availableFormats items from root, which should not override any previous entry with the same base. UBool entryHadSpecifiedSkeleton; const UnicodeString *duplicatePattern = patternMap->getPatternFromBasePattern(basePattern, entryHadSpecifiedSkeleton); - if (duplicatePattern != NULL && (!entryHadSpecifiedSkeleton || (skeletonToUse != NULL && !override))) { + if (duplicatePattern != nullptr && (!entryHadSpecifiedSkeleton || (skeletonToUse != nullptr && !override))) { conflictingStatus = UDATPG_BASE_CONFLICT; conflictingPattern = *duplicatePattern; if (!override) { @@ -1313,16 +1362,16 @@ DateTimePatternGenerator::addPatternWithSkeleton( // items from CLDR data. In that case, we don't want an item from a parent locale to replace an item with // same skeleton from the specified locale, so skip the current item if skeletonWasSpecified is true for // the previously-specified conflicting item. - const PtnSkeleton* entrySpecifiedSkeleton = NULL; + const PtnSkeleton* entrySpecifiedSkeleton = nullptr; duplicatePattern = patternMap->getPatternFromSkeleton(skeleton, &entrySpecifiedSkeleton); - if (duplicatePattern != NULL ) { + if (duplicatePattern != nullptr ) { conflictingStatus = UDATPG_CONFLICT; conflictingPattern = *duplicatePattern; - if (!override || (skeletonToUse != NULL && entrySpecifiedSkeleton != NULL)) { + if (!override || (skeletonToUse != nullptr && entrySpecifiedSkeleton != nullptr)) { return conflictingStatus; } } - patternMap->add(basePattern, skeleton, pattern, skeletonToUse != NULL, status); + patternMap->add(basePattern, skeleton, pattern, skeletonToUse != nullptr, status); if(U_FAILURE(status)) { return conflictingStatus; } @@ -1369,13 +1418,16 @@ const UnicodeString* DateTimePatternGenerator::getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, + UErrorCode &status, const PtnSkeleton** specifiedSkeletonPtr) { int32_t bestDistance = 0x7fffffff; DistanceInfo tempInfo; - const UnicodeString *bestPattern=NULL; - const PtnSkeleton* specifiedSkeleton=NULL; + const UnicodeString *bestPattern=nullptr; + const PtnSkeleton* specifiedSkeleton=nullptr; + + PatternMapIterator it(status); + if (U_FAILURE(status)) { return nullptr; } - PatternMapIterator it; for (it.set(*patternMap); it.hasNext(); ) { DateTimeMatcher trial = it.next(); if (trial.equals(skipMatcher)) { @@ -1485,8 +1537,8 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, c = fDefaultHourFormatChar; } field.remove(); - for (int32_t i=adjFieldLen; i>0; --i) { - field+=c; + for (int32_t j=adjFieldLen; j>0; --j) { + field += c; } } newPattern+=field; @@ -1496,14 +1548,21 @@ DateTimePatternGenerator::adjustFieldTypes(const UnicodeString& pattern, } UnicodeString -DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, UDateTimePatternMatchOptions options) { +DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, UErrorCode &status, UDateTimePatternMatchOptions options) { + if (U_FAILURE(status)) { + return UnicodeString(); + } UnicodeString resultPattern, tempPattern; - UErrorCode err=U_ZERO_ERROR; + const UnicodeString* tempPatternPtr; int32_t lastMissingFieldMask=0; if (missingFields!=0) { resultPattern=UnicodeString(); - const PtnSkeleton* specifiedSkeleton=NULL; - tempPattern = *getBestRaw(*dtMatcher, missingFields, distanceInfo, &specifiedSkeleton); + const PtnSkeleton* specifiedSkeleton=nullptr; + tempPatternPtr = getBestRaw(*dtMatcher, missingFields, distanceInfo, status, &specifiedSkeleton); + if (U_FAILURE(status)) { + return UnicodeString(); + } + tempPattern = *tempPatternPtr; resultPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options); if ( distanceInfo->missingFieldMask==0 ) { return resultPattern; @@ -1519,19 +1578,26 @@ DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, continue; } int32_t startingMask = distanceInfo->missingFieldMask; - tempPattern = *getBestRaw(*dtMatcher, distanceInfo->missingFieldMask, distanceInfo, &specifiedSkeleton); + tempPatternPtr = getBestRaw(*dtMatcher, distanceInfo->missingFieldMask, distanceInfo, status, &specifiedSkeleton); + if (U_FAILURE(status)) { + return UnicodeString(); + } + tempPattern = *tempPatternPtr; tempPattern = adjustFieldTypes(tempPattern, specifiedSkeleton, flags, options); int32_t foundMask=startingMask& ~distanceInfo->missingFieldMask; int32_t topField=getTopBitNumber(foundMask); - UnicodeString appendName; - getAppendName((UDateTimePatternField)topField, appendName); - const UnicodeString *values[3] = { - &resultPattern, - &tempPattern, - &appendName - }; - SimpleFormatter(appendItemFormats[topField], 2, 3, err). - formatAndReplace(values, 3, resultPattern, NULL, 0, err); + + if (appendItemFormats[topField].length() != 0) { + UnicodeString appendName; + getAppendName((UDateTimePatternField)topField, appendName); + const UnicodeString *values[3] = { + &resultPattern, + &tempPattern, + &appendName + }; + SimpleFormatter(appendItemFormats[topField], 2, 3, status). + formatAndReplace(values, 3, resultPattern, nullptr, 0, status); + } lastMissingFieldMask = distanceInfo->missingFieldMask; } } @@ -1539,7 +1605,7 @@ DateTimePatternGenerator::getBestAppending(int32_t missingFields, int32_t flags, } int32_t -DateTimePatternGenerator::getTopBitNumber(int32_t foundMask) { +DateTimePatternGenerator::getTopBitNumber(int32_t foundMask) const { if ( foundMask==0 ) { return 0; } @@ -1568,22 +1634,21 @@ DateTimePatternGenerator::isAvailableFormatSet(const UnicodeString &key) const { void DateTimePatternGenerator::copyHashtable(Hashtable *other, UErrorCode &status) { - - if (other == NULL) { + if (other == nullptr || U_FAILURE(status)) { return; } - if (fAvailableFormatKeyHash != NULL) { + if (fAvailableFormatKeyHash != nullptr) { delete fAvailableFormatKeyHash; - fAvailableFormatKeyHash = NULL; + fAvailableFormatKeyHash = nullptr; } initHashtable(status); if(U_FAILURE(status)){ return; } int32_t pos = UHASH_FIRST; - const UHashElement* elem = NULL; + const UHashElement* elem = nullptr; // walk through the hash table and create a deep clone - while((elem = other->nextElement(pos))!= NULL){ + while((elem = other->nextElement(pos))!= nullptr){ const UHashTok otherKeyTok = elem->key; UnicodeString* otherKey = (UnicodeString*)otherKeyTok.pointer; fAvailableFormatKeyHash->puti(*otherKey, 1, status); @@ -1595,8 +1660,17 @@ DateTimePatternGenerator::copyHashtable(Hashtable *other, UErrorCode &status) { StringEnumeration* DateTimePatternGenerator::getSkeletons(UErrorCode& status) const { - StringEnumeration* skeletonEnumerator = new DTSkeletonEnumeration(*patternMap, DT_SKELETON, status); - return skeletonEnumerator; + if (U_FAILURE(status)) { + return nullptr; + } + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return nullptr; + } + LocalPointer skeletonEnumerator( + new DTSkeletonEnumeration(*patternMap, DT_SKELETON, status), status); + + return U_SUCCESS(status) ? skeletonEnumerator.orphan() : nullptr; } const UnicodeString& @@ -1607,47 +1681,70 @@ DateTimePatternGenerator::getPatternForSkeleton(const UnicodeString& skeleton) c return emptyString; } curElem = patternMap->getHeader(skeleton.charAt(0)); - while ( curElem != NULL ) { + while ( curElem != nullptr ) { if ( curElem->skeleton->getSkeleton()==skeleton ) { return curElem->pattern; } - curElem=curElem->next; + curElem = curElem->next.getAlias(); } return emptyString; } StringEnumeration* DateTimePatternGenerator::getBaseSkeletons(UErrorCode& status) const { - StringEnumeration* baseSkeletonEnumerator = new DTSkeletonEnumeration(*patternMap, DT_BASESKELETON, status); - return baseSkeletonEnumerator; + if (U_FAILURE(status)) { + return nullptr; + } + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return nullptr; + } + LocalPointer baseSkeletonEnumerator( + new DTSkeletonEnumeration(*patternMap, DT_BASESKELETON, status), status); + + return U_SUCCESS(status) ? baseSkeletonEnumerator.orphan() : nullptr; } StringEnumeration* DateTimePatternGenerator::getRedundants(UErrorCode& status) { - StringEnumeration* output = new DTRedundantEnumeration(); + if (U_FAILURE(status)) { return nullptr; } + if (U_FAILURE(internalErrorCode)) { + status = internalErrorCode; + return nullptr; + } + LocalPointer output(new DTRedundantEnumeration(), status); + if (U_FAILURE(status)) { return nullptr; } const UnicodeString *pattern; - PatternMapIterator it; + PatternMapIterator it(status); + if (U_FAILURE(status)) { return nullptr; } + for (it.set(*patternMap); it.hasNext(); ) { DateTimeMatcher current = it.next(); pattern = patternMap->getPatternFromSkeleton(*(it.getSkeleton())); if ( isCanonicalItem(*pattern) ) { continue; } - if ( skipMatcher == NULL ) { + if ( skipMatcher == nullptr ) { skipMatcher = new DateTimeMatcher(current); + if (skipMatcher == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } } else { *skipMatcher = current; } UnicodeString trial = getBestPattern(current.getPattern(), status); + if (U_FAILURE(status)) { return nullptr; } if (trial == *pattern) { - ((DTRedundantEnumeration *)output)->add(*pattern, status); + ((DTRedundantEnumeration *)output.getAlias())->add(*pattern, status); + if (U_FAILURE(status)) { return nullptr; } } if (current.equals(skipMatcher)) { continue; } } - return output; + return output.orphan(); } UBool @@ -1671,45 +1768,54 @@ DateTimePatternGenerator::clone() const { PatternMap::PatternMap() { for (int32_t i=0; i < MAX_PATTERN_ENTRIES; ++i ) { - boot[i]=NULL; + boot[i] = nullptr; } isDupAllowed = TRUE; } void PatternMap::copyFrom(const PatternMap& other, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } this->isDupAllowed = other.isDupAllowed; - for (int32_t bootIndex=0; bootIndexbasePattern, otherElem->pattern))==NULL) { - // out of memory - status = U_MEMORY_ALLOCATION_ERROR; - return; + while (otherElem != nullptr) { + LocalPointer newElem(new PtnElem(otherElem->basePattern, otherElem->pattern), status); + if (U_FAILURE(status)) { + return; // out of memory } - if ( this->boot[bootIndex]== NULL ) { - this->boot[bootIndex] = curElem; + newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(*(otherElem->skeleton)), status); + if (U_FAILURE(status)) { + return; // out of memory } - if ((curElem->skeleton=new PtnSkeleton(*(otherElem->skeleton))) == NULL ) { - // out of memory - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - curElem->skeletonWasSpecified = otherElem->skeletonWasSpecified; - if (prevElem!=NULL) { - prevElem->next=curElem; + newElem->skeletonWasSpecified = otherElem->skeletonWasSpecified; + + // Release ownership from the LocalPointer of the PtnElem object. + // The PtnElem will now be owned by either the boot (for the first entry in the linked-list) + // or owned by the previous PtnElem object in the linked-list. + curElem = newElem.orphan(); + + if (this->boot[bootIndex] == nullptr) { + this->boot[bootIndex] = curElem; + } else { + if (prevElem != nullptr) { + prevElem->next.adoptInstead(curElem); + } else { + U_ASSERT(false); + } } - curElem->next=NULL; prevElem = curElem; - otherElem = otherElem->next; + otherElem = otherElem->next.getAlias(); } } } PtnElem* -PatternMap::getHeader(UChar baseChar) { +PatternMap::getHeader(UChar baseChar) const { PtnElem* curElem; if ( (baseChar >= CAP_A) && (baseChar <= CAP_Z) ) { @@ -1720,7 +1826,7 @@ PatternMap::getHeader(UChar baseChar) { curElem = boot[26+baseChar-LOW_A]; } else { - return NULL; + return nullptr; } } return curElem; @@ -1728,9 +1834,9 @@ PatternMap::getHeader(UChar baseChar) { PatternMap::~PatternMap() { for (int32_t i=0; i < MAX_PATTERN_ENTRIES; ++i ) { - if (boot[i]!=NULL ) { + if (boot[i] != nullptr ) { delete boot[i]; - boot[i]=NULL; + boot[i] = nullptr; } } } // PatternMap destructor @@ -1759,39 +1865,45 @@ PatternMap::add(const UnicodeString& basePattern, } } - if (baseElem == NULL) { - if ((curElem = new PtnElem(basePattern, value)) == NULL ) { - // out of memory - status = U_MEMORY_ALLOCATION_ERROR; - return; + if (baseElem == nullptr) { + LocalPointer newElem(new PtnElem(basePattern, value), status); + if (U_FAILURE(status)) { + return; // out of memory } + newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status); + if (U_FAILURE(status)) { + return; // out of memory + } + newElem->skeletonWasSpecified = skeletonWasSpecified; if (baseChar >= LOW_A) { - boot[26 + (baseChar-LOW_A)] = curElem; + boot[26 + (baseChar - LOW_A)] = newElem.orphan(); // the boot array now owns the PtnElem. } else { - boot[baseChar-CAP_A] = curElem; + boot[baseChar - CAP_A] = newElem.orphan(); // the boot array now owns the PtnElem. } - curElem->skeleton = new PtnSkeleton(skeleton); - curElem->skeletonWasSpecified = skeletonWasSpecified; } - if ( baseElem != NULL ) { + if ( baseElem != nullptr ) { curElem = getDuplicateElem(basePattern, skeleton, baseElem); - if (curElem == NULL) { + if (curElem == nullptr) { // add new element to the list. curElem = baseElem; - while( curElem -> next != NULL ) + while( curElem -> next != nullptr ) { - curElem = curElem->next; + curElem = curElem->next.getAlias(); } - if ((curElem->next = new PtnElem(basePattern, value)) == NULL ) { - // out of memory - status = U_MEMORY_ALLOCATION_ERROR; - return; + + LocalPointer newElem(new PtnElem(basePattern, value), status); + if (U_FAILURE(status)) { + return; // out of memory } - curElem=curElem->next; - curElem->skeleton = new PtnSkeleton(skeleton); - curElem->skeletonWasSpecified = skeletonWasSpecified; + newElem->skeleton.adoptInsteadAndCheckErrorCode(new PtnSkeleton(skeleton), status); + if (U_FAILURE(status)) { + return; // out of memory + } + newElem->skeletonWasSpecified = skeletonWasSpecified; + curElem->next.adoptInstead(newElem.orphan()); + curElem = curElem->next.getAlias(); } else { // Pattern exists in the list already. @@ -1809,11 +1921,11 @@ PatternMap::add(const UnicodeString& basePattern, // Find the pattern from the given basePattern string. const UnicodeString * -PatternMap::getPatternFromBasePattern(UnicodeString& basePattern, UBool& skeletonWasSpecified) { // key to search for +PatternMap::getPatternFromBasePattern(const UnicodeString& basePattern, UBool& skeletonWasSpecified) const { // key to search for PtnElem *curElem; - if ((curElem=getHeader(basePattern.charAt(0)))==NULL) { - return NULL; // no match + if ((curElem=getHeader(basePattern.charAt(0)))==nullptr) { + return nullptr; // no match } do { @@ -1821,10 +1933,10 @@ PatternMap::getPatternFromBasePattern(UnicodeString& basePattern, UBool& skeleto skeletonWasSpecified = curElem->skeletonWasSpecified; return &(curElem->pattern); } - curElem=curElem->next; - }while (curElem != NULL); + curElem = curElem->next.getAlias(); + } while (curElem != nullptr); - return NULL; + return nullptr; } // PatternMap::getFromBasePattern @@ -1835,69 +1947,69 @@ PatternMap::getPatternFromBasePattern(UnicodeString& basePattern, UBool& skeleto // optimum distance value in getBestRaw. When this is called from public getRedundants (specifiedSkeletonPtr is NULL), // for now it will continue to compare based on baseOriginal so as not to change the behavior unnecessarily. const UnicodeString * -PatternMap::getPatternFromSkeleton(PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr) { // key to search for +PatternMap::getPatternFromSkeleton(const PtnSkeleton& skeleton, const PtnSkeleton** specifiedSkeletonPtr) const { // key to search for PtnElem *curElem; if (specifiedSkeletonPtr) { - *specifiedSkeletonPtr = NULL; + *specifiedSkeletonPtr = nullptr; } // find boot entry UChar baseChar = skeleton.getFirstChar(); - if ((curElem=getHeader(baseChar))==NULL) { - return NULL; // no match + if ((curElem=getHeader(baseChar))==nullptr) { + return nullptr; // no match } do { UBool equal; - if (specifiedSkeletonPtr != NULL) { // called from DateTimePatternGenerator::getBestRaw or addPattern, use original + if (specifiedSkeletonPtr != nullptr) { // called from DateTimePatternGenerator::getBestRaw or addPattern, use original equal = curElem->skeleton->original == skeleton.original; } else { // called from DateTimePatternGenerator::getRedundants, use baseOriginal equal = curElem->skeleton->baseOriginal == skeleton.baseOriginal; } if (equal) { if (specifiedSkeletonPtr && curElem->skeletonWasSpecified) { - *specifiedSkeletonPtr = curElem->skeleton; + *specifiedSkeletonPtr = curElem->skeleton.getAlias(); } return &(curElem->pattern); } - curElem=curElem->next; - }while (curElem != NULL); + curElem = curElem->next.getAlias(); + } while (curElem != nullptr); - return NULL; + return nullptr; } UBool -PatternMap::equals(const PatternMap& other) { +PatternMap::equals(const PatternMap& other) const { if ( this==&other ) { return TRUE; } - for (int32_t bootIndex=0; bootIndexbasePattern != otherElem->basePattern) || (myElem->pattern != otherElem->pattern) ) { return FALSE; } - if ((myElem->skeleton!=otherElem->skeleton)&& + if ((myElem->skeleton.getAlias() != otherElem->skeleton.getAlias()) && !myElem->skeleton->equals(*(otherElem->skeleton))) { return FALSE; } - myElem = myElem->next; - otherElem=otherElem->next; + myElem = myElem->next.getAlias(); + otherElem = otherElem->next.getAlias(); } } return TRUE; @@ -1909,21 +2021,21 @@ PtnElem* PatternMap::getDuplicateElem( const UnicodeString &basePattern, const PtnSkeleton &skeleton, - PtnElem *baseElem) { + PtnElem *baseElem) { PtnElem *curElem; - if ( baseElem == (PtnElem *)NULL ) { - return (PtnElem*)NULL; + if ( baseElem == nullptr ) { + return nullptr; } else { curElem = baseElem; } do { if ( basePattern.compare(curElem->basePattern)==0 ) { - UBool isEqual=TRUE; - for (int32_t i=0; iskeleton->type[i] != skeleton.type[i] ) { - isEqual=FALSE; + isEqual = FALSE; break; } } @@ -1931,11 +2043,11 @@ PatternMap::getDuplicateElem( return curElem; } } - curElem = curElem->next; - } while( curElem != (PtnElem *)NULL ); + curElem = curElem->next.getAlias(); + } while( curElem != nullptr ); // end of the list - return (PtnElem*)NULL; + return nullptr; } // PatternMap::getDuplicateElem @@ -1976,7 +2088,7 @@ DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton continue; } int32_t canonicalIndex = fp->getCanonicalIndex(value); - if (canonicalIndex < 0 ) { + if (canonicalIndex < 0) { continue; } const dtTypeElem *row = &dtTypes[canonicalIndex]; @@ -1986,8 +2098,9 @@ DateTimeMatcher::set(const UnicodeString& pattern, FormatParser* fp, PtnSkeleton int32_t repeatCount = row->minLen; skeletonResult.baseOriginal.populate(field, repeatChar, repeatCount); int16_t subField = row->type; - if ( row->type > 0) { - subField += value.length(); + if (row->type > 0) { + U_ASSERT(value.length() < INT16_MAX); + subField += static_cast(value.length()); } skeletonResult.type[field] = subField; } @@ -2031,8 +2144,8 @@ DateTimeMatcher::getPattern() { } int32_t -DateTimeMatcher::getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) { - int32_t result=0; +DateTimeMatcher::getDistance(const DateTimeMatcher& other, int32_t includeMask, DistanceInfo& distanceInfo) const { + int32_t result = 0; distanceInfo.clear(); for (int32_t i=0; iskeleton.original; } int32_t -DateTimeMatcher::getFieldMask() { - int32_t result=0; +DateTimeMatcher::getFieldMask() const { + int32_t result = 0; for (int32_t i=0; i= pattern.length()) { return DONE; } @@ -2132,10 +2245,10 @@ FormatParser::setTokens(const UnicodeString& pattern, int32_t startPos, int32_t void FormatParser::set(const UnicodeString& pattern) { - int32_t startPos=0; - TokenStatus result=START; - int32_t len=0; - itemNumber =0; + int32_t startPos = 0; + TokenStatus result = START; + int32_t len = 0; + itemNumber = 0; do { result = setTokens( pattern, startPos, &len ); @@ -2186,14 +2299,14 @@ FormatParser::getCanonicalIndex(const UnicodeString& s, UBool strict) { UBool FormatParser::isQuoteLiteral(const UnicodeString& s) { - return (UBool)(s.charAt(0)==SINGLE_QUOTE); + return (UBool)(s.charAt(0) == SINGLE_QUOTE); } -// This function aussumes the current itemIndex points to the quote literal. +// This function assumes the current itemIndex points to the quote literal. // Please call isQuoteLiteral prior to this function. void FormatParser::getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex) { - int32_t i=*itemIndex; + int32_t i = *itemIndex; quote.remove(); if (items[i].charAt(0)==SINGLE_QUOTE) { @@ -2222,7 +2335,7 @@ FormatParser::getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex) { } UBool -FormatParser::isPatternSeparator(UnicodeString& field) { +FormatParser::isPatternSeparator(const UnicodeString& field) const { for (int32_t i=0; iskeleton; + return nodePtr->skeleton.getAlias(); } } UBool -PatternMapIterator::hasNext() { - int32_t headIndex=bootIndex; - PtnElem *curPtr=nodePtr; +PatternMapIterator::hasNext() const { + int32_t headIndex = bootIndex; + PtnElem *curPtr = nodePtr; - if (patternMap==NULL) { + if (patternMap==nullptr) { return FALSE; } while ( headIndex < MAX_PATTERN_ENTRIES ) { - if ( curPtr != NULL ) { - if ( curPtr->next != NULL ) { + if ( curPtr != nullptr ) { + if ( curPtr->next != nullptr ) { return TRUE; } else { headIndex++; - curPtr=NULL; + curPtr=nullptr; continue; } } else { - if ( patternMap->boot[headIndex] != NULL ) { + if ( patternMap->boot[headIndex] != nullptr ) { return TRUE; } else { @@ -2299,7 +2410,6 @@ PatternMapIterator::hasNext() { continue; } } - } return FALSE; } @@ -2307,19 +2417,19 @@ PatternMapIterator::hasNext() { DateTimeMatcher& PatternMapIterator::next() { while ( bootIndex < MAX_PATTERN_ENTRIES ) { - if ( nodePtr != NULL ) { - if ( nodePtr->next != NULL ) { - nodePtr = nodePtr->next; + if ( nodePtr != nullptr ) { + if ( nodePtr->next != nullptr ) { + nodePtr = nodePtr->next.getAlias(); break; } else { bootIndex++; - nodePtr=NULL; + nodePtr=nullptr; continue; } } else { - if ( patternMap->boot[bootIndex] != NULL ) { + if ( patternMap->boot[bootIndex] != nullptr ) { nodePtr = patternMap->boot[bootIndex]; break; } @@ -2329,7 +2439,7 @@ PatternMapIterator::next() { } } } - if (nodePtr!=NULL) { + if (nodePtr!=nullptr) { matcher->copyFrom(*nodePtr->skeleton); } else { @@ -2468,36 +2578,28 @@ PtnSkeleton::~PtnSkeleton() { } PtnElem::PtnElem(const UnicodeString &basePat, const UnicodeString &pat) : -basePattern(basePat), -skeleton(NULL), -pattern(pat), -next(NULL) + basePattern(basePat), skeleton(nullptr), pattern(pat), next(nullptr) { } PtnElem::~PtnElem() { - - if (next!=NULL) { - delete next; - } - delete skeleton; } -DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap &patternMap, dtStrEnum type, UErrorCode& status) { +DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status) : fSkeletons(nullptr) { PtnElem *curElem; PtnSkeleton *curSkeleton; UnicodeString s; int32_t bootIndex; pos=0; - fSkeletons = new UVector(status); + fSkeletons.adoptInsteadAndCheckErrorCode(new UVector(status), status); if (U_FAILURE(status)) { - delete fSkeletons; return; } + for (bootIndex=0; bootIndexbasePattern; @@ -2506,32 +2608,36 @@ DTSkeletonEnumeration::DTSkeletonEnumeration(PatternMap &patternMap, dtStrEnum t s=curElem->pattern; break; case DT_SKELETON: - curSkeleton=curElem->skeleton; + curSkeleton=curElem->skeleton.getAlias(); s=curSkeleton->getSkeleton(); break; } if ( !isCanonicalItem(s) ) { - fSkeletons->addElement(new UnicodeString(s), status); + LocalPointer newElem(new UnicodeString(s), status); if (U_FAILURE(status)) { - delete fSkeletons; - fSkeletons = NULL; return; } + fSkeletons->addElement(newElem.getAlias(), status); + if (U_FAILURE(status)) { + fSkeletons.adoptInstead(nullptr); + return; + } + newElem.orphan(); // fSkeletons vector now owns the UnicodeString. } - curElem = curElem->next; + curElem = curElem->next.getAlias(); } } - if ((bootIndex==MAX_PATTERN_ENTRIES) && (curElem!=NULL) ) { + if ((bootIndex==MAX_PATTERN_ENTRIES) && (curElem!=nullptr) ) { status = U_BUFFER_OVERFLOW_ERROR; } } const UnicodeString* DTSkeletonEnumeration::snext(UErrorCode& status) { - if (U_SUCCESS(status) && pos < fSkeletons->size()) { + if (U_SUCCESS(status) && fSkeletons.isValid() && pos < fSkeletons->size()) { return (const UnicodeString*)fSkeletons->elementAt(pos++); } - return NULL; + return nullptr; } void @@ -2541,7 +2647,7 @@ DTSkeletonEnumeration::reset(UErrorCode& /*status*/) { int32_t DTSkeletonEnumeration::count(UErrorCode& /*status*/) const { - return (fSkeletons==NULL) ? 0 : fSkeletons->size(); + return (fSkeletons.isNull()) ? 0 : fSkeletons->size(); } UBool @@ -2559,44 +2665,45 @@ DTSkeletonEnumeration::isCanonicalItem(const UnicodeString& item) { DTSkeletonEnumeration::~DTSkeletonEnumeration() { UnicodeString *s; - for (int32_t i=0; isize(); ++i) { - if ((s=(UnicodeString *)fSkeletons->elementAt(i))!=NULL) { - delete s; + if (fSkeletons.isValid()) { + for (int32_t i = 0; i < fSkeletons->size(); ++i) { + if ((s = (UnicodeString *)fSkeletons->elementAt(i)) != nullptr) { + delete s; + } } } - delete fSkeletons; } -DTRedundantEnumeration::DTRedundantEnumeration() { - pos=0; - fPatterns = NULL; +DTRedundantEnumeration::DTRedundantEnumeration() : pos(0), fPatterns(nullptr) { } void DTRedundantEnumeration::add(const UnicodeString& pattern, UErrorCode& status) { - if (U_FAILURE(status)) return; - if (fPatterns == NULL) { - fPatterns = new UVector(status); + if (U_FAILURE(status)) { return; } + if (fPatterns.isNull()) { + fPatterns.adoptInsteadAndCheckErrorCode(new UVector(status), status); if (U_FAILURE(status)) { - delete fPatterns; - fPatterns = NULL; return; } } - fPatterns->addElement(new UnicodeString(pattern), status); + LocalPointer newElem(new UnicodeString(pattern), status); + if (U_FAILURE(status)) { + return; + } + fPatterns->addElement(newElem.getAlias(), status); if (U_FAILURE(status)) { - delete fPatterns; - fPatterns = NULL; + fPatterns.adoptInstead(nullptr); return; } + newElem.orphan(); // fPatterns now owns the string. } const UnicodeString* DTRedundantEnumeration::snext(UErrorCode& status) { - if (U_SUCCESS(status) && pos < fPatterns->size()) { + if (U_SUCCESS(status) && fPatterns.isValid() && pos < fPatterns->size()) { return (const UnicodeString*)fPatterns->elementAt(pos++); } - return NULL; + return nullptr; } void @@ -2606,11 +2713,11 @@ DTRedundantEnumeration::reset(UErrorCode& /*status*/) { int32_t DTRedundantEnumeration::count(UErrorCode& /*status*/) const { - return (fPatterns==NULL) ? 0 : fPatterns->size(); + return (fPatterns.isNull()) ? 0 : fPatterns->size(); } UBool -DTRedundantEnumeration::isCanonicalItem(const UnicodeString& item) { +DTRedundantEnumeration::isCanonicalItem(const UnicodeString& item) const { if ( item.length() != 1 ) { return FALSE; } @@ -2624,12 +2731,13 @@ DTRedundantEnumeration::isCanonicalItem(const UnicodeString& item) { DTRedundantEnumeration::~DTRedundantEnumeration() { UnicodeString *s; - for (int32_t i=0; isize(); ++i) { - if ((s=(UnicodeString *)fPatterns->elementAt(i))!=NULL) { - delete s; + if (fPatterns.isValid()) { + for (int32_t i = 0; i < fPatterns->size(); ++i) { + if ((s = (UnicodeString *)fPatterns->elementAt(i)) != nullptr) { + delete s; + } } } - delete fPatterns; } U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/dtptngen_impl.h b/deps/icu-small/source/i18n/dtptngen_impl.h index 2ea31a75c488fa..95219f0ba257f5 100644 --- a/deps/icu-small/source/i18n/dtptngen_impl.h +++ b/deps/icu-small/source/i18n/dtptngen_impl.h @@ -116,7 +116,7 @@ typedef struct dtTypeElem { int16_t type; int16_t minLen; int16_t weight; -}dtTypeElem; +} dtTypeElem; // A compact storage mechanism for skeleton field strings. Several dozen of these will be created // for a typical DateTimePatternGenerator instance. @@ -172,30 +172,28 @@ class PtnSkeleton : public UMemory { virtual ~PtnSkeleton(); }; - class PtnElem : public UMemory { public: UnicodeString basePattern; - PtnSkeleton *skeleton; + LocalPointer skeleton; UnicodeString pattern; UBool skeletonWasSpecified; // if specified in availableFormats, not derived - PtnElem *next; + LocalPointer next; PtnElem(const UnicodeString &basePattern, const UnicodeString &pattern); virtual ~PtnElem(); - }; class FormatParser : public UMemory { public: UnicodeString items[MAX_DT_TOKEN]; - int32_t itemNumber; + int32_t itemNumber; FormatParser(); virtual ~FormatParser(); void set(const UnicodeString& patternString); void getQuoteLiteral(UnicodeString& quote, int32_t *itemIndex); - UBool isPatternSeparator(UnicodeString& field); + UBool isPatternSeparator(const UnicodeString& field) const; static UBool isQuoteLiteral(const UnicodeString& s); static int32_t getCanonicalIndex(const UnicodeString& s) { return getCanonicalIndex(s, TRUE); } static int32_t getCanonicalIndex(const UnicodeString& s, UBool strict); @@ -206,7 +204,7 @@ class FormatParser : public UMemory { ADD_TOKEN, SYNTAX_ERROR, DONE - } ToeknStatus; + } TokenStatus; TokenStatus status; virtual TokenStatus setTokens(const UnicodeString& pattern, int32_t startPos, int32_t *len); @@ -220,7 +218,7 @@ class DistanceInfo : public UMemory { DistanceInfo() {} virtual ~DistanceInfo(); void clear() { missingFieldMask = extraFieldMask = 0; } - void setTo(DistanceInfo& other); + void setTo(const DistanceInfo& other); void addMissing(int32_t field) { missingFieldMask |= (1< matcher; PatternMap *patternMap; }; class DTSkeletonEnumeration : public StringEnumeration { public: - DTSkeletonEnumeration(PatternMap &patternMap, dtStrEnum type, UErrorCode& status); + DTSkeletonEnumeration(PatternMap& patternMap, dtStrEnum type, UErrorCode& status); virtual ~DTSkeletonEnumeration(); static UClassID U_EXPORT2 getStaticClassID(void); virtual UClassID getDynamicClassID(void) const; @@ -287,7 +285,7 @@ class DTSkeletonEnumeration : public StringEnumeration { private: int32_t pos; UBool isCanonicalItem(const UnicodeString& item); - UVector *fSkeletons; + LocalPointer fSkeletons; }; class DTRedundantEnumeration : public StringEnumeration { @@ -302,8 +300,8 @@ class DTRedundantEnumeration : public StringEnumeration { void add(const UnicodeString &pattern, UErrorCode& status); private: int32_t pos; - UBool isCanonicalItem(const UnicodeString& item); - UVector *fPatterns; + UBool isCanonicalItem(const UnicodeString& item) const; + LocalPointer fPatterns; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/erarules.cpp b/deps/icu-small/source/i18n/erarules.cpp new file mode 100644 index 00000000000000..669f84423046fd --- /dev/null +++ b/deps/icu-small/source/i18n/erarules.cpp @@ -0,0 +1,307 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include +#include "unicode/ucal.h" +#include "unicode/ures.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "cstring.h" +#include "erarules.h" +#include "gregoimp.h" +#include "uassert.h" + +U_NAMESPACE_BEGIN + +static const int32_t MAX_ENCODED_START_YEAR = 32767; +static const int32_t MIN_ENCODED_START_YEAR = -32768; +static const int32_t MIN_ENCODED_START = -2147483391; // encodeDate(MIN_ENCODED_START_YEAR, 1, 1, ...); + +static const int32_t YEAR_MASK = 0xFFFF0000; +static const int32_t MONTH_MASK = 0x0000FF00; +static const int32_t DAY_MASK = 0x000000FF; + +static const int32_t MAX_INT32 = 0x7FFFFFFF; +static const int32_t MIN_INT32 = 0xFFFFFFFF; + +static const UChar VAL_FALSE[] = {0x66, 0x61, 0x6c, 0x73, 0x65}; // "false" +static const UChar VAL_FALSE_LEN = 5; + +static UBool isSet(int startDate) { + return startDate != 0; +} + +static UBool isValidRuleStartDate(int32_t year, int32_t month, int32_t day) { + return year >= MIN_ENCODED_START_YEAR && year <= MAX_ENCODED_START_YEAR + && month >= 1 && month <= 12 && day >=1 && day <= 31; +} + +/** + * Encode year/month/date to a single integer. + * year is high 16 bits (-32768 to 32767), month is + * next 8 bits and day of month is last 8 bits. + * + * @param year year + * @param month month (1-base) + * @param day day of month + * @return an encoded date. + */ +static int32_t encodeDate(int32_t year, int32_t month, int32_t day) { + return year << 16 | month << 8 | day; +} + +static void decodeDate(int32_t encodedDate, int32_t (&fields)[3]) { + if (encodedDate == MIN_ENCODED_START) { + fields[0] = MIN_INT32; + fields[1] = 1; + fields[2] = 1; + } else { + fields[0] = (encodedDate & YEAR_MASK) >> 16; + fields[1] = (encodedDate & MONTH_MASK) >> 8; + fields[2] = encodedDate & DAY_MASK; + } +} + +/** + * Compare an encoded date with another date specified by year/month/day. + * @param encoded An encoded date + * @param year Year of another date + * @param month Month of another date + * @param day Day of another date + * @return -1 when encoded date is earlier, 0 when two dates are same, + * and 1 when encoded date is later. + */ +static int32_t compareEncodedDateWithYMD(int encoded, int year, int month, int day) { + if (year < MIN_ENCODED_START_YEAR) { + if (encoded == MIN_ENCODED_START) { + if (year > MIN_INT32 || month > 1 || day > 1) { + return -1; + } + return 0; + } else { + return 1; + } + } else if (year > MAX_ENCODED_START_YEAR) { + return -1; + } else { + int tmp = encodeDate(year, month, day); + if (encoded < tmp) { + return -1; + } else if (encoded == tmp) { + return 0; + } else { + return 1; + } + } +} + +EraRules::EraRules(LocalMemory& eraStartDates, int32_t numEras) + : numEras(numEras) { + startDates.moveFrom(eraStartDates); + initCurrentEra(); +} + +EraRules::~EraRules() { +} + +EraRules* EraRules::createInstance(const char *calType, UBool includeTentativeEra, UErrorCode& status) { + if(U_FAILURE(status)) { + return nullptr; + } + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "supplementalData", &status)); + ures_getByKey(rb.getAlias(), "calendarData", rb.getAlias(), &status); + ures_getByKey(rb.getAlias(), calType, rb.getAlias(), &status); + ures_getByKey(rb.getAlias(), "eras", rb.getAlias(), &status); + + if (U_FAILURE(status)) { + return nullptr; + } + + int32_t numEras = ures_getSize(rb.getAlias()); + int32_t firstTentativeIdx = MAX_INT32; + + LocalMemory startDates(static_cast(uprv_malloc(numEras * sizeof(int32_t)))); + if (startDates.isNull()) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + uprv_memset(startDates.getAlias(), 0 , numEras * sizeof(int32_t)); + + while (ures_hasNext(rb.getAlias())) { + LocalUResourceBundlePointer eraRuleRes(ures_getNextResource(rb.getAlias(), nullptr, &status)); + if (U_FAILURE(status)) { + return nullptr; + } + const char *eraIdxStr = ures_getKey(eraRuleRes.getAlias()); + char *endp; + int32_t eraIdx = (int32_t)strtol(eraIdxStr, &endp, 10); + if ((size_t)(endp - eraIdxStr) != uprv_strlen(eraIdxStr)) { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + if (eraIdx < 0 || eraIdx >= numEras) { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + if (isSet(startDates[eraIdx])) { + // start date of the index was already set + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + + UBool hasName = TRUE; + UBool hasEnd = TRUE; + int32_t len; + while (ures_hasNext(eraRuleRes.getAlias())) { + LocalUResourceBundlePointer res(ures_getNextResource(eraRuleRes.getAlias(), nullptr, &status)); + if (U_FAILURE(status)) { + return nullptr; + } + const char *key = ures_getKey(res.getAlias()); + if (uprv_strcmp(key, "start") == 0) { + const int32_t *fields = ures_getIntVector(res.getAlias(), &len, &status); + if (U_FAILURE(status)) { + return nullptr; + } + if (len != 3 || !isValidRuleStartDate(fields[0], fields[1], fields[2])) { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + startDates[eraIdx] = encodeDate(fields[0], fields[1], fields[2]); + } else if (uprv_strcmp(key, "named") == 0) { + const UChar *val = ures_getString(res.getAlias(), &len, &status); + if (u_strncmp(val, VAL_FALSE, VAL_FALSE_LEN) == 0) { + hasName = FALSE; + } + } else if (uprv_strcmp(key, "end") == 0) { + hasEnd = TRUE; + } + } + + if (isSet(startDates[eraIdx])) { + if (hasEnd) { + // This implementation assumes either start or end is available, not both. + // For now, just ignore the end rule. + } + } else { + if (hasEnd) { + if (eraIdx != 0) { + // This implementation does not support end only rule for eras other than + // the first one. + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + U_ASSERT(eraIdx == 0); + startDates[eraIdx] = MIN_ENCODED_START; + } else { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + } + + if (hasName) { + if (eraIdx >= firstTentativeIdx) { + status = U_INVALID_FORMAT_ERROR; + return nullptr; + } + } else { + if (eraIdx < firstTentativeIdx) { + firstTentativeIdx = eraIdx; + } + } + } + + EraRules *result; + if (firstTentativeIdx < MAX_INT32 && !includeTentativeEra) { + result = new EraRules(startDates, firstTentativeIdx); + } else { + result = new EraRules(startDates, numEras); + } + + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return result; +} + +void EraRules::getStartDate(int32_t eraIdx, int32_t (&fields)[3], UErrorCode& status) const { + if(U_FAILURE(status)) { + return; + } + if (eraIdx < 0 || eraIdx >= numEras) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + decodeDate(startDates[eraIdx], fields); +} + +int32_t EraRules::getStartYear(int32_t eraIdx, UErrorCode& status) const { + int year = MAX_INT32; // bogus value + if(U_FAILURE(status)) { + return year; + } + if (eraIdx < 0 || eraIdx >= numEras) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return year; + } + int fields[3]; + decodeDate(startDates[eraIdx], fields); + year = fields[0]; + + return year; +} + +int32_t EraRules::getEraIndex(int32_t year, int32_t month, int32_t day, UErrorCode& status) const { + if(U_FAILURE(status)) { + return -1; + } + + if (month < 1 || month > 12 || day < 1 || day > 31) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + int32_t high = numEras; // last index + 1 + int32_t low; + + // Short circuit for recent years. Most modern computations will + // occur in the last few eras. + if (compareEncodedDateWithYMD(startDates[getCurrentEraIndex()], year, month, day) <= 0) { + low = getCurrentEraIndex(); + } else { + low = 0; + } + + // Do binary search + while (low < high - 1) { + int i = (low + high) / 2; + if (compareEncodedDateWithYMD(startDates[i], year, month, day) <= 0) { + low = i; + } else { + high = i; + } + } + return low; +} + +void EraRules::initCurrentEra() { + UDate now = ucal_getNow(); + int year, month0, dom, dow, doy, mid; + Grego::timeToFields(now, year, month0, dom, dow, doy, mid); + int currentEncodedDate = encodeDate(year, month0 + 1 /* changes to 1-base */, dom); + int eraIdx = numEras - 1; + while (eraIdx > 0) { + if (currentEncodedDate >= startDates[eraIdx]) { + break; + } + eraIdx--; + } + // Note: current era could be before the first era. + // In this case, this implementation returns the first era index (0). + currentEra = eraIdx;} + +U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/erarules.h b/deps/icu-small/source/i18n/erarules.h new file mode 100644 index 00000000000000..4ed86408325ed7 --- /dev/null +++ b/deps/icu-small/source/i18n/erarules.h @@ -0,0 +1,92 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#ifndef ERARULES_H_ +#define ERARULES_H_ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +#include "unicode/localpointer.h" +#include "unicode/uobject.h" +#include "cmemory.h" + +U_NAMESPACE_BEGIN + +// Export an explicit template instantiation of LocalMemory used as a data member of EraRules. +// When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. +// See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples. +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= +#pragma warning(suppress: 4661) +template class U_I18N_API LocalPointerBase; +template class U_I18N_API LocalMemory; +#endif + +class U_I18N_API EraRules : public UMemory { +public: + ~EraRules(); + + static EraRules* createInstance(const char *calType, UBool includeTentativeEra, UErrorCode& status); + + /** + * Gets number of effective eras + * @return number of effective eras + */ + inline int32_t getNumberOfEras() const { + return numEras; + } + + /** + * Gets start date of an era + * @param eraIdx Era index + * @param fields Receives date fields. The result includes values of year, month, + * day of month in this order. When an era has no start date, the result + * will be January 1st in year whose value is minimum integer. + * @param status Receives status. + */ + void getStartDate(int32_t eraIdx, int32_t (&fields)[3], UErrorCode& status) const; + + /** + * Gets start year of an era + * @param eraIdx Era index + * @param status Receives status. + * @return The first year of an era. When a era has no start date, minimum int32 + * value is returned. + */ + int32_t getStartYear(int32_t eraIdx, UErrorCode& status) const; + + /** + * Returns era index for the specified year/month/day. + * @param year Year + * @param month Month (1-base) + * @param day Day of month + * @param status Receives status + * @return era index (or 0, when the specified date is before the first era) + */ + int32_t getEraIndex(int32_t year, int32_t month, int32_t day, UErrorCode& status) const; + + /** + * Gets the current era index. This is calculated only once for an instance of + * EraRules. + * + * @return era index of current era (or 0, when current date is before the first era) + */ + inline int32_t getCurrentEraIndex() const { + return currentEra; + } + +private: + EraRules(LocalMemory& eraStartDates, int32_t numEra); + + void initCurrentEra(); + + LocalMemory startDates; + int32_t numEras; + int32_t currentEra; +}; + +U_NAMESPACE_END +#endif /* #if !UCONFIG_NO_FORMATTING */ +#endif /* ERARULES_H_ */ diff --git a/deps/icu-small/source/i18n/fphdlimp.h b/deps/icu-small/source/i18n/fphdlimp.h index 2e9d5622b1b5b0..a6827e01e98b4b 100644 --- a/deps/icu-small/source/i18n/fphdlimp.h +++ b/deps/icu-small/source/i18n/fphdlimp.h @@ -10,9 +10,10 @@ #ifndef FPHDLIMP_H #define FPHDLIMP_H +#include "unicode/utypes.h" + #if !UCONFIG_NO_FORMATTING -#include "unicode/utypes.h" #include "unicode/fieldpos.h" #include "unicode/fpositer.h" diff --git a/deps/icu-small/source/i18n/gregocal.cpp b/deps/icu-small/source/i18n/gregocal.cpp index 49c4226049d9c8..4db66758df1f1f 100644 --- a/deps/icu-small/source/i18n/gregocal.cpp +++ b/deps/icu-small/source/i18n/gregocal.cpp @@ -541,8 +541,8 @@ int32_t GregorianCalendar::handleComputeMonthStart(int32_t eyear, int32_t month, } UBool isLeap = eyear%4 == 0; - int32_t y = eyear-1; - int32_t julianDay = 365*y + ClockMath::floorDivide(y, 4) + (kJan1_1JulianDay - 3); + int64_t y = (int64_t)eyear-1; + int64_t julianDay = 365*y + ClockMath::floorDivide(y, (int64_t)4) + (kJan1_1JulianDay - 3); nonConstThis->fIsGregorian = (eyear >= fGregorianCutoverYear); #if defined (U_DEBUG_CAL) @@ -572,7 +572,7 @@ int32_t GregorianCalendar::handleComputeMonthStart(int32_t eyear, int32_t month, julianDay += isLeap?kLeapNumDays[month]:kNumDays[month]; } - return julianDay; + return static_cast(julianDay); } int32_t GregorianCalendar::handleGetMonthLength(int32_t extendedYear, int32_t month) const diff --git a/deps/icu-small/source/i18n/gregoimp.h b/deps/icu-small/source/i18n/gregoimp.h index 55922a6b40aa52..eb3844f8523eeb 100644 --- a/deps/icu-small/source/i18n/gregoimp.h +++ b/deps/icu-small/source/i18n/gregoimp.h @@ -299,8 +299,8 @@ inline int32_t Grego::millisToJulianDay(double millis) { } inline int32_t Grego::gregorianShift(int32_t eyear) { - int32_t y = eyear-1; - int32_t gregShift = ClockMath::floorDivide(y, 400) - ClockMath::floorDivide(y, 100) + 2; + int64_t y = (int64_t)eyear-1; + int32_t gregShift = static_cast(ClockMath::floorDivide(y, (int64_t)400) - ClockMath::floorDivide(y, (int64_t)100) + 2); return gregShift; } diff --git a/deps/icu-small/source/i18n/indiancal.cpp b/deps/icu-small/source/i18n/indiancal.cpp index a2a7f2dcdd3e9e..667b6f2d7a32c5 100644 --- a/deps/icu-small/source/i18n/indiancal.cpp +++ b/deps/icu-small/source/i18n/indiancal.cpp @@ -347,12 +347,15 @@ IndianCalendar::inDaylightTime(UErrorCode& status) const return (UBool)(U_SUCCESS(status) ? (internalGet(UCAL_DST_OFFSET) != 0) : FALSE); } -// default century -const UDate IndianCalendar::fgSystemDefaultCentury = DBL_MIN; -const int32_t IndianCalendar::fgSystemDefaultCenturyYear = -1; -UDate IndianCalendar::fgSystemDefaultCenturyStart = DBL_MIN; -int32_t IndianCalendar::fgSystemDefaultCenturyStartYear = -1; +/** + * The system maintains a static default century start date and Year. They are + * initialized the first time they are used. Once the system default century date + * and year are set, they do not change. + */ +static UDate gSystemDefaultCenturyStart = DBL_MIN; +static int32_t gSystemDefaultCenturyStartYear = -1; +static icu::UInitOnce gSystemDefaultCenturyInit = U_INITONCE_INITIALIZER; UBool IndianCalendar::haveDefaultCentury() const @@ -360,87 +363,45 @@ UBool IndianCalendar::haveDefaultCentury() const return TRUE; } -UDate IndianCalendar::defaultCenturyStart() const +static void U_CALLCONV +initializeSystemDefaultCentury() { - return internalGetDefaultCenturyStart(); -} + // initialize systemDefaultCentury and systemDefaultCenturyYear based + // on the current time. They'll be set to 80 years before + // the current time. + UErrorCode status = U_ZERO_ERROR; -int32_t IndianCalendar::defaultCenturyStartYear() const -{ - return internalGetDefaultCenturyStartYear(); -} + IndianCalendar calendar ( Locale ( "@calendar=Indian" ), status); + if ( U_SUCCESS ( status ) ) { + calendar.setTime ( Calendar::getNow(), status ); + calendar.add ( UCAL_YEAR, -80, status ); -UDate -IndianCalendar::internalGetDefaultCenturyStart() const -{ - // lazy-evaluate systemDefaultCenturyStart - UBool needsUpdate; - { - Mutex m; - needsUpdate = (fgSystemDefaultCenturyStart == fgSystemDefaultCentury); - } + UDate newStart = calendar.getTime ( status ); + int32_t newYear = calendar.get ( UCAL_YEAR, status ); - if (needsUpdate) { - initializeSystemDefaultCentury(); + gSystemDefaultCenturyStart = newStart; + gSystemDefaultCenturyStartYear = newYear; } + // We have no recourse upon failure. +} - // use defaultCenturyStart unless it's the flag value; - // then use systemDefaultCenturyStart - return fgSystemDefaultCenturyStart; +UDate +IndianCalendar::defaultCenturyStart() const +{ + // lazy-evaluate systemDefaultCenturyStart + umtx_initOnce(gSystemDefaultCenturyInit, &initializeSystemDefaultCentury); + return gSystemDefaultCenturyStart; } int32_t -IndianCalendar::internalGetDefaultCenturyStartYear() const +IndianCalendar::defaultCenturyStartYear() const { // lazy-evaluate systemDefaultCenturyStartYear - UBool needsUpdate; - { - Mutex m; - - needsUpdate = (fgSystemDefaultCenturyStart == fgSystemDefaultCentury); - } - - if (needsUpdate) { - initializeSystemDefaultCentury(); - } - - // use defaultCenturyStart unless it's the flag value; - // then use systemDefaultCenturyStartYear - - return fgSystemDefaultCenturyStartYear; + umtx_initOnce(gSystemDefaultCenturyInit, &initializeSystemDefaultCentury); + return gSystemDefaultCenturyStartYear; } -void -IndianCalendar::initializeSystemDefaultCentury() -{ - // initialize systemDefaultCentury and systemDefaultCenturyYear based - // on the current time. They'll be set to 80 years before - // the current time. - // No point in locking as it should be idempotent. - if (fgSystemDefaultCenturyStart == fgSystemDefaultCentury) { - UErrorCode status = U_ZERO_ERROR; - - IndianCalendar calendar(Locale("@calendar=Indian"),status); - if (U_SUCCESS(status)) { - calendar.setTime(Calendar::getNow(), status); - calendar.add(UCAL_YEAR, -80, status); - - UDate newStart = calendar.getTime(status); - int32_t newYear = calendar.get(UCAL_YEAR, status); - - { - Mutex m; - - fgSystemDefaultCenturyStart = newStart; - fgSystemDefaultCenturyStartYear = newYear; - } - } - - // We have no recourse upon failure unless we want to propagate the failure - // out. - } -} UOBJECT_DEFINE_RTTI_IMPLEMENTATION(IndianCalendar) diff --git a/deps/icu-small/source/i18n/indiancal.h b/deps/icu-small/source/i18n/indiancal.h index d40af5ad450c9e..ffd4ae8b8a40cd 100644 --- a/deps/icu-small/source/i18n/indiancal.h +++ b/deps/icu-small/source/i18n/indiancal.h @@ -68,7 +68,7 @@ U_NAMESPACE_BEGIN */ -class IndianCalendar : public Calendar { +class U_I18N_API IndianCalendar : public Calendar { public: /** * Useful constants for IndianCalendar. @@ -274,10 +274,10 @@ class IndianCalendar : public Calendar { * @return The class ID for all objects of this class. * @internal */ - U_I18N_API static UClassID U_EXPORT2 getStaticClassID(void); + static UClassID U_EXPORT2 getStaticClassID(void); /** - * return the calendar type, "buddhist". + * return the calendar type, "indian". * * @return calendar type * @internal @@ -320,49 +320,6 @@ class IndianCalendar : public Calendar { * @internal */ virtual int32_t defaultCenturyStartYear() const; - - private: // default century stuff. - /** - * The system maintains a static default century start date. This is initialized - * the first time it is used. Before then, it is set to SYSTEM_DEFAULT_CENTURY to - * indicate an uninitialized state. Once the system default century date and year - * are set, they do not change. - */ - static UDate fgSystemDefaultCenturyStart; - - /** - * See documentation for systemDefaultCenturyStart. - */ - static int32_t fgSystemDefaultCenturyStartYear; - - /** - * Default value that indicates the defaultCenturyStartYear is unitialized - */ - static const int32_t fgSystemDefaultCenturyYear; - - /** - * start of default century, as a date - */ - static const UDate fgSystemDefaultCentury; - - /** - * Returns the beginning date of the 100-year window that dates - * with 2-digit years are considered to fall within. - */ - UDate internalGetDefaultCenturyStart(void) const; - - /** - * Returns the first year of the 100-year window that dates with - * 2-digit years are considered to fall within. - */ - int32_t internalGetDefaultCenturyStartYear(void) const; - - /** - * Initializes the 100-year window that dates with 2-digit years - * are considered to fall within so that its start date is 80 years - * before the current time. - */ - static void initializeSystemDefaultCentury(void); }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/japancal.cpp b/deps/icu-small/source/i18n/japancal.cpp index a2738e86a91edb..056781617d6d9d 100644 --- a/deps/icu-small/source/i18n/japancal.cpp +++ b/deps/icu-small/source/i18n/japancal.cpp @@ -16,286 +16,88 @@ #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING - +#if U_PLATFORM_HAS_WINUWP_API == 0 +#include // getenv() is not available in UWP env +#endif #include "cmemory.h" +#include "erarules.h" #include "japancal.h" #include "unicode/gregocal.h" #include "umutex.h" #include "uassert.h" - -//#define U_DEBUG_JCAL - -#ifdef U_DEBUG_JCAL -#include -#endif +#include "ucln_in.h" +#include "cstring.h" + +static icu::EraRules * gJapaneseEraRules = nullptr; +static icu::UInitOnce gJapaneseEraRulesInitOnce = U_INITONCE_INITIALIZER; +static int32_t gCurrentEra = 0; + +U_CDECL_BEGIN +static UBool japanese_calendar_cleanup(void) { + if (gJapaneseEraRules) { + delete gJapaneseEraRules; + gJapaneseEraRules = nullptr; + } + gCurrentEra = 0; + gJapaneseEraRulesInitOnce.reset(); + return TRUE; +} +U_CDECL_END U_NAMESPACE_BEGIN UOBJECT_DEFINE_RTTI_IMPLEMENTATION(JapaneseCalendar) -// Gregorian date of each emperor's ascension -// Years are AD, months are 1-based. -static const struct { - int16_t year; - int8_t month; - int8_t day; -} kEraInfo[] = { - // Year Month Day - { 645, 6, 19 }, // Taika 0 - { 650, 2, 15 }, // Hakuchi 1 - { 672, 1, 1 }, // Hakuho 2 - { 686, 7, 20 }, // Shucho 3 - { 701, 3, 21 }, // Taiho 4 - { 704, 5, 10 }, // Keiun 5 - { 708, 1, 11 }, // Wado 6 - { 715, 9, 2 }, // Reiki 7 - { 717, 11, 17 }, // Yoro 8 - { 724, 2, 4 }, // Jinki 9 - { 729, 8, 5 }, // Tempyo 10 - { 749, 4, 14 }, // Tempyo-kampo 11 - { 749, 7, 2 }, // Tempyo-shoho 12 - { 757, 8, 18 }, // Tempyo-hoji 13 - { 765, 1, 7 }, // Tempho-jingo 14 - { 767, 8, 16 }, // Jingo-keiun 15 - { 770, 10, 1 }, // Hoki 16 - { 781, 1, 1 }, // Ten-o 17 - { 782, 8, 19 }, // Enryaku 18 - { 806, 5, 18 }, // Daido 19 - { 810, 9, 19 }, // Konin 20 - { 824, 1, 5 }, // Tencho - { 834, 1, 3 }, // Showa - { 848, 6, 13 }, // Kajo - { 851, 4, 28 }, // Ninju - { 854, 11, 30 }, // Saiko - { 857, 2, 21 }, // Tennan - { 859, 4, 15 }, // Jogan - { 877, 4, 16 }, // Genkei - { 885, 2, 21 }, // Ninna - { 889, 4, 27 }, // Kampyo 30 - { 898, 4, 26 }, // Shotai - { 901, 7, 15 }, // Engi - { 923, 4, 11 }, // Encho - { 931, 4, 26 }, // Shohei - { 938, 5, 22 }, // Tengyo - { 947, 4, 22 }, // Tenryaku - { 957, 10, 27 }, // Tentoku - { 961, 2, 16 }, // Owa - { 964, 7, 10 }, // Koho - { 968, 8, 13 }, // Anna 40 - { 970, 3, 25 }, // Tenroku - { 973, 12, 20 }, // Ten-en - { 976, 7, 13 }, // Jogen - { 978, 11, 29 }, // Tengen - { 983, 4, 15 }, // Eikan - { 985, 4, 27 }, // Kanna - { 987, 4, 5 }, // Ei-en - { 989, 8, 8 }, // Eiso - { 990, 11, 7 }, // Shoryaku - { 995, 2, 22 }, // Chotoku 50 - { 999, 1, 13 }, // Choho - { 1004, 7, 20 }, // Kanko - { 1012, 12, 25 }, // Chowa - { 1017, 4, 23 }, // Kannin - { 1021, 2, 2 }, // Jian - { 1024, 7, 13 }, // Manju - { 1028, 7, 25 }, // Chogen - { 1037, 4, 21 }, // Choryaku - { 1040, 11, 10 }, // Chokyu - { 1044, 11, 24 }, // Kantoku 60 - { 1046, 4, 14 }, // Eisho - { 1053, 1, 11 }, // Tengi - { 1058, 8, 29 }, // Kohei - { 1065, 8, 2 }, // Jiryaku - { 1069, 4, 13 }, // Enkyu - { 1074, 8, 23 }, // Shoho - { 1077, 11, 17 }, // Shoryaku - { 1081, 2, 10 }, // Eiho - { 1084, 2, 7 }, // Otoku - { 1087, 4, 7 }, // Kanji 70 - { 1094, 12, 15 }, // Kaho - { 1096, 12, 17 }, // Eicho - { 1097, 11, 21 }, // Shotoku - { 1099, 8, 28 }, // Kowa - { 1104, 2, 10 }, // Choji - { 1106, 4, 9 }, // Kasho - { 1108, 8, 3 }, // Tennin - { 1110, 7, 13 }, // Ten-ei - { 1113, 7, 13 }, // Eikyu - { 1118, 4, 3 }, // Gen-ei 80 - { 1120, 4, 10 }, // Hoan - { 1124, 4, 3 }, // Tenji - { 1126, 1, 22 }, // Daiji - { 1131, 1, 29 }, // Tensho - { 1132, 8, 11 }, // Chosho - { 1135, 4, 27 }, // Hoen - { 1141, 7, 10 }, // Eiji - { 1142, 4, 28 }, // Koji - { 1144, 2, 23 }, // Tenyo - { 1145, 7, 22 }, // Kyuan 90 - { 1151, 1, 26 }, // Ninpei - { 1154, 10, 28 }, // Kyuju - { 1156, 4, 27 }, // Hogen - { 1159, 4, 20 }, // Heiji - { 1160, 1, 10 }, // Eiryaku - { 1161, 9, 4 }, // Oho - { 1163, 3, 29 }, // Chokan - { 1165, 6, 5 }, // Eiman - { 1166, 8, 27 }, // Nin-an - { 1169, 4, 8 }, // Kao 100 - { 1171, 4, 21 }, // Shoan - { 1175, 7, 28 }, // Angen - { 1177, 8, 4 }, // Jisho - { 1181, 7, 14 }, // Yowa - { 1182, 5, 27 }, // Juei - { 1184, 4, 16 }, // Genryuku - { 1185, 8, 14 }, // Bunji - { 1190, 4, 11 }, // Kenkyu - { 1199, 4, 27 }, // Shoji - { 1201, 2, 13 }, // Kennin 110 - { 1204, 2, 20 }, // Genkyu - { 1206, 4, 27 }, // Ken-ei - { 1207, 10, 25 }, // Shogen - { 1211, 3, 9 }, // Kenryaku - { 1213, 12, 6 }, // Kenpo - { 1219, 4, 12 }, // Shokyu - { 1222, 4, 13 }, // Joo - { 1224, 11, 20 }, // Gennin - { 1225, 4, 20 }, // Karoku - { 1227, 12, 10 }, // Antei 120 - { 1229, 3, 5 }, // Kanki - { 1232, 4, 2 }, // Joei - { 1233, 4, 15 }, // Tempuku - { 1234, 11, 5 }, // Bunryaku - { 1235, 9, 19 }, // Katei - { 1238, 11, 23 }, // Ryakunin - { 1239, 2, 7 }, // En-o - { 1240, 7, 16 }, // Ninji - { 1243, 2, 26 }, // Kangen - { 1247, 2, 28 }, // Hoji 130 - { 1249, 3, 18 }, // Kencho - { 1256, 10, 5 }, // Kogen - { 1257, 3, 14 }, // Shoka - { 1259, 3, 26 }, // Shogen - { 1260, 4, 13 }, // Bun-o - { 1261, 2, 20 }, // Kocho - { 1264, 2, 28 }, // Bun-ei - { 1275, 4, 25 }, // Kenji - { 1278, 2, 29 }, // Koan - { 1288, 4, 28 }, // Shoo 140 - { 1293, 8, 55 }, // Einin - { 1299, 4, 25 }, // Shoan - { 1302, 11, 21 }, // Kengen - { 1303, 8, 5 }, // Kagen - { 1306, 12, 14 }, // Tokuji - { 1308, 10, 9 }, // Enkei - { 1311, 4, 28 }, // Ocho - { 1312, 3, 20 }, // Showa - { 1317, 2, 3 }, // Bunpo - { 1319, 4, 28 }, // Geno 150 - { 1321, 2, 23 }, // Genkyo - { 1324, 12, 9 }, // Shochu - { 1326, 4, 26 }, // Kareki - { 1329, 8, 29 }, // Gentoku - { 1331, 8, 9 }, // Genko - { 1334, 1, 29 }, // Kemmu - { 1336, 2, 29 }, // Engen - { 1340, 4, 28 }, // Kokoku - { 1346, 12, 8 }, // Shohei - { 1370, 7, 24 }, // Kentoku 160 - { 1372, 4, 1 }, // Bunch\u0169 - { 1375, 5, 27 }, // Tenju - { 1379, 3, 22 }, // Koryaku - { 1381, 2, 10 }, // Kowa - { 1384, 4, 28 }, // Gench\u0169 - { 1384, 2, 27 }, // Meitoku - { 1387, 8, 23 }, // Kakei - { 1389, 2, 9 }, // Koo - { 1390, 3, 26 }, // Meitoku - { 1394, 7, 5 }, // Oei 170 - { 1428, 4, 27 }, // Shocho - { 1429, 9, 5 }, // Eikyo - { 1441, 2, 17 }, // Kakitsu - { 1444, 2, 5 }, // Bun-an - { 1449, 7, 28 }, // Hotoku - { 1452, 7, 25 }, // Kyotoku - { 1455, 7, 25 }, // Kosho - { 1457, 9, 28 }, // Choroku - { 1460, 12, 21 }, // Kansho - { 1466, 2, 28 }, // Bunsho 180 - { 1467, 3, 3 }, // Onin - { 1469, 4, 28 }, // Bunmei - { 1487, 7, 29 }, // Chokyo - { 1489, 8, 21 }, // Entoku - { 1492, 7, 19 }, // Meio - { 1501, 2, 29 }, // Bunki - { 1504, 2, 30 }, // Eisho - { 1521, 8, 23 }, // Taiei - { 1528, 8, 20 }, // Kyoroku - { 1532, 7, 29 }, // Tenmon 190 - { 1555, 10, 23 }, // Koji - { 1558, 2, 28 }, // Eiroku - { 1570, 4, 23 }, // Genki - { 1573, 7, 28 }, // Tensho - { 1592, 12, 8 }, // Bunroku - { 1596, 10, 27 }, // Keicho - { 1615, 7, 13 }, // Genwa - { 1624, 2, 30 }, // Kan-ei - { 1644, 12, 16 }, // Shoho - { 1648, 2, 15 }, // Keian 200 - { 1652, 9, 18 }, // Shoo - { 1655, 4, 13 }, // Meiryaku - { 1658, 7, 23 }, // Manji - { 1661, 4, 25 }, // Kanbun - { 1673, 9, 21 }, // Enpo - { 1681, 9, 29 }, // Tenwa - { 1684, 2, 21 }, // Jokyo - { 1688, 9, 30 }, // Genroku - { 1704, 3, 13 }, // Hoei - { 1711, 4, 25 }, // Shotoku 210 - { 1716, 6, 22 }, // Kyoho - { 1736, 4, 28 }, // Genbun - { 1741, 2, 27 }, // Kanpo - { 1744, 2, 21 }, // Enkyo - { 1748, 7, 12 }, // Kan-en - { 1751, 10, 27 }, // Horyaku - { 1764, 6, 2 }, // Meiwa - { 1772, 11, 16 }, // An-ei - { 1781, 4, 2 }, // Tenmei - { 1789, 1, 25 }, // Kansei 220 - { 1801, 2, 5 }, // Kyowa - { 1804, 2, 11 }, // Bunka - { 1818, 4, 22 }, // Bunsei - { 1830, 12, 10 }, // Tenpo - { 1844, 12, 2 }, // Koka - { 1848, 2, 28 }, // Kaei - { 1854, 11, 27 }, // Ansei - { 1860, 3, 18 }, // Man-en - { 1861, 2, 19 }, // Bunkyu - { 1864, 2, 20 }, // Genji 230 - { 1865, 4, 7 }, // Keio 231 - { 1868, 9, 8 }, // Meiji 232 - { 1912, 7, 30 }, // Taisho 233 - { 1926, 12, 25 }, // Showa 234 - { 1989, 1, 8 } // Heisei 235 -}; - -#define kEraCount UPRV_LENGTHOF(kEraInfo) - -/** - * The current era, for reference. - */ -static const int32_t kCurrentEra = (kEraCount-1); // int32_t to match the calendar field type - static const int32_t kGregorianEpoch = 1970; // used as the default value of EXTENDED_YEAR +static const char* TENTATIVE_ERA_VAR_NAME = "ICU_ENABLE_TENTATIVE_ERA"; + +// Initialize global Japanese era data +static void U_CALLCONV initializeEras(UErrorCode &status) { + // Although start date of next Japanese era is planned ahead, a name of + // new era might not be available. This implementation allows tester to + // check a new era without era names by settings below (in priority order). + // By default, such tentative era is disabled. + + // 1. Environment variable ICU_ENABLE_TENTATIVE_ERA=true or false + + UBool includeTentativeEra = FALSE; + +#if U_PLATFORM_HAS_WINUWP_API == 1 + // UWP doesn't allow access to getenv(), but we can call GetEnvironmentVariableW to do the same thing. + UChar varName[26] = {}; + u_charsToUChars(TENTATIVE_ERA_VAR_NAME, varName, static_cast(uprv_strlen(TENTATIVE_ERA_VAR_NAME))); + WCHAR varValue[5] = {}; + DWORD ret = GetEnvironmentVariableW(reinterpret_cast(varName), varValue, UPRV_LENGTHOF(varValue)); + if ((ret == 4) && (_wcsicmp(varValue, L"true") == 0)) { + includeTentativeEra = TRUE; + } +#else + char *envVarVal = getenv(TENTATIVE_ERA_VAR_NAME); + if (envVarVal != NULL && uprv_stricmp(envVarVal, "true") == 0) { + includeTentativeEra = TRUE; + } +#endif + gJapaneseEraRules = EraRules::createInstance("japanese", includeTentativeEra, status); + if (U_FAILURE(status)) { + return; + } + gCurrentEra = gJapaneseEraRules->getCurrentEraIndex(); +} + +static void init(UErrorCode &status) { + umtx_initOnce(gJapaneseEraRulesInitOnce, &initializeEras, status); + ucln_i18n_registerCleanup(UCLN_I18N_JAPANESE_CALENDAR, japanese_calendar_cleanup); +} /* Some platforms don't like to export constants, like old Palm OS and some z/OS configurations. */ uint32_t JapaneseCalendar::getCurrentEra() { - return kCurrentEra; + return gCurrentEra; } JapaneseCalendar::JapaneseCalendar(const Locale& aLocale, UErrorCode& success) : GregorianCalendar(aLocale, success) { + init(success); setTimeInMillis(getNow(), success); // Call this again now that the vtable is set up properly. } @@ -306,6 +108,9 @@ JapaneseCalendar::~JapaneseCalendar() JapaneseCalendar::JapaneseCalendar(const JapaneseCalendar& source) : GregorianCalendar(source) { + UErrorCode status = U_ZERO_ERROR; + init(status); + U_ASSERT(U_SUCCESS(status)); } JapaneseCalendar& JapaneseCalendar::operator= ( const JapaneseCalendar& right) @@ -332,10 +137,14 @@ int32_t JapaneseCalendar::getDefaultMonthInYear(int32_t eyear) int32_t month = 0; // Find out if we are at the edge of an era - - if(eyear == kEraInfo[era].year) { + int32_t eraStart[3] = { 0,0,0 }; + UErrorCode status = U_ZERO_ERROR; + gJapaneseEraRules->getStartDate(era, eraStart, status); + U_ASSERT(U_SUCCESS(status)); + if(eyear == eraStart[0]) { // Yes, we're in the first year of this era. - return kEraInfo[era].month-1; + return eraStart[1] // month + -1; // return 0-based month } return month; @@ -346,9 +155,13 @@ int32_t JapaneseCalendar::getDefaultDayInMonth(int32_t eyear, int32_t month) int32_t era = internalGetEra(); int32_t day = 1; - if(eyear == kEraInfo[era].year) { - if(month == (kEraInfo[era].month-1)) { - return kEraInfo[era].day; + int32_t eraStart[3] = { 0,0,0 }; + UErrorCode status = U_ZERO_ERROR; + gJapaneseEraRules->getStartDate(era, eraStart, status); + U_ASSERT(U_SUCCESS(status)); + if(eyear == eraStart[0]) { + if(month == eraStart[1] - 1) { + return eraStart[2]; } } @@ -358,7 +171,7 @@ int32_t JapaneseCalendar::getDefaultDayInMonth(int32_t eyear, int32_t month) int32_t JapaneseCalendar::internalGetEra() const { - return internalGet(UCAL_ERA, kCurrentEra); + return internalGet(UCAL_ERA, gCurrentEra); } int32_t JapaneseCalendar::handleGetExtendedYear() @@ -369,12 +182,18 @@ int32_t JapaneseCalendar::handleGetExtendedYear() if (newerField(UCAL_EXTENDED_YEAR, UCAL_YEAR) == UCAL_EXTENDED_YEAR && newerField(UCAL_EXTENDED_YEAR, UCAL_ERA) == UCAL_EXTENDED_YEAR) { - year = internalGet(UCAL_EXTENDED_YEAR, kGregorianEpoch); - } else { - // Subtract one because year starts at 1 - year = internalGet(UCAL_YEAR) + kEraInfo[internalGetEra()].year - 1; - } - return year; + year = internalGet(UCAL_EXTENDED_YEAR, kGregorianEpoch); + } else { + UErrorCode status = U_ZERO_ERROR; + int32_t eraStartYear = gJapaneseEraRules->getStartYear(internalGet(UCAL_ERA, gCurrentEra), status); + U_ASSERT(U_SUCCESS(status)); + + // extended year is a gregorian year, where 1 = 1AD, 0 = 1BC, -1 = 2BC, etc + year = internalGet(UCAL_YEAR, 1) // pin to minimum of year 1 (first year) + + eraStartYear // add gregorian starting year + - 1; // Subtract one because year starts at 1 + } + return year; } @@ -383,79 +202,10 @@ void JapaneseCalendar::handleComputeFields(int32_t julianDay, UErrorCode& status //Calendar::timeToFields(theTime, quick, status); GregorianCalendar::handleComputeFields(julianDay, status); int32_t year = internalGet(UCAL_EXTENDED_YEAR); // Gregorian year + int32_t eraIdx = gJapaneseEraRules->getEraIndex(year, internalGet(UCAL_MONTH) + 1, internalGet(UCAL_DAY_OF_MONTH), status); - int32_t low = 0; - - // Short circuit for recent years. Most modern computations will - // occur in the current era and won't require the binary search. - // Note that if the year is == the current era year, then we use - // the binary search to handle the month/dom comparison. -#ifdef U_DEBUG_JCAL - fprintf(stderr, "== %d \n", year); -#endif - - if (year > kEraInfo[kCurrentEra].year) { - low = kCurrentEra; -#ifdef U_DEBUG_JCAL - fprintf(stderr, " low=%d (special)\n", low); -#endif - } else { - // Binary search - int32_t high = kEraCount; - -#ifdef U_DEBUG_JCAL - fprintf(stderr, " high=%d\n", high); -#endif - while (low < high - 1) { - int32_t i = (low + high) / 2; - int32_t diff = year - kEraInfo[i].year; - -#ifdef U_DEBUG_JCAL - fprintf(stderr, " d=%d low=%d, high=%d. Considering %d:M%d D%d Y%d. { we are ?:M%d D%d Y%d }\n", - diff,low, high, i, kEraInfo[i].month-1, kEraInfo[i].day, kEraInfo[i].year, internalGet(UCAL_MONTH), internalGet(UCAL_DATE),year); -#endif - - // If years are the same, then compare the months, and if those - // are the same, compare days of month. In the ERAS array - // months are 1-based for easier maintenance. - if (diff == 0) { - diff = internalGet(UCAL_MONTH) - (kEraInfo[i].month - 1); -#ifdef U_DEBUG_JCAL - fprintf(stderr, "diff now %d (M) = %d - %d - 1\n", diff, internalGet(UCAL_MONTH), kEraInfo[i].month); -#endif - if (diff == 0) { - diff = internalGet(UCAL_DATE) - kEraInfo[i].day; -#ifdef U_DEBUG_JCAL - fprintf(stderr, "diff now %d (D)\n", diff); -#endif - } - } - if (diff >= 0) { - low = i; - } else { - high = i; - } -#ifdef U_DEBUG_JCAL - fprintf(stderr, ". low=%d, high=%d, i=%d, diff=%d.. %d\n", low, high, i, diff, year); -#endif - - } - } - -#ifdef U_DEBUG_JCAL - fprintf(stderr, " low[era]=%d,.. %d\n", low, year); -#endif - // Now we've found the last era that starts before this date, so - // adjust the year to count from the start of that era. Note that - // all dates before the first era will fall into the first era by - // the algorithm. - - internalSet(UCAL_ERA, low); - internalSet(UCAL_YEAR, year - kEraInfo[low].year + 1); -#ifdef U_DEBUG_JCAL - fprintf(stderr, " Set ERA=%d, year=%d\n", low, year-kEraInfo[low].year+1); -#endif - + internalSet(UCAL_ERA, eraIdx); + internalSet(UCAL_YEAR, year - gJapaneseEraRules->getStartYear(eraIdx, status) + 1); } /* @@ -483,7 +233,7 @@ int32_t JapaneseCalendar::handleGetLimit(UCalendarDateFields field, ELimitType l if (limitType == UCAL_LIMIT_MINIMUM || limitType == UCAL_LIMIT_GREATEST_MINIMUM) { return 0; } - return kCurrentEra; + return gCurrentEra; case UCAL_YEAR: { switch (limitType) { @@ -494,7 +244,12 @@ int32_t JapaneseCalendar::handleGetLimit(UCalendarDateFields field, ELimitType l return 1; case UCAL_LIMIT_COUNT: //added to avoid warning case UCAL_LIMIT_MAXIMUM: - return GregorianCalendar::handleGetLimit(UCAL_YEAR, UCAL_LIMIT_MAXIMUM) - kEraInfo[kCurrentEra].year; + { + UErrorCode status = U_ZERO_ERROR; + int32_t eraStartYear = gJapaneseEraRules->getStartYear(gCurrentEra, status); + U_ASSERT(U_SUCCESS(status)); + return GregorianCalendar::handleGetLimit(UCAL_YEAR, UCAL_LIMIT_MAXIMUM) - eraStartYear; + } default: return 1; // Error condition, invalid limitType } @@ -510,15 +265,18 @@ int32_t JapaneseCalendar::getActualMaximum(UCalendarDateFields field, UErrorCode if (U_FAILURE(status)) { return 0; // error case... any value } - if (era == kCurrentEra) { + if (era == gCurrentEra) { // TODO: Investigate what value should be used here - revisit after 4.0. return handleGetLimit(UCAL_YEAR, UCAL_LIMIT_MAXIMUM); } else { - int32_t nextEraYear = kEraInfo[era + 1].year; - int32_t nextEraMonth = kEraInfo[era + 1].month; - int32_t nextEraDate = kEraInfo[era + 1].day; - - int32_t maxYear = nextEraYear - kEraInfo[era].year + 1; // 1-base + int32_t nextEraStart[3] = { 0,0,0 }; + gJapaneseEraRules->getStartDate(era + 1, nextEraStart, status); + int32_t nextEraYear = nextEraStart[0]; + int32_t nextEraMonth = nextEraStart[1]; // 1-base + int32_t nextEraDate = nextEraStart[2]; + + int32_t eraStartYear = gJapaneseEraRules->getStartYear(era, status); + int32_t maxYear = nextEraYear - eraStartYear + 1; // 1-base if (nextEraMonth == 1 && nextEraDate == 1) { // Subtract 1, because the next era starts at Jan 1 maxYear--; diff --git a/deps/icu-small/source/i18n/japancal.h b/deps/icu-small/source/i18n/japancal.h index 356351d28dd28a..67d2d7031526a2 100644 --- a/deps/icu-small/source/i18n/japancal.h +++ b/deps/icu-small/source/i18n/japancal.h @@ -49,10 +49,18 @@ U_NAMESPACE_BEGIN * July 30, 1912 (Taisho), December 25, 1926 (Showa), and January 7, 1989 (Heisei). Constants * for these eras, suitable for use in the UCAL_ERA field, are provided * in this class. Note that the number used for each era is more or - * less arbitrary. Currently, the era starting in 1053 AD is era #0; however this - * may change in the future as we add more historical data. Use the predefined - * constants rather than using actual, absolute numbers. + * less arbitrary. Currently, the era starting in 645 AD is era #0; however this + * may change in the future. Use the predefined constants rather than using actual, + * absolute numbers. *

+ * Since ICU4C 63, start date of each era is imported from CLDR. CLDR era data + * may contain tentative era in near future with placeholder names. By default, + * such era data is not enabled. ICU4C users who want to test the behavior of + * the future era can enable this one of following settings (in the priority + * order): + *

    + *
  1. Environment variable ICU_ENABLE_TENTATIVE_ERA=true.
  2. + * * @internal */ class JapaneseCalendar : public GregorianCalendar { diff --git a/deps/icu-small/source/common/listformatter.cpp b/deps/icu-small/source/i18n/listformatter.cpp similarity index 71% rename from deps/icu-small/source/common/listformatter.cpp rename to deps/icu-small/source/i18n/listformatter.cpp index 33a8ac28671fc6..d9195348529c57 100644 --- a/deps/icu-small/source/common/listformatter.cpp +++ b/deps/icu-small/source/i18n/listformatter.cpp @@ -16,14 +16,19 @@ * created by: Umesh P. Nair */ +#include "cmemory.h" +#include "unicode/fpositer.h" // FieldPositionIterator #include "unicode/listformatter.h" #include "unicode/simpleformatter.h" +#include "unicode/ulistformatter.h" +#include "fphdlimp.h" #include "mutex.h" #include "hash.h" #include "cstring.h" +#include "uarrsort.h" #include "ulocimp.h" #include "charstr.h" -#include "ucln_cmn.h" +#include "ucln_in.h" #include "uresimp.h" #include "resource.h" @@ -61,14 +66,14 @@ ListFormatInternal(const ListFormatInternal &other) : -static Hashtable* listPatternHash = NULL; +static Hashtable* listPatternHash = nullptr; static UMutex listFormatterMutex = U_MUTEX_INITIALIZER; static const char STANDARD_STYLE[] = "standard"; U_CDECL_BEGIN static UBool U_CALLCONV uprv_listformatter_cleanup() { delete listPatternHash; - listPatternHash = NULL; + listPatternHash = nullptr; return TRUE; } @@ -81,7 +86,7 @@ U_CDECL_END ListFormatter::ListFormatter(const ListFormatter& other) : owned(other.owned), data(other.data) { - if (other.owned != NULL) { + if (other.owned != nullptr) { owned = new ListFormatInternal(*other.owned); data = owned; } @@ -96,7 +101,7 @@ ListFormatter& ListFormatter::operator=(const ListFormatter& other) { owned = new ListFormatInternal(*other.owned); data = owned; } else { - owned = NULL; + owned = nullptr; data = other.data; } return *this; @@ -108,53 +113,53 @@ void ListFormatter::initializeHash(UErrorCode& errorCode) { } listPatternHash = new Hashtable(); - if (listPatternHash == NULL) { + if (listPatternHash == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; return; } listPatternHash->setValueDeleter(uprv_deleteListFormatInternal); - ucln_common_registerCleanup(UCLN_COMMON_LIST_FORMATTER, uprv_listformatter_cleanup); + ucln_i18n_registerCleanup(UCLN_I18N_LIST_FORMATTER, uprv_listformatter_cleanup); } const ListFormatInternal* ListFormatter::getListFormatInternal( const Locale& locale, const char *style, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } CharString keyBuffer(locale.getName(), errorCode); keyBuffer.append(':', errorCode).append(style, errorCode); UnicodeString key(keyBuffer.data(), -1, US_INV); - ListFormatInternal* result = NULL; + ListFormatInternal* result = nullptr; { Mutex m(&listFormatterMutex); - if (listPatternHash == NULL) { + if (listPatternHash == nullptr) { initializeHash(errorCode); if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } } result = static_cast(listPatternHash->get(key)); } - if (result != NULL) { + if (result != nullptr) { return result; } result = loadListFormatInternal(locale, style, errorCode); if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } { Mutex m(&listFormatterMutex); ListFormatInternal* temp = static_cast(listPatternHash->get(key)); - if (temp != NULL) { + if (temp != nullptr) { delete result; result = temp; } else { listPatternHash->put(key, result, errorCode); if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } } } @@ -235,11 +240,11 @@ ListFormatter::ListPatternsSink::~ListPatternsSink() {} ListFormatInternal* ListFormatter::loadListFormatInternal( const Locale& locale, const char * style, UErrorCode& errorCode) { - UResourceBundle* rb = ures_open(NULL, locale.getName(), &errorCode); + UResourceBundle* rb = ures_open(nullptr, locale.getName(), &errorCode); rb = ures_getByKeyWithFallback(rb, "listPattern", rb, &errorCode); if (U_FAILURE(errorCode)) { ures_close(rb); - return NULL; + return nullptr; } ListFormatter::ListPatternsSink sink; char currentStyle[kStyleLenMax+1]; @@ -255,20 +260,20 @@ ListFormatInternal* ListFormatter::loadListFormatInternal( } ures_close(rb); if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } if (sink.two.isEmpty() || sink.start.isEmpty() || sink.middle.isEmpty() || sink.end.isEmpty()) { errorCode = U_MISSING_RESOURCE_ERROR; - return NULL; + return nullptr; } ListFormatInternal* result = new ListFormatInternal(sink.two, sink.start, sink.middle, sink.end, errorCode); - if (result == NULL) { + if (result == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } if (U_FAILURE(errorCode)) { delete result; - return NULL; + return nullptr; } return result; } @@ -283,15 +288,14 @@ ListFormatter* ListFormatter::createInstance(const Locale& locale, UErrorCode& e } ListFormatter* ListFormatter::createInstance(const Locale& locale, const char *style, UErrorCode& errorCode) { - Locale tempLocale = locale; - const ListFormatInternal* listFormatInternal = getListFormatInternal(tempLocale, style, errorCode); + const ListFormatInternal* listFormatInternal = getListFormatInternal(locale, style, errorCode); if (U_FAILURE(errorCode)) { - return NULL; + return nullptr; } ListFormatter* p = new ListFormatter(listFormatInternal); - if (p == NULL) { + if (p == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } return p; } @@ -301,7 +305,7 @@ ListFormatter::ListFormatter(const ListFormatData& listFormatData, UErrorCode &e data = owned; } -ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(NULL), data(listFormatterInternal) { +ListFormatter::ListFormatter(const ListFormatInternal* listFormatterInternal) : owned(nullptr), data(listFormatterInternal) { } ListFormatter::~ListFormatter() { @@ -323,6 +327,8 @@ static void joinStringsAndReplace( UnicodeString &result, UBool recordOffset, int32_t &offset, + int32_t *offsetFirst, + int32_t *offsetSecond, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; @@ -348,6 +354,8 @@ static void joinStringsAndReplace( } else if (offset >= 0) { offset += offsets[0]; } + if (offsetFirst != nullptr) *offsetFirst = offsets[0]; + if (offsetSecond != nullptr) *offsetSecond = offsets[1]; } UnicodeString& ListFormatter::format( @@ -359,6 +367,19 @@ UnicodeString& ListFormatter::format( return format(items, nItems, appendTo, -1, offset, errorCode); } +#if !UCONFIG_NO_FORMATTING +UnicodeString& ListFormatter::format( + const UnicodeString items[], + int32_t nItems, + UnicodeString & appendTo, + FieldPositionIterator* posIter, + UErrorCode& errorCode) const { + int32_t offset; + FieldPositionIteratorHandler handler(posIter, errorCode); + return format_(items, nItems, appendTo, -1, offset, &handler, errorCode); +}; +#endif + UnicodeString& ListFormatter::format( const UnicodeString items[], int32_t nItems, @@ -366,11 +387,23 @@ UnicodeString& ListFormatter::format( int32_t index, int32_t &offset, UErrorCode& errorCode) const { + return format_(items, nItems, appendTo, index, offset, nullptr, errorCode); +} + +UnicodeString& ListFormatter::format_( + const UnicodeString items[], + int32_t nItems, + UnicodeString& appendTo, + int32_t index, + int32_t &offset, + FieldPositionHandler* handler, + UErrorCode& errorCode) const { +#if !UCONFIG_NO_FORMATTING offset = -1; if (U_FAILURE(errorCode)) { return appendTo; } - if (data == NULL) { + if (data == nullptr) { errorCode = U_INVALID_STATE_ERROR; return appendTo; } @@ -382,6 +415,11 @@ UnicodeString& ListFormatter::format( if (index == 0) { offset = appendTo.length(); } + if (handler != nullptr) { + handler->addAttribute(ULISTFMT_ELEMENT_FIELD, + appendTo.length(), + appendTo.length() + items[0].length()); + } appendTo.append(items[0]); return appendTo; } @@ -389,6 +427,12 @@ UnicodeString& ListFormatter::format( if (index == 0) { offset = 0; } + int32_t offsetFirst; + int32_t offsetSecond; + int32_t prefixLength = 0; + // for n items, there are 2 * (n + 1) boundary including 0 and the upper + // edge. + MaybeStackArray offsets((handler != nullptr) ? 2 * (nItems + 1): 0); joinStringsAndReplace( nItems == 2 ? data->twoPattern : data->startPattern, result, @@ -396,7 +440,14 @@ UnicodeString& ListFormatter::format( result, index == 1, offset, + &offsetFirst, + &offsetSecond, errorCode); + if (handler != nullptr) { + offsets[0] = 0; + prefixLength += offsetFirst; + offsets[1] = offsetSecond - prefixLength; + } if (nItems > 2) { for (int32_t i = 2; i < nItems - 1; ++i) { joinStringsAndReplace( @@ -406,7 +457,13 @@ UnicodeString& ListFormatter::format( result, index == i, offset, + &offsetFirst, + &offsetSecond, errorCode); + if (handler != nullptr) { + prefixLength += offsetFirst; + offsets[i] = offsetSecond - prefixLength; + } } joinStringsAndReplace( data->endPattern, @@ -415,7 +472,45 @@ UnicodeString& ListFormatter::format( result, index == nItems - 1, offset, + &offsetFirst, + &offsetSecond, errorCode); + if (handler != nullptr) { + prefixLength += offsetFirst; + offsets[nItems - 1] = offsetSecond - prefixLength; + } + } + if (handler != nullptr) { + // If there are already some data in appendTo, we need to adjust the index + // by shifting that lenght while insert into handler. + int32_t shift = appendTo.length() + prefixLength; + // Output the ULISTFMT_ELEMENT_FIELD in the order of the input elements + for (int32_t i = 0; i < nItems; ++i) { + offsets[i + nItems] = offsets[i] + items[i].length() + shift; + offsets[i] += shift; + handler->addAttribute( + ULISTFMT_ELEMENT_FIELD, // id + offsets[i], // index + offsets[i + nItems]); // limit + } + // The locale pattern may reorder the items (such as in ur-IN locale), + // so we cannot assume the array is in accendning order. + // To handle the edging case, just insert the two ends into the array + // and sort. Then we output ULISTFMT_LITERAL_FIELD if the indecies + // between the even and odd position are not the same in the sorted array. + offsets[2 * nItems] = shift - prefixLength; + offsets[2 * nItems + 1] = result.length() + shift - prefixLength; + uprv_sortArray(offsets.getAlias(), 2 * (nItems + 1), sizeof(int32_t), + uprv_int32Comparator, nullptr, + false, &errorCode); + for (int32_t i = 0; i <= nItems; ++i) { + if (offsets[i * 2] != offsets[i * 2 + 1]) { + handler->addAttribute( + ULISTFMT_LITERAL_FIELD, // id + offsets[i * 2], // index + offsets[i * 2 + 1]); // limit + } + } } if (U_SUCCESS(errorCode)) { if (offset >= 0) { @@ -423,6 +518,7 @@ UnicodeString& ListFormatter::format( } appendTo += result; } +#endif return appendTo; } diff --git a/deps/icu-small/source/i18n/measfmt.cpp b/deps/icu-small/source/i18n/measfmt.cpp index 996a20c2e043e8..03974ff4b74048 100644 --- a/deps/icu-small/source/i18n/measfmt.cpp +++ b/deps/icu-small/source/i18n/measfmt.cpp @@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN static constexpr int32_t PER_UNIT_INDEX = StandardPlural::COUNT; static constexpr int32_t PATTERN_COUNT = PER_UNIT_INDEX + 1; -static constexpr int32_t MEAS_UNIT_COUNT = 138; // see assertion in MeasureFormatCacheData constructor +static constexpr int32_t MEAS_UNIT_COUNT = 142; // see assertion in MeasureFormatCacheData constructor static constexpr int32_t WIDTH_INDEX_COUNT = UMEASFMT_WIDTH_NARROW + 1; UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MeasureFormat) @@ -618,7 +618,7 @@ MeasureFormat::MeasureFormat( : cache(NULL), numberFormat(NULL), pluralRules(NULL), - width(w), + fWidth(w), listFormatter(NULL) { initMeasureFormat(locale, w, NULL, status); } @@ -631,7 +631,7 @@ MeasureFormat::MeasureFormat( : cache(NULL), numberFormat(NULL), pluralRules(NULL), - width(w), + fWidth(w), listFormatter(NULL) { initMeasureFormat(locale, w, nfToAdopt, status); } @@ -641,7 +641,7 @@ MeasureFormat::MeasureFormat(const MeasureFormat &other) : cache(other.cache), numberFormat(other.numberFormat), pluralRules(other.pluralRules), - width(other.width), + fWidth(other.fWidth), listFormatter(NULL) { cache->addRef(); numberFormat->addRef(); @@ -659,7 +659,7 @@ MeasureFormat &MeasureFormat::operator=(const MeasureFormat &other) { SharedObject::copyPtr(other.cache, cache); SharedObject::copyPtr(other.numberFormat, numberFormat); SharedObject::copyPtr(other.pluralRules, pluralRules); - width = other.width; + fWidth = other.fWidth; delete listFormatter; if (other.listFormatter != NULL) { listFormatter = new ListFormatter(*other.listFormatter); @@ -673,7 +673,7 @@ MeasureFormat::MeasureFormat() : cache(NULL), numberFormat(NULL), pluralRules(NULL), - width(UMEASFMT_WIDTH_SHORT), + fWidth(UMEASFMT_WIDTH_SHORT), listFormatter(NULL) { } @@ -703,7 +703,7 @@ UBool MeasureFormat::operator==(const Format &other) const { // don't have to check it here. // differing widths aren't equivalent - if (width != rhs.width) { + if (fWidth != rhs.fWidth) { return FALSE; } // Width the same check locales. @@ -805,7 +805,7 @@ UnicodeString &MeasureFormat::formatMeasures( if (measureCount == 1) { return formatMeasure(measures[0], **numberFormat, appendTo, pos, status); } - if (width == UMEASFMT_WIDTH_NUMERIC) { + if (fWidth == UMEASFMT_WIDTH_NUMERIC) { Formattable hms[3]; int32_t bitMap = toHMS(measures, measureCount, hms, status); if (bitMap > 0) { @@ -839,7 +839,7 @@ UnicodeString &MeasureFormat::formatMeasures( } UnicodeString MeasureFormat::getUnitDisplayName(const MeasureUnit& unit, UErrorCode& /*status*/) const { - UMeasureFormatWidth width = getRegularWidth(this->width); + UMeasureFormatWidth width = getRegularWidth(fWidth); const UChar* const* styleToDnam = cache->dnams[unit.getIndex()]; const UChar* dnam = styleToDnam[width]; if (dnam == NULL) { @@ -895,11 +895,11 @@ void MeasureFormat::initMeasureFormat( return; } } - width = w; + fWidth = w; delete listFormatter; listFormatter = ListFormatter::createInstance( locale, - listStyles[getRegularWidth(width)], + listStyles[getRegularWidth(fWidth)], status); } @@ -922,7 +922,7 @@ UBool MeasureFormat::setMeasureFormatLocale(const Locale &locale, UErrorCode &st if (U_FAILURE(status) || locale == getLocale(status)) { return FALSE; } - initMeasureFormat(locale, width, NULL, status); + initMeasureFormat(locale, fWidth, NULL, status); return U_SUCCESS(status); } @@ -956,7 +956,7 @@ UnicodeString &MeasureFormat::formatMeasure( if (isCurrency(amtUnit)) { UChar isoCode[4]; u_charsToUChars(amtUnit.getSubtype(), isoCode, 4); - return cache->getCurrencyFormat(width)->format( + return cache->getCurrencyFormat(fWidth)->format( new CurrencyAmount(amtNumber, isoCode, status), appendTo, pos, @@ -965,7 +965,7 @@ UnicodeString &MeasureFormat::formatMeasure( UnicodeString formattedNumber; StandardPlural::Form pluralForm = QuantityFormatter::selectPlural( amtNumber, nf, **pluralRules, formattedNumber, pos, status); - const SimpleFormatter *formatter = getPluralFormatter(amtUnit, width, pluralForm, status); + const SimpleFormatter *formatter = getPluralFormatter(amtUnit, fWidth, pluralForm, status); return QuantityFormatter::format(*formatter, formattedNumber, appendTo, pos, status); } @@ -1016,7 +1016,6 @@ UnicodeString &MeasureFormat::formatNumeric( return appendTo; break; } - return appendTo; } static void appendRange( @@ -1173,7 +1172,7 @@ int32_t MeasureFormat::withPerUnitAndAppend( if (U_FAILURE(status)) { return offset; } - const SimpleFormatter *perUnitFormatter = getFormatterOrNull(perUnit, width, PER_UNIT_INDEX); + const SimpleFormatter *perUnitFormatter = getFormatterOrNull(perUnit, fWidth, PER_UNIT_INDEX); if (perUnitFormatter != NULL) { const UnicodeString *params[] = {&formatted}; perUnitFormatter->formatAndAppend( @@ -1185,9 +1184,9 @@ int32_t MeasureFormat::withPerUnitAndAppend( status); return offset; } - const SimpleFormatter *perFormatter = getPerFormatter(width, status); + const SimpleFormatter *perFormatter = getPerFormatter(fWidth, status); const SimpleFormatter *pattern = - getPluralFormatter(perUnit, width, StandardPlural::ONE, status); + getPluralFormatter(perUnit, fWidth, StandardPlural::ONE, status); if (U_FAILURE(status)) { return offset; } diff --git a/deps/icu-small/source/i18n/measunit.cpp b/deps/icu-small/source/i18n/measunit.cpp index dc156aa720aae0..f6059f8c6dc808 100644 --- a/deps/icu-small/source/i18n/measunit.cpp +++ b/deps/icu-small/source/i18n/measunit.cpp @@ -39,23 +39,23 @@ static const int32_t gOffsets[] = { 2, 7, 16, - 20, - 24, - 321, - 331, - 342, - 346, - 352, - 356, - 376, - 377, - 388, - 391, - 397, + 22, + 26, + 325, + 336, + 347, + 351, + 357, + 361, + 381, + 382, + 393, + 396, 402, - 406, - 410, - 435 + 408, + 412, + 416, + 441 }; static const int32_t gIndexes[] = { @@ -63,23 +63,23 @@ static const int32_t gIndexes[] = { 2, 7, 16, - 20, - 24, - 24, - 34, - 45, - 49, - 55, - 59, - 79, - 80, - 91, + 22, + 26, + 26, + 37, + 48, + 52, + 58, + 62, + 82, + 83, 94, - 100, - 105, + 97, + 103, 109, 113, - 138 + 117, + 142 }; // Must be sorted alphabetically. @@ -128,6 +128,8 @@ static const char * const gSubTypes[] = { "milligram-per-deciliter", "millimole-per-liter", "part-per-million", + "percent", + "permille", "liter-per-100kilometers", "liter-per-kilometer", "mile-per-gallon", @@ -388,9 +390,11 @@ static const char * const gSubTypes[] = { "UYN", "UYP", "UYU", + "UYW", "UZS", "VEB", "VEF", + "VES", "VNC", "VND", "VUV", @@ -437,6 +441,7 @@ static const char * const gSubTypes[] = { "kilobyte", "megabit", "megabyte", + "petabyte", "terabit", "terabyte", "century", @@ -505,6 +510,7 @@ static const char * const gSubTypes[] = { "megawatt", "milliwatt", "watt", + "atmosphere", "hectopascal", "inch-hg", "millibar", @@ -547,14 +553,14 @@ static const char * const gSubTypes[] = { // Must be sorted by first value and then second value. static int32_t unitPerUnitToSingleUnit[][4] = { - {363, 333, 17, 0}, - {365, 339, 17, 2}, - {367, 333, 17, 3}, - {367, 424, 4, 2}, - {367, 425, 4, 3}, - {382, 422, 3, 1}, - {385, 11, 16, 4}, - {427, 363, 4, 1} + {368, 338, 17, 0}, + {370, 344, 17, 2}, + {372, 338, 17, 3}, + {372, 430, 4, 2}, + {372, 431, 4, 3}, + {387, 428, 3, 1}, + {390, 11, 16, 5}, + {433, 368, 4, 1} }; // Shortcuts to the base unit in order to make the default constructor fast @@ -641,6 +647,14 @@ MeasureUnit *MeasureUnit::createPartPerMillion(UErrorCode &status) { return MeasureUnit::create(3, 3, status); } +MeasureUnit *MeasureUnit::createPercent(UErrorCode &status) { + return MeasureUnit::create(3, 4, status); +} + +MeasureUnit *MeasureUnit::createPermille(UErrorCode &status) { + return MeasureUnit::create(3, 5, status); +} + MeasureUnit *MeasureUnit::createLiterPer100Kilometers(UErrorCode &status) { return MeasureUnit::create(4, 0, status); } @@ -689,14 +703,18 @@ MeasureUnit *MeasureUnit::createMegabyte(UErrorCode &status) { return MeasureUnit::create(6, 7, status); } -MeasureUnit *MeasureUnit::createTerabit(UErrorCode &status) { +MeasureUnit *MeasureUnit::createPetabyte(UErrorCode &status) { return MeasureUnit::create(6, 8, status); } -MeasureUnit *MeasureUnit::createTerabyte(UErrorCode &status) { +MeasureUnit *MeasureUnit::createTerabit(UErrorCode &status) { return MeasureUnit::create(6, 9, status); } +MeasureUnit *MeasureUnit::createTerabyte(UErrorCode &status) { + return MeasureUnit::create(6, 10, status); +} + MeasureUnit *MeasureUnit::createCentury(UErrorCode &status) { return MeasureUnit::create(7, 0, status); } @@ -949,26 +967,30 @@ MeasureUnit *MeasureUnit::createWatt(UErrorCode &status) { return MeasureUnit::create(15, 5, status); } -MeasureUnit *MeasureUnit::createHectopascal(UErrorCode &status) { +MeasureUnit *MeasureUnit::createAtmosphere(UErrorCode &status) { return MeasureUnit::create(16, 0, status); } -MeasureUnit *MeasureUnit::createInchHg(UErrorCode &status) { +MeasureUnit *MeasureUnit::createHectopascal(UErrorCode &status) { return MeasureUnit::create(16, 1, status); } -MeasureUnit *MeasureUnit::createMillibar(UErrorCode &status) { +MeasureUnit *MeasureUnit::createInchHg(UErrorCode &status) { return MeasureUnit::create(16, 2, status); } -MeasureUnit *MeasureUnit::createMillimeterOfMercury(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillibar(UErrorCode &status) { return MeasureUnit::create(16, 3, status); } -MeasureUnit *MeasureUnit::createPoundPerSquareInch(UErrorCode &status) { +MeasureUnit *MeasureUnit::createMillimeterOfMercury(UErrorCode &status) { return MeasureUnit::create(16, 4, status); } +MeasureUnit *MeasureUnit::createPoundPerSquareInch(UErrorCode &status) { + return MeasureUnit::create(16, 5, status); +} + MeasureUnit *MeasureUnit::createKilometerPerHour(UErrorCode &status) { return MeasureUnit::create(17, 0, status); } diff --git a/deps/icu-small/source/i18n/msgfmt.cpp b/deps/icu-small/source/i18n/msgfmt.cpp index 8b3807e67148a4..8ff86a2cacf018 100644 --- a/deps/icu-small/source/i18n/msgfmt.cpp +++ b/deps/icu-small/source/i18n/msgfmt.cpp @@ -1078,7 +1078,7 @@ void MessageFormat::format(int32_t msgStart, const void *plNumber, // that formats the number without subtracting the offset. appendTo.formatAndAppend(pluralNumber.formatter, *arg, success); } - } else if ((formatter = getCachedFormatter(i -2))) { + } else if ((formatter = getCachedFormatter(i -2)) != 0) { // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings. if (dynamic_cast(formatter) || dynamic_cast(formatter) || diff --git a/deps/icu-small/source/i18n/nfrule.cpp b/deps/icu-small/source/i18n/nfrule.cpp index 9f5deb31683291..b5e7892d5e68cb 100644 --- a/deps/icu-small/source/i18n/nfrule.cpp +++ b/deps/icu-small/source/i18n/nfrule.cpp @@ -39,14 +39,14 @@ NFRule::NFRule(const RuleBasedNumberFormat* _rbnf, const UnicodeString &_ruleTex , radix(10) , exponent(0) , decimalPoint(0) - , ruleText(_ruleText) + , fRuleText(_ruleText) , sub1(NULL) , sub2(NULL) , formatter(_rbnf) , rulePatternFormat(NULL) { - if (!ruleText.isEmpty()) { - parseRuleDescriptor(ruleText, status); + if (!fRuleText.isEmpty()) { + parseRuleDescriptor(fRuleText, status); } } @@ -122,7 +122,7 @@ NFRule::makeRules(UnicodeString& description, status = U_MEMORY_ALLOCATION_ERROR; return; } - description = rule1->ruleText; + description = rule1->fRuleText; // check the description to see whether there's text enclosed // in brackets @@ -314,7 +314,7 @@ NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) if (c == gSlash) { val = 0; ++p; - int64_t ll_10 = 10; + ll_10 = 10; while (p < descriptorLength) { c = descriptor.charAt(p); if (c >= gZero && c <= gNine) { @@ -418,7 +418,7 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, if (U_FAILURE(status)) { return; } - this->ruleText = ruleText; + fRuleText = ruleText; sub1 = extractSubstitution(ruleSet, predecessor, status); if (sub1 == NULL) { // Small optimization. There is no need to create a redundant NullSubstitution. @@ -427,15 +427,15 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, else { sub2 = extractSubstitution(ruleSet, predecessor, status); } - int32_t pluralRuleStart = this->ruleText.indexOf(gDollarOpenParenthesis, -1, 0); - int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? this->ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1); + int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1); if (pluralRuleEnd >= 0) { - int32_t endType = this->ruleText.indexOf(gComma, pluralRuleStart); + int32_t endType = fRuleText.indexOf(gComma, pluralRuleStart); if (endType < 0) { status = U_PARSE_ERROR; return; } - UnicodeString type(this->ruleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2)); + UnicodeString type(fRuleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2)); UPluralType pluralType; if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { pluralType = UPLURAL_TYPE_CARDINAL; @@ -448,7 +448,7 @@ NFRule::extractSubstitutions(const NFRuleSet* ruleSet, return; } rulePatternFormat = formatter->createPluralFormat(pluralType, - this->ruleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status); + fRuleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status); } } @@ -484,16 +484,16 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, // special-case the ">>>" token, since searching for the > at the // end will actually find the > in the middle - if (ruleText.indexOf(gGreaterGreaterGreater, 3, 0) == subStart) { + if (fRuleText.indexOf(gGreaterGreaterGreater, 3, 0) == subStart) { subEnd = subStart + 2; // otherwise the substitution token ends with the same character // it began with } else { - UChar c = ruleText.charAt(subStart); - subEnd = ruleText.indexOf(c, subStart + 1); + UChar c = fRuleText.charAt(subStart); + subEnd = fRuleText.indexOf(c, subStart + 1); // special case for '<%foo<<' - if (c == gLessThan && subEnd != -1 && subEnd < ruleText.length() - 1 && ruleText.charAt(subEnd+1) == c) { + if (c == gLessThan && subEnd != -1 && subEnd < fRuleText.length() - 1 && fRuleText.charAt(subEnd+1) == c) { // ordinals use "=#,##0==%abbrev=" as their rule. Notice that the '==' in the middle // occurs because of the juxtaposition of two different rules. The check for '<' is a hack // to get around this. Having the duplicate at the front would cause problems with @@ -513,12 +513,12 @@ NFRule::extractSubstitution(const NFRuleSet* ruleSet, // some text bounded by substitution token characters). Use // makeSubstitution() to create the right kind of substitution UnicodeString subToken; - subToken.setTo(ruleText, subStart, subEnd + 1 - subStart); + subToken.setTo(fRuleText, subStart, subEnd + 1 - subStart); result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet, this->formatter, subToken, status); // remove the substitution from the rule text - ruleText.removeBetween(subStart, subEnd+1); + fRuleText.removeBetween(subStart, subEnd+1); return result; } @@ -601,7 +601,7 @@ NFRule::indexOfAnyRulePrefix() const { int result = -1; for (int i = 0; RULE_PREFIXES[i]; i++) { - int32_t pos = ruleText.indexOf(*RULE_PREFIXES[i]); + int32_t pos = fRuleText.indexOf(*RULE_PREFIXES[i]); if (pos != -1 && (result == -1 || pos < result)) { result = pos; } @@ -637,7 +637,7 @@ NFRule::operator==(const NFRule& rhs) const return baseValue == rhs.baseValue && radix == rhs.radix && exponent == rhs.exponent - && ruleText == rhs.ruleText + && fRuleText == rhs.fRuleText && util_equalSubstitutions(sub1, rhs.sub1) && util_equalSubstitutions(sub2, rhs.sub2); } @@ -690,14 +690,14 @@ NFRule::_appendRuleText(UnicodeString& result) const // if the rule text begins with a space, write an apostrophe // (whitespace after the rule descriptor is ignored; the // apostrophe is used to make the whitespace significant) - if (ruleText.charAt(0) == gSpace && (sub1 == NULL || sub1->getPos() != 0)) { + if (fRuleText.charAt(0) == gSpace && (sub1 == NULL || sub1->getPos() != 0)) { result.append(gTick); } // now, write the rule's rule text, inserting appropriate // substitution tokens in the appropriate places UnicodeString ruleTextCopy; - ruleTextCopy.setTo(ruleText); + ruleTextCopy.setTo(fRuleText); UnicodeString temp; if (sub2 != NULL) { @@ -743,24 +743,24 @@ NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32 // into the right places in toInsertInto (notice we do the // substitutions in reverse order so that the offsets don't get // messed up) - int32_t pluralRuleStart = ruleText.length(); + int32_t pluralRuleStart = fRuleText.length(); int32_t lengthOffset = 0; if (!rulePatternFormat) { - toInsertInto.insert(pos, ruleText); + toInsertInto.insert(pos, fRuleText); } else { - pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); - int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); + pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); int initialLength = toInsertInto.length(); - if (pluralRuleEnd < ruleText.length() - 1) { - toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); + if (pluralRuleEnd < fRuleText.length() - 1) { + toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2)); } toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status)); if (pluralRuleStart > 0) { - toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart)); } - lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); + lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength); } if (sub2 != NULL) { @@ -789,17 +789,17 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ // [again, we have two copies of this routine that do the same thing // so that we don't sacrifice precision in a long by casting it // to a double] - int32_t pluralRuleStart = ruleText.length(); + int32_t pluralRuleStart = fRuleText.length(); int32_t lengthOffset = 0; if (!rulePatternFormat) { - toInsertInto.insert(pos, ruleText); + toInsertInto.insert(pos, fRuleText); } else { - pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); - int pluralRuleEnd = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); + pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); + int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); int initialLength = toInsertInto.length(); - if (pluralRuleEnd < ruleText.length() - 1) { - toInsertInto.insert(pos, ruleText.tempSubString(pluralRuleEnd + 2)); + if (pluralRuleEnd < fRuleText.length() - 1) { + toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2)); } double pluralVal = number; if (0 <= pluralVal && pluralVal < 1) { @@ -812,9 +812,9 @@ NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_ } toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status)); if (pluralRuleStart > 0) { - toInsertInto.insert(pos, ruleText.tempSubString(0, pluralRuleStart)); + toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart)); } - lengthOffset = ruleText.length() - (toInsertInto.length() - initialLength); + lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength); } if (sub2 != NULL) { @@ -908,15 +908,15 @@ NFRule::doParse(const UnicodeString& text, ParsePosition pp; UnicodeString workText(text); - int32_t sub1Pos = sub1 != NULL ? sub1->getPos() : ruleText.length(); - int32_t sub2Pos = sub2 != NULL ? sub2->getPos() : ruleText.length(); + int32_t sub1Pos = sub1 != NULL ? sub1->getPos() : fRuleText.length(); + int32_t sub2Pos = sub2 != NULL ? sub2->getPos() : fRuleText.length(); // check to see whether the text before the first substitution // matches the text at the beginning of the string being // parsed. If it does, strip that off the front of workText; // otherwise, dump out with a mismatch UnicodeString prefix; - prefix.setTo(ruleText, 0, sub1Pos); + prefix.setTo(fRuleText, 0, sub1Pos); #ifdef RBNF_DEBUG fprintf(stderr, "doParse %p ", this); @@ -1000,7 +1000,7 @@ NFRule::doParse(const UnicodeString& text, // the substitution, giving us a new partial parse result pp.setIndex(0); - temp.setTo(ruleText, sub1Pos, sub2Pos - sub1Pos); + temp.setTo(fRuleText, sub1Pos, sub2Pos - sub1Pos); double partialResult = matchToDelimiter(workText, start, tempBaseValue, temp, pp, sub1, nonNumericalExecutedRuleMask, @@ -1021,7 +1021,7 @@ NFRule::doParse(const UnicodeString& text, // partial result with whatever it gets back from its // substitution if there's a successful match, giving us // a real result - temp.setTo(ruleText, sub2Pos, ruleText.length() - sub2Pos); + temp.setTo(fRuleText, sub2Pos, fRuleText.length() - sub2Pos); partialResult = matchToDelimiter(workText2, 0, partialResult, temp, pp2, sub2, nonNumericalExecutedRuleMask, @@ -1039,18 +1039,18 @@ NFRule::doParse(const UnicodeString& text, else { // commented out because ParsePosition doesn't have error index in 1.1.x // restored for ICU4C port - int32_t temp = pp2.getErrorIndex() + sub1Pos + pp.getIndex(); - if (temp> parsePosition.getErrorIndex()) { - parsePosition.setErrorIndex(temp); + int32_t i_temp = pp2.getErrorIndex() + sub1Pos + pp.getIndex(); + if (i_temp> parsePosition.getErrorIndex()) { + parsePosition.setErrorIndex(i_temp); } } } else { // commented out because ParsePosition doesn't have error index in 1.1.x // restored for ICU4C port - int32_t temp = sub1Pos + pp.getErrorIndex(); - if (temp > parsePosition.getErrorIndex()) { - parsePosition.setErrorIndex(temp); + int32_t i_temp = sub1Pos + pp.getErrorIndex(); + if (i_temp > parsePosition.getErrorIndex()) { + parsePosition.setErrorIndex(i_temp); } } // keep trying to match things until the outer matchToDelimiter() @@ -1483,11 +1483,11 @@ NFRule::findText(const UnicodeString& str, rulePatternFormat->parseType(str, this, result, position); int start = position.getBeginIndex(); if (start >= 0) { - int32_t pluralRuleStart = ruleText.indexOf(gDollarOpenParenthesis, -1, 0); - int32_t pluralRuleSuffix = ruleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2; + int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); + int32_t pluralRuleSuffix = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2; int32_t matchLen = position.getEndIndex() - start; - UnicodeString prefix(ruleText.tempSubString(0, pluralRuleStart)); - UnicodeString suffix(ruleText.tempSubString(pluralRuleSuffix)); + UnicodeString prefix(fRuleText.tempSubString(0, pluralRuleStart)); + UnicodeString suffix(fRuleText.tempSubString(pluralRuleSuffix)); if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0 && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0) { diff --git a/deps/icu-small/source/i18n/nfrule.h b/deps/icu-small/source/i18n/nfrule.h index 843a4a0762bb01..2b030390ea7dcb 100644 --- a/deps/icu-small/source/i18n/nfrule.h +++ b/deps/icu-small/source/i18n/nfrule.h @@ -109,7 +109,7 @@ class NFRule : public UMemory { int32_t radix; int16_t exponent; UChar decimalPoint; - UnicodeString ruleText; + UnicodeString fRuleText; NFSubstitution* sub1; NFSubstitution* sub2; const RuleBasedNumberFormat* formatter; diff --git a/deps/icu-small/source/i18n/number_compact.cpp b/deps/icu-small/source/i18n/number_compact.cpp index 40278e1a012e54..10942c35f535df 100644 --- a/deps/icu-small/source/i18n/number_compact.cpp +++ b/deps/icu-small/source/i18n/number_compact.cpp @@ -273,13 +273,13 @@ void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micr if (U_FAILURE(status)) { return; } // Treat zero as if it had magnitude 0 - int magnitude; + int32_t magnitude; if (quantity.isZero()) { magnitude = 0; micros.rounder.apply(quantity, status); } else { // TODO: Revisit chooseMultiplierAndApply - int multiplier = micros.rounder.chooseMultiplierAndApply(quantity, data, status); + int32_t multiplier = micros.rounder.chooseMultiplierAndApply(quantity, data, status); magnitude = quantity.isZero() ? 0 : quantity.getMagnitude(); magnitude -= multiplier; } diff --git a/deps/icu-small/source/i18n/number_decimalquantity.cpp b/deps/icu-small/source/i18n/number_decimalquantity.cpp index 9d80e3349cb8aa..2c4182b1c6ecda 100644 --- a/deps/icu-small/source/i18n/number_decimalquantity.cpp +++ b/deps/icu-small/source/i18n/number_decimalquantity.cpp @@ -1154,8 +1154,31 @@ const char16_t* DecimalQuantity::checkHealth() const { } bool DecimalQuantity::operator==(const DecimalQuantity& other) const { - // FIXME: Make a faster implementation. - return toString() == other.toString(); + bool basicEquals = + scale == other.scale + && precision == other.precision + && flags == other.flags + && lOptPos == other.lOptPos + && lReqPos == other.lReqPos + && rReqPos == other.rReqPos + && rOptPos == other.rOptPos + && isApproximate == other.isApproximate; + if (!basicEquals) { + return false; + } + + if (precision == 0) { + return true; + } else if (isApproximate) { + return origDouble == other.origDouble && origDelta == other.origDelta; + } else { + for (int m = getUpperDisplayMagnitude(); m >= getLowerDisplayMagnitude(); m--) { + if (getDigit(m) != other.getDigit(m)) { + return false; + } + } + return true; + } } UnicodeString DecimalQuantity::toString() const { diff --git a/deps/icu-small/source/i18n/number_decimfmtprops.cpp b/deps/icu-small/source/i18n/number_decimfmtprops.cpp index 6754fe19eca56c..12fe7060e2d051 100644 --- a/deps/icu-small/source/i18n/number_decimfmtprops.cpp +++ b/deps/icu-small/source/i18n/number_decimfmtprops.cpp @@ -15,6 +15,7 @@ using namespace icu::number::impl; namespace { +alignas(DecimalFormatProperties) char kRawDefaultProperties[sizeof(DecimalFormatProperties)]; icu::UInitOnce gDefaultPropertiesInitOnce = U_INITONCE_INITIALIZER; diff --git a/deps/icu-small/source/i18n/number_fluent.cpp b/deps/icu-small/source/i18n/number_fluent.cpp index 687adb6b5babd2..a66e3bd0f23510 100644 --- a/deps/icu-small/source/i18n/number_fluent.cpp +++ b/deps/icu-small/source/i18n/number_fluent.cpp @@ -363,6 +363,7 @@ UnlocalizedNumberFormatter::UnlocalizedNumberFormatter(const NFS& other) // No additional fields to assign } +// Make default copy constructor call the NumberFormatterSettings copy constructor. UnlocalizedNumberFormatter::UnlocalizedNumberFormatter(UNF&& src) U_NOEXCEPT : UNF(static_cast&&>(src)) {} @@ -383,6 +384,7 @@ UnlocalizedNumberFormatter& UnlocalizedNumberFormatter::operator=(UNF&& src) U_N return *this; } +// Make default copy constructor call the NumberFormatterSettings copy constructor. LocalizedNumberFormatter::LocalizedNumberFormatter(const LNF& other) : LNF(static_cast&>(other)) {} @@ -405,7 +407,8 @@ LocalizedNumberFormatter::LocalizedNumberFormatter(NFS&& src) U_NOEXCEPT LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(const LNF& other) { NFS::operator=(static_cast&>(other)); - // No additional fields to assign (let call count and compiled formatter reset to defaults) + // Reset to default values. + clear(); return *this; } @@ -417,20 +420,26 @@ LocalizedNumberFormatter& LocalizedNumberFormatter::operator=(LNF&& src) U_NOEXC // Formatter is compiled lnfMoveHelper(static_cast(src)); } else { - // Reset to default values. - auto* callCount = reinterpret_cast(fUnsafeCallCount); - umtx_storeRelease(*callCount, 0); - fCompiled = nullptr; + clear(); } return *this; } +void LocalizedNumberFormatter::clear() { + // Reset to default values. + auto* callCount = reinterpret_cast(fUnsafeCallCount); + umtx_storeRelease(*callCount, 0); + delete fCompiled; + fCompiled = nullptr; +} + void LocalizedNumberFormatter::lnfMoveHelper(LNF&& src) { // Copy over the compiled formatter and set call count to INT32_MIN as in computeCompiled(). // Don't copy the call count directly because doing so requires a loadAcquire/storeRelease. // The bits themselves appear to be platform-dependent, so copying them might not be safe. auto* callCount = reinterpret_cast(fUnsafeCallCount); umtx_storeRelease(*callCount, INT32_MIN); + delete fCompiled; fCompiled = src.fCompiled; // Reset the source object to leave it in a safe state. auto* srcCallCount = reinterpret_cast(src.fUnsafeCallCount); @@ -657,9 +666,9 @@ LocalizedNumberFormatter::formatDecimalQuantity(const DecimalQuantity& dq, UErro void LocalizedNumberFormatter::formatImpl(impl::UFormattedNumberData* results, UErrorCode& status) const { if (computeCompiled(status)) { - fCompiled->apply(results->quantity, results->string, status); + fCompiled->format(results->quantity, results->string, status); } else { - NumberFormatterImpl::applyStatic(fMacros, results->quantity, results->string, status); + NumberFormatterImpl::formatStatic(fMacros, results->quantity, results->string, status); } } @@ -706,7 +715,11 @@ bool LocalizedNumberFormatter::computeCompiled(UErrorCode& status) const { if (currentCount == fMacros.threshold && fMacros.threshold > 0) { // Build the data structure and then use it (slow to fast path). - const NumberFormatterImpl* compiled = NumberFormatterImpl::fromMacros(fMacros, status); + const NumberFormatterImpl* compiled = new NumberFormatterImpl(fMacros, status); + if (compiled == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return false; + } U_ASSERT(fCompiled == nullptr); const_cast(this)->fCompiled = compiled; umtx_storeRelease(*callCount, INT32_MIN); @@ -776,7 +789,7 @@ Appendable& FormattedNumber::appendTo(Appendable& appendable) { return appendTo(appendable, localStatus); } -Appendable& FormattedNumber::appendTo(Appendable& appendable, UErrorCode& status) { +Appendable& FormattedNumber::appendTo(Appendable& appendable, UErrorCode& status) const { if (U_FAILURE(status)) { return appendable; } diff --git a/deps/icu-small/source/i18n/number_formatimpl.cpp b/deps/icu-small/source/i18n/number_formatimpl.cpp index 3f887128bcc668..60c18ee284e238 100644 --- a/deps/icu-small/source/i18n/number_formatimpl.cpp +++ b/deps/icu-small/source/i18n/number_formatimpl.cpp @@ -67,14 +67,18 @@ getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& sta MicroPropsGenerator::~MicroPropsGenerator() = default; -NumberFormatterImpl* NumberFormatterImpl::fromMacros(const MacroProps& macros, UErrorCode& status) { - return new NumberFormatterImpl(macros, true, status); +NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, UErrorCode& status) + : NumberFormatterImpl(macros, true, status) { } -void NumberFormatterImpl::applyStatic(const MacroProps& macros, DecimalQuantity& inValue, - NumberStringBuilder& outString, UErrorCode& status) { +int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuantity& inValue, + NumberStringBuilder& outString, UErrorCode& status) { NumberFormatterImpl impl(macros, false, status); - impl.applyUnsafe(inValue, outString, status); + MicroProps& micros = impl.preProcessUnsafe(inValue, status); + if (U_FAILURE(status)) { return 0; } + int32_t length = writeNumber(micros, inValue, outString, 0, status); + length += writeAffixes(micros, outString, 0, length, status); + return length; } int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int8_t signum, @@ -89,22 +93,40 @@ int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int // The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation. // See MicroProps::processQuantity() for details. -void NumberFormatterImpl::apply(DecimalQuantity& inValue, NumberStringBuilder& outString, +int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, NumberStringBuilder& outString, UErrorCode& status) const { - if (U_FAILURE(status)) { return; } MicroProps micros; - if (!fMicroPropsGenerator) { return; } - fMicroPropsGenerator->processQuantity(inValue, micros, status); - if (U_FAILURE(status)) { return; } - microsToString(micros, inValue, outString, status); + preProcess(inValue, micros, status); + if (U_FAILURE(status)) { return 0; } + int32_t length = writeNumber(micros, inValue, outString, 0, status); + length += writeAffixes(micros, outString, 0, length, status); + return length; } -void NumberFormatterImpl::applyUnsafe(DecimalQuantity& inValue, NumberStringBuilder& outString, - UErrorCode& status) { +void NumberFormatterImpl::preProcess(DecimalQuantity& inValue, MicroProps& microsOut, + UErrorCode& status) const { if (U_FAILURE(status)) { return; } + if (fMicroPropsGenerator == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + fMicroPropsGenerator->processQuantity(inValue, microsOut, status); + microsOut.rounder.apply(inValue, status); + microsOut.integerWidth.apply(inValue, status); +} + +MicroProps& NumberFormatterImpl::preProcessUnsafe(DecimalQuantity& inValue, UErrorCode& status) { + if (U_FAILURE(status)) { + return fMicros; // must always return a value + } + if (fMicroPropsGenerator == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return fMicros; // must always return a value + } fMicroPropsGenerator->processQuantity(inValue, fMicros, status); - if (U_FAILURE(status)) { return; } - microsToString(fMicros, inValue, outString, status); + fMicros.rounder.apply(inValue, status); + fMicros.integerWidth.apply(inValue, status); + return fMicros; } int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form plural, @@ -115,7 +137,7 @@ int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form const Modifier* modifier = fImmutablePatternModifier->getModifier(signum, plural); modifier->apply(outString, 0, 0, status); if (U_FAILURE(status)) { return 0; } - return modifier->getPrefixLength(status); + return modifier->getPrefixLength(); } int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural, @@ -126,7 +148,7 @@ int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural fPatternModifier->setNumberProperties(signum, plural); fPatternModifier->apply(outString, 0, 0, status); if (U_FAILURE(status)) { return 0; } - return fPatternModifier->getPrefixLength(status); + return fPatternModifier->getPrefixLength(); } NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) { @@ -344,25 +366,23 @@ NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, // Outer modifier (CLDR units and currency long names) if (isCldrUnit) { fLongNameHandler.adoptInstead( - new LongNameHandler( - LongNameHandler::forMeasureUnit( - macros.locale, - macros.unit, - macros.perUnit, - unitWidth, - resolvePluralRules(macros.rules, macros.locale, status), - chain, - status))); + LongNameHandler::forMeasureUnit( + macros.locale, + macros.unit, + macros.perUnit, + unitWidth, + resolvePluralRules(macros.rules, macros.locale, status), + chain, + status)); chain = fLongNameHandler.getAlias(); } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) { fLongNameHandler.adoptInstead( - new LongNameHandler( - LongNameHandler::forCurrencyLongNames( - macros.locale, - currency, - resolvePluralRules(macros.rules, macros.locale, status), - chain, - status))); + LongNameHandler::forCurrencyLongNames( + macros.locale, + currency, + resolvePluralRules(macros.rules, macros.locale, status), + chain, + status)); chain = fLongNameHandler.getAlias(); } else { // No outer modifier required @@ -404,50 +424,46 @@ NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Local return fRules.getAlias(); } -int32_t NumberFormatterImpl::microsToString(const MicroProps& micros, DecimalQuantity& quantity, - NumberStringBuilder& string, UErrorCode& status) { - micros.rounder.apply(quantity, status); - micros.integerWidth.apply(quantity, status); - int32_t length = writeNumber(micros, quantity, string, status); - // NOTE: When range formatting is added, these modifiers can bubble up. - // For now, apply them all here at once. +int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberStringBuilder& string, + int32_t start, int32_t end, UErrorCode& status) { // Always apply the inner modifier (which is "strong"). - length += micros.modInner->apply(string, 0, length, status); + int32_t length = micros.modInner->apply(string, start, end, status); if (micros.padding.isValid()) { length += micros.padding - .padAndApply(*micros.modMiddle, *micros.modOuter, string, 0, length, status); + .padAndApply(*micros.modMiddle, *micros.modOuter, string, start, length + end, status); } else { - length += micros.modMiddle->apply(string, 0, length, status); - length += micros.modOuter->apply(string, 0, length, status); + length += micros.modMiddle->apply(string, start, length + end, status); + length += micros.modOuter->apply(string, start, length + end, status); } return length; } int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity, - NumberStringBuilder& string, UErrorCode& status) { + NumberStringBuilder& string, int32_t index, + UErrorCode& status) { int32_t length = 0; if (quantity.isInfinite()) { length += string.insert( - length, + length + index, micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol), UNUM_INTEGER_FIELD, status); } else if (quantity.isNaN()) { length += string.insert( - length, + length + index, micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol), UNUM_INTEGER_FIELD, status); } else { // Add the integer digits - length += writeIntegerDigits(micros, quantity, string, status); + length += writeIntegerDigits(micros, quantity, string, length + index, status); // Add the decimal point if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) { length += string.insert( - length, + length + index, micros.useCurrency ? micros.symbols->getSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros .symbols @@ -458,21 +474,22 @@ int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuanti } // Add the fraction digits - length += writeFractionDigits(micros, quantity, string, status); + length += writeFractionDigits(micros, quantity, string, length + index, status); } return length; } int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity, - NumberStringBuilder& string, UErrorCode& status) { + NumberStringBuilder& string, int32_t index, + UErrorCode& status) { int length = 0; int integerCount = quantity.getUpperDisplayMagnitude() + 1; for (int i = 0; i < integerCount; i++) { // Add grouping separator if (micros.grouping.groupAtPosition(i, quantity)) { length += string.insert( - 0, + index, micros.useCurrency ? micros.symbols->getSymbol( DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol) : micros.symbols->getSymbol( @@ -484,20 +501,21 @@ int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, Decima // Get and append the next digit value int8_t nextDigit = quantity.getDigit(i); length += utils::insertDigitFromSymbols( - string, 0, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status); + string, index, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status); } return length; } int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity, - NumberStringBuilder& string, UErrorCode& status) { + NumberStringBuilder& string, int32_t index, + UErrorCode& status) { int length = 0; int fractionCount = -quantity.getLowerDisplayMagnitude(); for (int i = 0; i < fractionCount; i++) { // Get and append the next digit value int8_t nextDigit = quantity.getDigit(-i - 1); length += utils::insertDigitFromSymbols( - string, string.length(), nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status); + string, length + index, nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status); } return length; } diff --git a/deps/icu-small/source/i18n/number_formatimpl.h b/deps/icu-small/source/i18n/number_formatimpl.h index 744fecec13f984..fda38c92845f87 100644 --- a/deps/icu-small/source/i18n/number_formatimpl.h +++ b/deps/icu-small/source/i18n/number_formatimpl.h @@ -29,14 +29,14 @@ class NumberFormatterImpl : public UMemory { * Builds a "safe" MicroPropsGenerator, which is thread-safe and can be used repeatedly. * The caller owns the returned NumberFormatterImpl. */ - static NumberFormatterImpl *fromMacros(const MacroProps ¯os, UErrorCode &status); + NumberFormatterImpl(const MacroProps ¯os, UErrorCode &status); /** * Builds and evaluates an "unsafe" MicroPropsGenerator, which is cheaper but can be used only once. */ - static void - applyStatic(const MacroProps ¯os, DecimalQuantity &inValue, NumberStringBuilder &outString, - UErrorCode &status); + static int32_t + formatStatic(const MacroProps ¯os, DecimalQuantity &inValue, NumberStringBuilder &outString, + UErrorCode &status); /** * Prints only the prefix and suffix; used for DecimalFormat getters. @@ -51,7 +51,12 @@ class NumberFormatterImpl : public UMemory { /** * Evaluates the "safe" MicroPropsGenerator created by "fromMacros". */ - void apply(DecimalQuantity& inValue, NumberStringBuilder& outString, UErrorCode& status) const; + int32_t format(DecimalQuantity& inValue, NumberStringBuilder& outString, UErrorCode& status) const; + + /** + * Like format(), but saves the result into an output MicroProps without additional processing. + */ + void preProcess(DecimalQuantity& inValue, MicroProps& microsOut, UErrorCode& status) const; /** * Like getPrefixSuffixStatic() but uses the safe compiled object. @@ -59,6 +64,19 @@ class NumberFormatterImpl : public UMemory { int32_t getPrefixSuffix(int8_t signum, StandardPlural::Form plural, NumberStringBuilder& outString, UErrorCode& status) const; + /** + * Synthesizes the output string from a MicroProps and DecimalQuantity. + * This method formats only the main number, not affixes. + */ + static int32_t writeNumber(const MicroProps& micros, DecimalQuantity& quantity, + NumberStringBuilder& string, int32_t index, UErrorCode& status); + + /** + * Adds the affixes. Intended to be called immediately after formatNumber. + */ + static int32_t writeAffixes(const MicroProps& micros, NumberStringBuilder& string, int32_t start, + int32_t end, UErrorCode& status); + private: // Head of the MicroPropsGenerator linked list: const MicroPropsGenerator *fMicroPropsGenerator = nullptr; @@ -85,7 +103,7 @@ class NumberFormatterImpl : public UMemory { NumberFormatterImpl(const MacroProps ¯os, bool safe, UErrorCode &status); - void applyUnsafe(DecimalQuantity &inValue, NumberStringBuilder &outString, UErrorCode &status); + MicroProps& preProcessUnsafe(DecimalQuantity &inValue, UErrorCode &status); int32_t getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural, NumberStringBuilder& outString, UErrorCode& status); @@ -113,31 +131,13 @@ class NumberFormatterImpl : public UMemory { const MicroPropsGenerator * macrosToMicroGenerator(const MacroProps ¯os, bool safe, UErrorCode &status); - /** - * Synthesizes the output string from a MicroProps and DecimalQuantity. - * - * @param micros - * The MicroProps after the quantity has been consumed. Will not be mutated. - * @param quantity - * The DecimalQuantity to be rendered. May be mutated. - * @param string - * The output string. Will be mutated. - */ - static int32_t - microsToString(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, - UErrorCode &status); - - static int32_t - writeNumber(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, - UErrorCode &status); - static int32_t writeIntegerDigits(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, - UErrorCode &status); + int32_t index, UErrorCode &status); static int32_t writeFractionDigits(const MicroProps µs, DecimalQuantity &quantity, NumberStringBuilder &string, - UErrorCode &status); + int32_t index, UErrorCode &status); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_grouping.cpp b/deps/icu-small/source/i18n/number_grouping.cpp index 4a1cceb49948b9..da32cca99a38df 100644 --- a/deps/icu-small/source/i18n/number_grouping.cpp +++ b/deps/icu-small/source/i18n/number_grouping.cpp @@ -80,7 +80,7 @@ void Grouper::setLocaleData(const impl::ParsedPatternInfo &patternInfo, const Lo if (fMinGrouping == -2) { fMinGrouping = getMinGroupingForLocale(locale); } else if (fMinGrouping == -3) { - fMinGrouping = uprv_max(2, getMinGroupingForLocale(locale)); + fMinGrouping = static_cast(uprv_max(2, getMinGroupingForLocale(locale))); } else { // leave fMinGrouping alone } diff --git a/deps/icu-small/source/i18n/number_longnames.cpp b/deps/icu-small/source/i18n/number_longnames.cpp index 26f9af4c9bdbfe..fd8e8d381a1d76 100644 --- a/deps/icu-small/source/i18n/number_longnames.cpp +++ b/deps/icu-small/source/i18n/number_longnames.cpp @@ -39,7 +39,7 @@ static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) { static UnicodeString getWithPlural( const UnicodeString* strings, - int32_t plural, + StandardPlural::Form plural, UErrorCode& status) { UnicodeString result = strings[plural]; if (result.isBogus()) { @@ -156,7 +156,7 @@ UnicodeString getPerUnitFormat(const Locale& locale, const UNumberUnitWidth &wid } // namespace -LongNameHandler +LongNameHandler* LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { @@ -173,20 +173,28 @@ LongNameHandler::forMeasureUnit(const Locale &loc, const MeasureUnit &unitRef, c } } - LongNameHandler result(rules, parent); + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } UnicodeString simpleFormats[ARRAY_LENGTH]; getMeasureData(loc, unit, width, simpleFormats, status); if (U_FAILURE(status)) { return result; } // TODO: What field to use for units? - simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, result.fModifiers, status); + result->simpleFormatsToModifiers(simpleFormats, UNUM_FIELD_COUNT, status); return result; } -LongNameHandler +LongNameHandler* LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { - LongNameHandler result(rules, parent); + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } UnicodeString primaryData[ARRAY_LENGTH]; getMeasureData(loc, unit, width, primaryData, status); if (U_FAILURE(status)) { return result; } @@ -213,46 +221,52 @@ LongNameHandler::forCompoundUnit(const Locale &loc, const MeasureUnit &unit, con if (U_FAILURE(status)) { return result; } } // TODO: What field to use for units? - multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, result.fModifiers, status); + result->multiSimpleFormatsToModifiers(primaryData, perUnitFormat, UNUM_FIELD_COUNT, status); return result; } -LongNameHandler LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, +LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status) { - LongNameHandler result(rules, parent); + auto* result = new LongNameHandler(rules, parent); + if (result == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } UnicodeString simpleFormats[ARRAY_LENGTH]; getCurrencyLongNameData(loc, currency, simpleFormats, status); - if (U_FAILURE(status)) { return result; } - simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, result.fModifiers, status); + if (U_FAILURE(status)) { return nullptr; } + result->simpleFormatsToModifiers(simpleFormats, UNUM_CURRENCY_FIELD, status); return result; } void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, - SimpleModifier *output, UErrorCode &status) { + UErrorCode &status) { for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { - UnicodeString simpleFormat = getWithPlural(simpleFormats, i, status); + StandardPlural::Form plural = static_cast(i); + UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status); if (U_FAILURE(status)) { return; } SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status); if (U_FAILURE(status)) { return; } - output[i] = SimpleModifier(compiledFormatter, field, false); + fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, 0, plural}); } } void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, - Field field, SimpleModifier *output, UErrorCode &status) { + Field field, UErrorCode &status) { SimpleFormatter trailCompiled(trailFormat, 1, 1, status); if (U_FAILURE(status)) { return; } for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) { - UnicodeString leadFormat = getWithPlural(leadFormats, i, status); + StandardPlural::Form plural = static_cast(i); + UnicodeString leadFormat = getWithPlural(leadFormats, plural, status); if (U_FAILURE(status)) { return; } UnicodeString compoundFormat; trailCompiled.format(leadFormat, compoundFormat, status); if (U_FAILURE(status)) { return; } SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status); if (U_FAILURE(status)) { return; } - output[i] = SimpleModifier(compoundCompiled, field, false); + fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, 0, plural}); } } @@ -265,4 +279,8 @@ void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &mic micros.modOuter = &fModifiers[utils::getStandardPlural(rules, copy)]; } +const Modifier* LongNameHandler::getModifier(int8_t /*signum*/, StandardPlural::Form plural) const { + return &fModifiers[plural]; +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/number_longnames.h b/deps/icu-small/source/i18n/number_longnames.h index 1d1e7dd3e864a6..a71d0caadf125c 100644 --- a/deps/icu-small/source/i18n/number_longnames.h +++ b/deps/icu-small/source/i18n/number_longnames.h @@ -14,13 +14,13 @@ U_NAMESPACE_BEGIN namespace number { namespace impl { -class LongNameHandler : public MicroPropsGenerator, public UMemory { +class LongNameHandler : public MicroPropsGenerator, public ModifierStore, public UMemory { public: - static LongNameHandler + static LongNameHandler* forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); - static LongNameHandler + static LongNameHandler* forMeasureUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); @@ -28,6 +28,8 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { void processQuantity(DecimalQuantity &quantity, MicroProps µs, UErrorCode &status) const U_OVERRIDE; + const Modifier* getModifier(int8_t signum, StandardPlural::Form plural) const U_OVERRIDE; + private: SimpleModifier fModifiers[StandardPlural::Form::COUNT]; const PluralRules *rules; @@ -36,15 +38,14 @@ class LongNameHandler : public MicroPropsGenerator, public UMemory { LongNameHandler(const PluralRules *rules, const MicroPropsGenerator *parent) : rules(rules), parent(parent) {} - static LongNameHandler + static LongNameHandler* forCompoundUnit(const Locale &loc, const MeasureUnit &unit, const MeasureUnit &perUnit, const UNumberUnitWidth &width, const PluralRules *rules, const MicroPropsGenerator *parent, UErrorCode &status); - static void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, - SimpleModifier *output, UErrorCode &status); - static void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, - Field field, SimpleModifier *output, UErrorCode &status); + void simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field, UErrorCode &status); + void multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat, + Field field, UErrorCode &status); }; } // namespace impl diff --git a/deps/icu-small/source/i18n/number_mapper.cpp b/deps/icu-small/source/i18n/number_mapper.cpp index d260632f93b0db..2c9a8e5178f35e 100644 --- a/deps/icu-small/source/i18n/number_mapper.cpp +++ b/deps/icu-small/source/i18n/number_mapper.cpp @@ -225,8 +225,8 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert // TODO: Overriding here is a bit of a hack. Should this logic go earlier? if (macros.precision.fType == Precision::PrecisionType::RND_FRACTION) { // For the purposes of rounding, get the original min/max int/frac, since the local - // variables - // have been manipulated for display purposes. + // variables have been manipulated for display purposes. + int maxInt_ = properties.maximumIntegerDigits; int minInt_ = properties.minimumIntegerDigits; int minFrac_ = properties.minimumFractionDigits; int maxFrac_ = properties.maximumFractionDigits; @@ -237,9 +237,15 @@ MacroProps NumberPropertyMapper::oldToNew(const DecimalFormatProperties& propert // Patterns like "#.##E0" (no zeros in the mantissa), which mean round to maxFrac+1 macros.precision = Precision::constructSignificant(1, maxFrac_ + 1).withMode(roundingMode); } else { - // All other scientific patterns, which mean round to minInt+maxFrac - macros.precision = Precision::constructSignificant( - minInt_ + minFrac_, minInt_ + maxFrac_).withMode(roundingMode); + int maxSig_ = minInt_ + maxFrac_; + // Bug #20058: if maxInt_ > minInt_ > 1, then minInt_ should be 1. + if (maxInt_ > minInt_ && minInt_ > 1) { + minInt_ = 1; + } + int minSig_ = minInt_ + minFrac_; + // To avoid regression, maxSig is not reset when minInt_ set to 1. + // TODO: Reset maxSig_ = 1 + minFrac_ to follow the spec. + macros.precision = Precision::constructSignificant(minSig_, maxSig_).withMode(roundingMode); } } } diff --git a/deps/icu-small/source/i18n/number_modifiers.cpp b/deps/icu-small/source/i18n/number_modifiers.cpp index 4385499b54f603..d92ec63b08da58 100644 --- a/deps/icu-small/source/i18n/number_modifiers.cpp +++ b/deps/icu-small/source/i18n/number_modifiers.cpp @@ -53,6 +53,21 @@ void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) { Modifier::~Modifier() = default; +Modifier::Parameters::Parameters() + : obj(nullptr) {} + +Modifier::Parameters::Parameters( + const ModifierStore* _obj, int8_t _signum, StandardPlural::Form _plural) + : obj(_obj), signum(_signum), plural(_plural) {} + +ModifierStore::~ModifierStore() = default; + +AdoptingModifierStore::~AdoptingModifierStore() { + for (const Modifier *mod : mods) { + delete mod; + } +} + int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const { @@ -62,13 +77,11 @@ int32_t ConstantAffixModifier::apply(NumberStringBuilder &output, int leftIndex, return length; } -int32_t ConstantAffixModifier::getPrefixLength(UErrorCode &status) const { - (void)status; +int32_t ConstantAffixModifier::getPrefixLength() const { return fPrefix.length(); } -int32_t ConstantAffixModifier::getCodePointCount(UErrorCode &status) const { - (void)status; +int32_t ConstantAffixModifier::getCodePointCount() const { return fPrefix.countChar32() + fSuffix.countChar32(); } @@ -76,8 +89,38 @@ bool ConstantAffixModifier::isStrong() const { return fStrong; } +bool ConstantAffixModifier::containsField(UNumberFormatFields field) const { + (void)field; + // This method is not currently used. + U_ASSERT(false); + return false; +} + +void ConstantAffixModifier::getParameters(Parameters& output) const { + (void)output; + // This method is not currently used. + U_ASSERT(false); +} + +bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + return fPrefix == _other->fPrefix + && fSuffix == _other->fSuffix + && fField == _other->fField + && fStrong == _other->fStrong; +} + + SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong) - : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong) { + : SimpleModifier(simpleFormatter, field, strong, {}) {} + +SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong, + const Modifier::Parameters parameters) + : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong), + fParameters(parameters) { int32_t argLimit = SimpleFormatter::getArgumentLimit( fCompiledPattern.getBuffer(), fCompiledPattern.length()); if (argLimit == 0) { @@ -90,15 +133,19 @@ SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field fie } else { U_ASSERT(argLimit == 1); if (fCompiledPattern.charAt(1) != 0) { + // Found prefix fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT; fSuffixOffset = 3 + fPrefixLength; } else { + // No prefix fPrefixLength = 0; fSuffixOffset = 2; } if (3 + fPrefixLength < fCompiledPattern.length()) { + // Found suffix fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT; } else { + // No suffix fSuffixLength = 0; } } @@ -113,13 +160,11 @@ int32_t SimpleModifier::apply(NumberStringBuilder &output, int leftIndex, int ri return formatAsPrefixSuffix(output, leftIndex, rightIndex, fField, status); } -int32_t SimpleModifier::getPrefixLength(UErrorCode &status) const { - (void)status; +int32_t SimpleModifier::getPrefixLength() const { return fPrefixLength; } -int32_t SimpleModifier::getCodePointCount(UErrorCode &status) const { - (void)status; +int32_t SimpleModifier::getCodePointCount() const { int32_t count = 0; if (fPrefixLength > 0) { count += fCompiledPattern.countChar32(2, fPrefixLength); @@ -134,10 +179,35 @@ bool SimpleModifier::isStrong() const { return fStrong; } +bool SimpleModifier::containsField(UNumberFormatFields field) const { + (void)field; + // This method is not currently used. + U_ASSERT(false); + return false; +} + +void SimpleModifier::getParameters(Parameters& output) const { + output = fParameters; +} + +bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + if (fParameters.obj != nullptr) { + return fParameters.obj == _other->fParameters.obj; + } + return fCompiledPattern == _other->fCompiledPattern + && fField == _other->fField + && fStrong == _other->fStrong; +} + + int32_t SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field, UErrorCode &status) const { - if (fSuffixOffset == -1) { + if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) { // There is no argument for the inner number; overwrite the entire segment with our string. return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, field, status); } else { @@ -157,6 +227,65 @@ SimpleModifier::formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startI } } + +int32_t +SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result, + int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength, + Field field, UErrorCode& status) { + const UnicodeString& compiledPattern = compiled.compiledPattern; + int32_t argLimit = SimpleFormatter::getArgumentLimit( + compiledPattern.getBuffer(), compiledPattern.length()); + if (argLimit != 2) { + status = U_INTERNAL_PROGRAM_ERROR; + return 0; + } + int32_t offset = 1; // offset into compiledPattern + int32_t length = 0; // chars added to result + + int32_t prefixLength = compiledPattern.charAt(offset); + offset++; + if (prefixLength < ARG_NUM_LIMIT) { + // No prefix + prefixLength = 0; + } else { + prefixLength -= ARG_NUM_LIMIT; + result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status); + offset += prefixLength; + length += prefixLength; + offset++; + } + + int32_t infixLength = compiledPattern.charAt(offset); + offset++; + if (infixLength < ARG_NUM_LIMIT) { + // No infix + infixLength = 0; + } else { + infixLength -= ARG_NUM_LIMIT; + result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status); + offset += infixLength; + length += infixLength; + offset++; + } + + int32_t suffixLength; + if (offset == compiledPattern.length()) { + // No suffix + suffixLength = 0; + } else { + suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT; + offset++; + result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status); + length += suffixLength; + } + + *outPrefixLength = prefixLength; + *outSuffixLength = suffixLength; + + return length; +} + + int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftIndex, int rightIndex, UErrorCode &status) const { int32_t length = output.insert(leftIndex, fPrefix, status); @@ -171,13 +300,11 @@ int32_t ConstantMultiFieldModifier::apply(NumberStringBuilder &output, int leftI return length; } -int32_t ConstantMultiFieldModifier::getPrefixLength(UErrorCode &status) const { - (void)status; +int32_t ConstantMultiFieldModifier::getPrefixLength() const { return fPrefix.length(); } -int32_t ConstantMultiFieldModifier::getCodePointCount(UErrorCode &status) const { - (void)status; +int32_t ConstantMultiFieldModifier::getCodePointCount() const { return fPrefix.codePointCount() + fSuffix.codePointCount(); } @@ -185,6 +312,29 @@ bool ConstantMultiFieldModifier::isStrong() const { return fStrong; } +bool ConstantMultiFieldModifier::containsField(UNumberFormatFields field) const { + return fPrefix.containsField(field) || fSuffix.containsField(field); +} + +void ConstantMultiFieldModifier::getParameters(Parameters& output) const { + output = fParameters; +} + +bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + if (fParameters.obj != nullptr) { + return fParameters.obj == _other->fParameters.obj; + } + return fPrefix.contentEquals(_other->fPrefix) + && fSuffix.contentEquals(_other->fSuffix) + && fOverwrite == _other->fOverwrite + && fStrong == _other->fStrong; +} + + CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, bool overwrite, diff --git a/deps/icu-small/source/i18n/number_modifiers.h b/deps/icu-small/source/i18n/number_modifiers.h index a553100cd92a9b..65ada937d03345 100644 --- a/deps/icu-small/source/i18n/number_modifiers.h +++ b/deps/icu-small/source/i18n/number_modifiers.h @@ -31,12 +31,18 @@ class U_I18N_API ConstantAffixModifier : public Modifier, public UObject { int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE; + int32_t getPrefixLength() const U_OVERRIDE; - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE; + int32_t getCodePointCount() const U_OVERRIDE; bool isStrong() const U_OVERRIDE; + bool containsField(UNumberFormatFields field) const U_OVERRIDE; + + void getParameters(Parameters& output) const U_OVERRIDE; + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE; + private: UnicodeString fPrefix; UnicodeString fSuffix; @@ -52,21 +58,30 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { public: SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong); + SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong, + const Modifier::Parameters parameters); + // Default constructor for LongNameHandler.h SimpleModifier(); int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE; + int32_t getPrefixLength() const U_OVERRIDE; - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE; + int32_t getCodePointCount() const U_OVERRIDE; bool isStrong() const U_OVERRIDE; + bool containsField(UNumberFormatFields field) const U_OVERRIDE; + + void getParameters(Parameters& output) const U_OVERRIDE; + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE; + /** * TODO: This belongs in SimpleFormatterImpl. The only reason I haven't moved it there yet is because - * DoubleSidedStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it. + * NumberStringBuilder is an internal class and SimpleFormatterImpl feels like it should not depend on it. * *

    * Formats a value that is already stored inside the StringBuilder result between the indices @@ -85,16 +100,33 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { * @return The number of characters (UTF-16 code points) that were added to the StringBuilder. */ int32_t - formatAsPrefixSuffix(NumberStringBuilder &result, int32_t startIndex, int32_t endIndex, Field field, - UErrorCode &status) const; + formatAsPrefixSuffix(NumberStringBuilder& result, int32_t startIndex, int32_t endIndex, Field field, + UErrorCode& status) const; + + /** + * TODO: Like above, this belongs with the rest of the SimpleFormatterImpl code. + * I put it here so that the SimpleFormatter uses in NumberStringBuilder are near each other. + * + *

    + * Applies the compiled two-argument pattern to the NumberStringBuilder. + * + *

    + * This method is optimized for the case where the prefix and suffix are often empty, such as + * in the range pattern like "{0}-{1}". + */ + static int32_t + formatTwoArgPattern(const SimpleFormatter& compiled, NumberStringBuilder& result, + int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength, + Field field, UErrorCode& status); private: UnicodeString fCompiledPattern; Field fField; - bool fStrong; - int32_t fPrefixLength; - int32_t fSuffixOffset; - int32_t fSuffixLength; + bool fStrong = false; + int32_t fPrefixLength = 0; + int32_t fSuffixOffset = -1; + int32_t fSuffixLength = 0; + Modifier::Parameters fParameters; }; /** @@ -103,6 +135,18 @@ class U_I18N_API SimpleModifier : public Modifier, public UMemory { */ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { public: + ConstantMultiFieldModifier( + const NumberStringBuilder &prefix, + const NumberStringBuilder &suffix, + bool overwrite, + bool strong, + const Modifier::Parameters parameters) + : fPrefix(prefix), + fSuffix(suffix), + fOverwrite(overwrite), + fStrong(strong), + fParameters(parameters) {} + ConstantMultiFieldModifier( const NumberStringBuilder &prefix, const NumberStringBuilder &suffix, @@ -116,12 +160,18 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE; + int32_t getPrefixLength() const U_OVERRIDE; - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE; + int32_t getCodePointCount() const U_OVERRIDE; bool isStrong() const U_OVERRIDE; + bool containsField(UNumberFormatFields field) const U_OVERRIDE; + + void getParameters(Parameters& output) const U_OVERRIDE; + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE; + protected: // NOTE: In Java, these are stored as array pointers. In C++, the NumberStringBuilder is stored by // value and is treated internally as immutable. @@ -129,6 +179,7 @@ class U_I18N_API ConstantMultiFieldModifier : public Modifier, public UMemory { NumberStringBuilder fSuffix; bool fOverwrite; bool fStrong; + Modifier::Parameters fParameters; }; /** Identical to {@link ConstantMultiFieldModifier}, but supports currency spacing. */ @@ -192,13 +243,11 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory { return 0; } - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE { - (void)status; + int32_t getPrefixLength() const U_OVERRIDE { return 0; } - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE { - (void)status; + int32_t getCodePointCount() const U_OVERRIDE { return 0; } @@ -206,55 +255,75 @@ class U_I18N_API EmptyModifier : public Modifier, public UMemory { return fStrong; } + bool containsField(UNumberFormatFields field) const U_OVERRIDE { + (void)field; + return false; + } + + void getParameters(Parameters& output) const U_OVERRIDE { + output.obj = nullptr; + } + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE { + return other.getCodePointCount() == 0; + } + private: bool fStrong; }; /** - * A ParameterizedModifier by itself is NOT a Modifier. Rather, it wraps a data structure containing two or more - * Modifiers and returns the modifier appropriate for the current situation. + * This implementation of ModifierStore adopts Modifer pointers. */ -class U_I18N_API ParameterizedModifier : public UMemory { +class U_I18N_API AdoptingModifierStore : public ModifierStore, public UMemory { public: - // NOTE: mods is zero-initialized (to nullptr) - ParameterizedModifier() : mods() { - } + virtual ~AdoptingModifierStore(); - // No copying! - ParameterizedModifier(const ParameterizedModifier &other) = delete; + static constexpr StandardPlural::Form DEFAULT_STANDARD_PLURAL = StandardPlural::OTHER; - ~ParameterizedModifier() { - for (const Modifier *mod : mods) { - delete mod; - } - } + AdoptingModifierStore() = default; - void adoptPositiveNegativeModifiers( - const Modifier *positive, const Modifier *zero, const Modifier *negative) { - mods[2] = positive; - mods[1] = zero; - mods[0] = negative; - } + // No copying! + AdoptingModifierStore(const AdoptingModifierStore &other) = delete; - /** The modifier is ADOPTED. */ - void adoptSignPluralModifier(int8_t signum, StandardPlural::Form plural, const Modifier *mod) { + /** + * Sets the Modifier with the specified signum and plural form. + */ + void adoptModifier(int8_t signum, StandardPlural::Form plural, const Modifier *mod) { + U_ASSERT(mods[getModIndex(signum, plural)] == nullptr); mods[getModIndex(signum, plural)] = mod; } + /** + * Sets the Modifier with the specified signum. + * The modifier will apply to all plural forms. + */ + void adoptModifierWithoutPlural(int8_t signum, const Modifier *mod) { + U_ASSERT(mods[getModIndex(signum, DEFAULT_STANDARD_PLURAL)] == nullptr); + mods[getModIndex(signum, DEFAULT_STANDARD_PLURAL)] = mod; + } + /** Returns a reference to the modifier; no ownership change. */ - const Modifier *getModifier(int8_t signum) const { - return mods[signum + 1]; + const Modifier *getModifier(int8_t signum, StandardPlural::Form plural) const U_OVERRIDE { + const Modifier* modifier = mods[getModIndex(signum, plural)]; + if (modifier == nullptr && plural != DEFAULT_STANDARD_PLURAL) { + modifier = mods[getModIndex(signum, DEFAULT_STANDARD_PLURAL)]; + } + return modifier; } /** Returns a reference to the modifier; no ownership change. */ - const Modifier *getModifier(int8_t signum, StandardPlural::Form plural) const { - return mods[getModIndex(signum, plural)]; + const Modifier *getModifierWithoutPlural(int8_t signum) const { + return mods[getModIndex(signum, DEFAULT_STANDARD_PLURAL)]; } private: - const Modifier *mods[3 * StandardPlural::COUNT]; + // NOTE: mods is zero-initialized (to nullptr) + const Modifier *mods[3 * StandardPlural::COUNT] = {}; inline static int32_t getModIndex(int8_t signum, StandardPlural::Form plural) { + U_ASSERT(signum >= -1 && signum <= 1); + U_ASSERT(plural >= 0 && plural < StandardPlural::COUNT); return static_cast(plural) * 3 + (signum + 1); } }; diff --git a/deps/icu-small/source/i18n/number_multiplier.cpp b/deps/icu-small/source/i18n/number_multiplier.cpp index a27142c9bd689b..ecb50dd9b82019 100644 --- a/deps/icu-small/source/i18n/number_multiplier.cpp +++ b/deps/icu-small/source/i18n/number_multiplier.cpp @@ -143,14 +143,14 @@ void Scale::applyReciprocalTo(impl::DecimalQuantity& quantity) const { void MultiplierFormatHandler::setAndChain(const Scale& multiplier, const MicroPropsGenerator* parent) { - this->multiplier = multiplier; - this->parent = parent; + fMultiplier = multiplier; + fParent = parent; } void MultiplierFormatHandler::processQuantity(DecimalQuantity& quantity, MicroProps& micros, UErrorCode& status) const { - parent->processQuantity(quantity, micros, status); - multiplier.applyTo(quantity); + fParent->processQuantity(quantity, micros, status); + fMultiplier.applyTo(quantity); } #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/number_multiplier.h b/deps/icu-small/source/i18n/number_multiplier.h index 82c30c78426cdb..d8235dc601b559 100644 --- a/deps/icu-small/source/i18n/number_multiplier.h +++ b/deps/icu-small/source/i18n/number_multiplier.h @@ -28,8 +28,8 @@ class U_I18N_API MultiplierFormatHandler : public MicroPropsGenerator, public UM UErrorCode& status) const U_OVERRIDE; private: - Scale multiplier; - const MicroPropsGenerator *parent; + Scale fMultiplier; + const MicroPropsGenerator *fParent; }; diff --git a/deps/icu-small/source/i18n/number_padding.cpp b/deps/icu-small/source/i18n/number_padding.cpp index 97e7b6014f9aac..31684d7208b606 100644 --- a/deps/icu-small/source/i18n/number_padding.cpp +++ b/deps/icu-small/source/i18n/number_padding.cpp @@ -62,7 +62,7 @@ Padder Padder::forProperties(const DecimalFormatProperties& properties) { int32_t Padder::padAndApply(const Modifier &mod1, const Modifier &mod2, NumberStringBuilder &string, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const { - int32_t modLength = mod1.getCodePointCount(status) + mod2.getCodePointCount(status); + int32_t modLength = mod1.getCodePointCount() + mod2.getCodePointCount(); int32_t requiredPadding = fWidth - modLength - string.codePointCount(); U_ASSERT(leftIndex == 0 && rightIndex == string.length()); // fix the previous line to remove this assertion diff --git a/deps/icu-small/source/i18n/number_patternmodifier.cpp b/deps/icu-small/source/i18n/number_patternmodifier.cpp index 6417e14378bea6..4c61a0d35bca82 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.cpp +++ b/deps/icu-small/source/i18n/number_patternmodifier.cpp @@ -24,11 +24,11 @@ MutablePatternModifier::MutablePatternModifier(bool isStrong) : fStrong(isStrong) {} void MutablePatternModifier::setPatternInfo(const AffixPatternProvider* patternInfo) { - this->patternInfo = patternInfo; + fPatternInfo = patternInfo; } void MutablePatternModifier::setPatternAttributes(UNumberSignDisplay signDisplay, bool perMille) { - this->signDisplay = signDisplay; + fSignDisplay = signDisplay; this->perMilleReplacesPercent = perMille; } @@ -36,20 +36,20 @@ void MutablePatternModifier::setSymbols(const DecimalFormatSymbols* symbols, const CurrencySymbols* currencySymbols, const UNumberUnitWidth unitWidth, const PluralRules* rules) { U_ASSERT((rules != nullptr) == needsPlurals()); - this->symbols = symbols; - this->currencySymbols = currencySymbols; - this->unitWidth = unitWidth; - this->rules = rules; + fSymbols = symbols; + fCurrencySymbols = currencySymbols; + fUnitWidth = unitWidth; + fRules = rules; } void MutablePatternModifier::setNumberProperties(int8_t signum, StandardPlural::Form plural) { - this->signum = signum; - this->plural = plural; + fSignum = signum; + fPlural = plural; } bool MutablePatternModifier::needsPlurals() const { UErrorCode statusLocal = U_ZERO_ERROR; - return patternInfo->containsSymbolType(AffixPatternType::TYPE_CURRENCY_TRIPLE, statusLocal); + return fPatternInfo->containsSymbolType(AffixPatternType::TYPE_CURRENCY_TRIPLE, statusLocal); // Silently ignore any error codes. } @@ -69,7 +69,7 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* paren StandardPlural::Form::MANY, StandardPlural::Form::OTHER}; - auto pm = new ParameterizedModifier(); + auto pm = new AdoptingModifierStore(); if (pm == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return nullptr; @@ -79,26 +79,25 @@ MutablePatternModifier::createImmutableAndChain(const MicroPropsGenerator* paren // Slower path when we require the plural keyword. for (StandardPlural::Form plural : STANDARD_PLURAL_VALUES) { setNumberProperties(1, plural); - pm->adoptSignPluralModifier(1, plural, createConstantModifier(status)); + pm->adoptModifier(1, plural, createConstantModifier(status)); setNumberProperties(0, plural); - pm->adoptSignPluralModifier(0, plural, createConstantModifier(status)); + pm->adoptModifier(0, plural, createConstantModifier(status)); setNumberProperties(-1, plural); - pm->adoptSignPluralModifier(-1, plural, createConstantModifier(status)); + pm->adoptModifier(-1, plural, createConstantModifier(status)); } if (U_FAILURE(status)) { delete pm; return nullptr; } - return new ImmutablePatternModifier(pm, rules, parent); // adopts pm + return new ImmutablePatternModifier(pm, fRules, parent); // adopts pm } else { // Faster path when plural keyword is not needed. setNumberProperties(1, StandardPlural::Form::COUNT); - Modifier* positive = createConstantModifier(status); + pm->adoptModifierWithoutPlural(1, createConstantModifier(status)); setNumberProperties(0, StandardPlural::Form::COUNT); - Modifier* zero = createConstantModifier(status); + pm->adoptModifierWithoutPlural(0, createConstantModifier(status)); setNumberProperties(-1, StandardPlural::Form::COUNT); - Modifier* negative = createConstantModifier(status); - pm->adoptPositiveNegativeModifiers(positive, zero, negative); + pm->adoptModifierWithoutPlural(-1, createConstantModifier(status)); if (U_FAILURE(status)) { delete pm; return nullptr; @@ -112,15 +111,15 @@ ConstantMultiFieldModifier* MutablePatternModifier::createConstantModifier(UErro NumberStringBuilder b; insertPrefix(a, 0, status); insertSuffix(b, 0, status); - if (patternInfo->hasCurrencySign()) { + if (fPatternInfo->hasCurrencySign()) { return new CurrencySpacingEnabledModifier( - a, b, !patternInfo->hasBody(), fStrong, *symbols, status); + a, b, !fPatternInfo->hasBody(), fStrong, *fSymbols, status); } else { - return new ConstantMultiFieldModifier(a, b, !patternInfo->hasBody(), fStrong); + return new ConstantMultiFieldModifier(a, b, !fPatternInfo->hasBody(), fStrong); } } -ImmutablePatternModifier::ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules, +ImmutablePatternModifier::ImmutablePatternModifier(AdoptingModifierStore* pm, const PluralRules* rules, const MicroPropsGenerator* parent) : pm(pm), rules(rules), parent(parent) {} @@ -132,7 +131,7 @@ void ImmutablePatternModifier::processQuantity(DecimalQuantity& quantity, MicroP void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity& quantity) const { if (rules == nullptr) { - micros.modMiddle = pm->getModifier(quantity.signum()); + micros.modMiddle = pm->getModifierWithoutPlural(quantity.signum()); } else { // TODO: Fix this. Avoid the copy. DecimalQuantity copy(quantity); @@ -144,7 +143,7 @@ void ImmutablePatternModifier::applyToMicros(MicroProps& micros, DecimalQuantity const Modifier* ImmutablePatternModifier::getModifier(int8_t signum, StandardPlural::Form plural) const { if (rules == nullptr) { - return pm->getModifier(signum); + return pm->getModifierWithoutPlural(signum); } else { return pm->getModifier(signum, plural); } @@ -153,13 +152,13 @@ const Modifier* ImmutablePatternModifier::getModifier(int8_t signum, StandardPlu /** Used by the unsafe code path. */ MicroPropsGenerator& MutablePatternModifier::addToChain(const MicroPropsGenerator* parent) { - this->parent = parent; + fParent = parent; return *this; } void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& micros, UErrorCode& status) const { - parent->processQuantity(fq, micros, status); + fParent->processQuantity(fq, micros, status); // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. auto nonConstThis = const_cast(this); @@ -167,7 +166,7 @@ void MutablePatternModifier::processQuantity(DecimalQuantity& fq, MicroProps& mi // TODO: Fix this. Avoid the copy. DecimalQuantity copy(fq); micros.rounder.apply(copy, status); - nonConstThis->setNumberProperties(fq.signum(), utils::getStandardPlural(rules, copy)); + nonConstThis->setNumberProperties(fq.signum(), utils::getStandardPlural(fRules, copy)); } else { nonConstThis->setNumberProperties(fq.signum(), StandardPlural::Form::COUNT); } @@ -183,7 +182,7 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftI int32_t suffixLen = nonConstThis->insertSuffix(output, rightIndex + prefixLen, status); // If the pattern had no decimal stem body (like #,##0.00), overwrite the value. int32_t overwriteLen = 0; - if (!patternInfo->hasBody()) { + if (!fPatternInfo->hasBody()) { overwriteLen = output.splice( leftIndex + prefixLen, rightIndex + prefixLen, @@ -199,28 +198,30 @@ int32_t MutablePatternModifier::apply(NumberStringBuilder& output, int32_t leftI prefixLen, rightIndex + overwriteLen + prefixLen, suffixLen, - *symbols, + *fSymbols, status); return prefixLen + overwriteLen + suffixLen; } -int32_t MutablePatternModifier::getPrefixLength(UErrorCode& status) const { +int32_t MutablePatternModifier::getPrefixLength() const { // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. auto nonConstThis = const_cast(this); // Enter and exit CharSequence Mode to get the length. + UErrorCode status = U_ZERO_ERROR; // status fails only with an iilegal argument exception nonConstThis->prepareAffix(true); int result = AffixUtils::unescapedCodePointCount(currentAffix, *this, status); // prefix length return result; } -int32_t MutablePatternModifier::getCodePointCount(UErrorCode& status) const { +int32_t MutablePatternModifier::getCodePointCount() const { // The unsafe code path performs self-mutation, so we need a const_cast. // This method needs to be const because it overrides a const method in the parent class. auto nonConstThis = const_cast(this); // Render the affixes to get the length + UErrorCode status = U_ZERO_ERROR; // status fails only with an iilegal argument exception nonConstThis->prepareAffix(true); int result = AffixUtils::unescapedCodePointCount(currentAffix, *this, status); // prefix length nonConstThis->prepareAffix(false); @@ -232,6 +233,26 @@ bool MutablePatternModifier::isStrong() const { return fStrong; } +bool MutablePatternModifier::containsField(UNumberFormatFields field) const { + (void)field; + // This method is not currently used. + U_ASSERT(false); + return false; +} + +void MutablePatternModifier::getParameters(Parameters& output) const { + (void)output; + // This method is not currently used. + U_ASSERT(false); +} + +bool MutablePatternModifier::semanticallyEquivalent(const Modifier& other) const { + (void)other; + // This method is not currently used. + U_ASSERT(false); + return false; +} + int32_t MutablePatternModifier::insertPrefix(NumberStringBuilder& sb, int position, UErrorCode& status) { prepareAffix(true); int length = AffixUtils::unescape(currentAffix, sb, position, *this, status); @@ -247,40 +268,40 @@ int32_t MutablePatternModifier::insertSuffix(NumberStringBuilder& sb, int positi /** This method contains the heart of the logic for rendering LDML affix strings. */ void MutablePatternModifier::prepareAffix(bool isPrefix) { PatternStringUtils::patternInfoToStringBuilder( - *patternInfo, isPrefix, signum, signDisplay, plural, perMilleReplacesPercent, currentAffix); + *fPatternInfo, isPrefix, fSignum, fSignDisplay, fPlural, perMilleReplacesPercent, currentAffix); } UnicodeString MutablePatternModifier::getSymbol(AffixPatternType type) const { UErrorCode localStatus = U_ZERO_ERROR; switch (type) { case AffixPatternType::TYPE_MINUS_SIGN: - return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol); + return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kMinusSignSymbol); case AffixPatternType::TYPE_PLUS_SIGN: - return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol); + return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPlusSignSymbol); case AffixPatternType::TYPE_PERCENT: - return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol); + return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPercentSymbol); case AffixPatternType::TYPE_PERMILLE: - return symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol); + return fSymbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kPerMillSymbol); case AffixPatternType::TYPE_CURRENCY_SINGLE: { // UnitWidth ISO and HIDDEN overrides the singular currency symbol. - if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE) { - return currencySymbols->getIntlCurrencySymbol(localStatus); - } else if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { + if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_ISO_CODE) { + return fCurrencySymbols->getIntlCurrencySymbol(localStatus); + } else if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_HIDDEN) { return UnicodeString(); - } else if (unitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) { - return currencySymbols->getNarrowCurrencySymbol(localStatus); + } else if (fUnitWidth == UNumberUnitWidth::UNUM_UNIT_WIDTH_NARROW) { + return fCurrencySymbols->getNarrowCurrencySymbol(localStatus); } else { - return currencySymbols->getCurrencySymbol(localStatus); + return fCurrencySymbols->getCurrencySymbol(localStatus); } } case AffixPatternType::TYPE_CURRENCY_DOUBLE: - return currencySymbols->getIntlCurrencySymbol(localStatus); + return fCurrencySymbols->getIntlCurrencySymbol(localStatus); case AffixPatternType::TYPE_CURRENCY_TRIPLE: // NOTE: This is the code path only for patterns containing "¤¤¤". // Plural currencies set via the API are formatted in LongNameHandler. // This code path is used by DecimalFormat via CurrencyPluralInfo. - U_ASSERT(plural != StandardPlural::Form::COUNT); - return currencySymbols->getPluralName(plural, localStatus); + U_ASSERT(fPlural != StandardPlural::Form::COUNT); + return fCurrencySymbols->getPluralName(fPlural, localStatus); case AffixPatternType::TYPE_CURRENCY_QUAD: return UnicodeString(u"\uFFFD"); case AffixPatternType::TYPE_CURRENCY_QUINT: diff --git a/deps/icu-small/source/i18n/number_patternmodifier.h b/deps/icu-small/source/i18n/number_patternmodifier.h index f1359bd5747d89..ea80d6305e75b4 100644 --- a/deps/icu-small/source/i18n/number_patternmodifier.h +++ b/deps/icu-small/source/i18n/number_patternmodifier.h @@ -18,13 +18,13 @@ U_NAMESPACE_BEGIN // Export an explicit template instantiation of the LocalPointer that is used as a -// data member of ParameterizedModifier. +// data member of AdoptingModifierStore. // (When building DLLs for Windows this is required.) #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= #pragma warning(suppress: 4661) -template class U_I18N_API LocalPointerBase; -template class U_I18N_API LocalPointer; +template class U_I18N_API LocalPointerBase; +template class U_I18N_API LocalPointer; #endif namespace number { @@ -45,10 +45,10 @@ class U_I18N_API ImmutablePatternModifier : public MicroPropsGenerator, public U const Modifier* getModifier(int8_t signum, StandardPlural::Form plural) const; private: - ImmutablePatternModifier(ParameterizedModifier* pm, const PluralRules* rules, + ImmutablePatternModifier(AdoptingModifierStore* pm, const PluralRules* rules, const MicroPropsGenerator* parent); - const LocalPointer pm; + const LocalPointer pm; const PluralRules* rules; const MicroPropsGenerator* parent; @@ -178,12 +178,18 @@ class U_I18N_API MutablePatternModifier int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE; + int32_t getPrefixLength() const U_OVERRIDE; - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE; + int32_t getCodePointCount() const U_OVERRIDE; bool isStrong() const U_OVERRIDE; + bool containsField(UNumberFormatFields field) const U_OVERRIDE; + + void getParameters(Parameters& output) const U_OVERRIDE; + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE; + /** * Returns the string that substitutes a given symbol type in a pattern. */ @@ -196,22 +202,22 @@ class U_I18N_API MutablePatternModifier const bool fStrong; // Pattern details (initialized in setPatternInfo and setPatternAttributes) - const AffixPatternProvider *patternInfo; - UNumberSignDisplay signDisplay; + const AffixPatternProvider *fPatternInfo; + UNumberSignDisplay fSignDisplay; bool perMilleReplacesPercent; // Symbol details (initialized in setSymbols) - const DecimalFormatSymbols *symbols; - UNumberUnitWidth unitWidth; - const CurrencySymbols *currencySymbols; - const PluralRules *rules; + const DecimalFormatSymbols *fSymbols; + UNumberUnitWidth fUnitWidth; + const CurrencySymbols *fCurrencySymbols; + const PluralRules *fRules; // Number details (initialized in setNumberProperties) - int8_t signum; - StandardPlural::Form plural; + int8_t fSignum; + StandardPlural::Form fPlural; // QuantityChain details (initialized in addToChain) - const MicroPropsGenerator *parent; + const MicroPropsGenerator *fParent; // Transient fields for rendering UnicodeString currentAffix; diff --git a/deps/icu-small/source/i18n/number_scientific.cpp b/deps/icu-small/source/i18n/number_scientific.cpp index 40952024e995cc..07c1ce9dac2c89 100644 --- a/deps/icu-small/source/i18n/number_scientific.cpp +++ b/deps/icu-small/source/i18n/number_scientific.cpp @@ -76,17 +76,16 @@ int32_t ScientificModifier::apply(NumberStringBuilder &output, int32_t /*leftInd return i - rightIndex; } -int32_t ScientificModifier::getPrefixLength(UErrorCode &status) const { - (void)status; +int32_t ScientificModifier::getPrefixLength() const { // TODO: Localized exponent separator location. return 0; } -int32_t ScientificModifier::getCodePointCount(UErrorCode &status) const { - (void)status; - // This method is not used for strong modifiers. - U_ASSERT(false); - return 0; +int32_t ScientificModifier::getCodePointCount() const { + // NOTE: This method is only called one place, NumberRangeFormatterImpl. + // The call site only cares about != 0 and != 1. + // Return a very large value so that if this method is used elsewhere, we should notice. + return 999; } bool ScientificModifier::isStrong() const { @@ -94,6 +93,27 @@ bool ScientificModifier::isStrong() const { return true; } +bool ScientificModifier::containsField(UNumberFormatFields field) const { + (void)field; + // This method is not used for inner modifiers. + U_ASSERT(false); + return false; +} + +void ScientificModifier::getParameters(Parameters& output) const { + // Not part of any plural sets + output.obj = nullptr; +} + +bool ScientificModifier::semanticallyEquivalent(const Modifier& other) const { + auto* _other = dynamic_cast(&other); + if (_other == nullptr) { + return false; + } + // TODO: Check for locale symbols and settings as well? Could be less efficient. + return fExponent == _other->fExponent; +} + // Note: Visual Studio does not compile this function without full name space. Why? icu::number::impl::ScientificHandler::ScientificHandler(const Notation *notation, const DecimalFormatSymbols *symbols, const MicroPropsGenerator *parent) : diff --git a/deps/icu-small/source/i18n/number_scientific.h b/deps/icu-small/source/i18n/number_scientific.h index 974ab3adb614ca..e377bd941efaeb 100644 --- a/deps/icu-small/source/i18n/number_scientific.h +++ b/deps/icu-small/source/i18n/number_scientific.h @@ -24,12 +24,18 @@ class U_I18N_API ScientificModifier : public UMemory, public Modifier { int32_t apply(NumberStringBuilder &output, int32_t leftIndex, int32_t rightIndex, UErrorCode &status) const U_OVERRIDE; - int32_t getPrefixLength(UErrorCode &status) const U_OVERRIDE; + int32_t getPrefixLength() const U_OVERRIDE; - int32_t getCodePointCount(UErrorCode &status) const U_OVERRIDE; + int32_t getCodePointCount() const U_OVERRIDE; bool isStrong() const U_OVERRIDE; + bool containsField(UNumberFormatFields field) const U_OVERRIDE; + + void getParameters(Parameters& output) const U_OVERRIDE; + + bool semanticallyEquivalent(const Modifier& other) const U_OVERRIDE; + private: int32_t fExponent; const ScientificHandler *fHandler; diff --git a/deps/icu-small/source/i18n/number_stringbuilder.cpp b/deps/icu-small/source/i18n/number_stringbuilder.cpp index 37770d11d51dfb..74ba33fbbc159f 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.cpp +++ b/deps/icu-small/source/i18n/number_stringbuilder.cpp @@ -241,6 +241,9 @@ NumberStringBuilder::insert(int32_t index, const NumberStringBuilder &other, UEr } int32_t NumberStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { + U_ASSERT(index >= 0); + U_ASSERT(index <= fLength); + U_ASSERT(count >= 0); if (index == 0 && fZero - count >= 0) { // Append to start fZero -= count; @@ -485,4 +488,13 @@ void NumberStringBuilder::getAllFieldPositions(FieldPositionIteratorHandler& fpi } } +bool NumberStringBuilder::containsField(Field field) const { + for (int32_t i = 0; i < fLength; i++) { + if (field == fieldAt(i)) { + return true; + } + } + return false; +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/number_stringbuilder.h b/deps/icu-small/source/i18n/number_stringbuilder.h index cd8ce2f805e994..b14ad9ede2f90f 100644 --- a/deps/icu-small/source/i18n/number_stringbuilder.h +++ b/deps/icu-small/source/i18n/number_stringbuilder.h @@ -106,6 +106,8 @@ class U_I18N_API NumberStringBuilder : public UMemory { void getAllFieldPositions(FieldPositionIteratorHandler& fpih, UErrorCode& status) const; + bool containsField(Field field) const; + private: bool fUsingHeap = false; ValueOrHeapArray fChars; diff --git a/deps/icu-small/source/i18n/number_types.h b/deps/icu-small/source/i18n/number_types.h index 57da72f8aa0ac1..00a6818869fdc2 100644 --- a/deps/icu-small/source/i18n/number_types.h +++ b/deps/icu-small/source/i18n/number_types.h @@ -16,6 +16,7 @@ #include "uassert.h" #include "unicode/platform.h" #include "unicode/uniset.h" +#include "standardplural.h" U_NAMESPACE_BEGIN namespace number { namespace impl { @@ -45,6 +46,7 @@ class Modifier; class MutablePatternModifier; class DecimalQuantity; class NumberStringBuilder; +class ModifierStore; struct MicroProps; @@ -127,12 +129,13 @@ class U_I18N_API AffixPatternProvider { virtual bool hasBody() const = 0; }; + /** * A Modifier is an object that can be passed through the formatting pipeline until it is finally applied to the string * builder. A Modifier usually contains a prefix and a suffix that are applied, but it could contain something else, * like a {@link com.ibm.icu.text.SimpleFormatter} pattern. * - * A Modifier is usually immutable, except in cases such as {@link MurkyModifier}, which are mutable for performance + * A Modifier is usually immutable, except in cases such as {@link MutablePatternModifier}, which are mutable for performance * reasons. * * Exported as U_I18N_API because it is a base class for other exported types @@ -162,12 +165,12 @@ class U_I18N_API Modifier { * * @return The number of characters (UTF-16 code units) in the prefix. */ - virtual int32_t getPrefixLength(UErrorCode& status) const = 0; + virtual int32_t getPrefixLength() const = 0; /** * Returns the number of code points in the modifier, prefix plus suffix. */ - virtual int32_t getCodePointCount(UErrorCode& status) const = 0; + virtual int32_t getCodePointCount() const = 0; /** * Whether this modifier is strong. If a modifier is strong, it should always be applied immediately and not allowed @@ -177,8 +180,57 @@ class U_I18N_API Modifier { * @return Whether the modifier is strong. */ virtual bool isStrong() const = 0; + + /** + * Whether the modifier contains at least one occurrence of the given field. + */ + virtual bool containsField(UNumberFormatFields field) const = 0; + + /** + * A fill-in for getParameters(). obj will always be set; if non-null, the other + * two fields are also safe to read. + */ + struct U_I18N_API Parameters { + const ModifierStore* obj = nullptr; + int8_t signum; + StandardPlural::Form plural; + + Parameters(); + Parameters(const ModifierStore* _obj, int8_t _signum, StandardPlural::Form _plural); + }; + + /** + * Gets a set of "parameters" for this Modifier. + * + * TODO: Make this return a `const Parameters*` more like Java? + */ + virtual void getParameters(Parameters& output) const = 0; + + /** + * Returns whether this Modifier is *semantically equivalent* to the other Modifier; + * in many cases, this is the same as equal, but parameters should be ignored. + */ + virtual bool semanticallyEquivalent(const Modifier& other) const = 0; }; + +/** + * This is *not* a modifier; rather, it is an object that can return modifiers + * based on given parameters. + * + * Exported as U_I18N_API because it is a base class for other exported types. + */ +class U_I18N_API ModifierStore { + public: + virtual ~ModifierStore(); + + /** + * Returns a Modifier with the given parameters (best-effort). + */ + virtual const Modifier* getModifier(int8_t signum, StandardPlural::Form plural) const = 0; +}; + + /** * This interface is used when all number formatting settings, including the locale, are known, except for the quantity * itself. The {@link #processQuantity} method performs the final step in the number processing pipeline: it uses the diff --git a/deps/icu-small/source/i18n/numfmt.cpp b/deps/icu-small/source/i18n/numfmt.cpp index 13f23131b1851b..ef47e1e01b5ee6 100644 --- a/deps/icu-small/source/i18n/numfmt.cpp +++ b/deps/icu-small/source/i18n/numfmt.cpp @@ -1326,13 +1326,13 @@ NumberFormat::makeInstance(const Locale& desiredLocale, // if the locale has "@compat=host", create a host-specific NumberFormat if (U_SUCCESS(status) && count > 0 && uprv_strcmp(buffer, "host") == 0) { - Win32NumberFormat *f = NULL; UBool curr = TRUE; switch (style) { case UNUM_DECIMAL: curr = FALSE; // fall-through + U_FALLTHROUGH; case UNUM_CURRENCY: case UNUM_CURRENCY_ISO: // do not support plural formatting here @@ -1340,14 +1340,13 @@ NumberFormat::makeInstance(const Locale& desiredLocale, case UNUM_CURRENCY_ACCOUNTING: case UNUM_CASH_CURRENCY: case UNUM_CURRENCY_STANDARD: - f = new Win32NumberFormat(desiredLocale, curr, status); - + { + LocalPointer f(new Win32NumberFormat(desiredLocale, curr, status), status); if (U_SUCCESS(status)) { - return f; + return f.orphan(); } - - delete f; - break; + } + break; default: break; } @@ -1417,8 +1416,7 @@ NumberFormat::makeInstance(const Locale& desiredLocale, } } - - NumberFormat *f; + LocalPointer f; if (ns->isAlgorithmic()) { UnicodeString nsDesc; UnicodeString nsRuleSetGroup; @@ -1453,7 +1451,7 @@ NumberFormat::makeInstance(const Locale& desiredLocale, return NULL; } r->setDefaultRuleSet(nsRuleSetName,status); - f = r; + f.adoptInstead(r); } else { // replace single currency sign in the pattern with double currency sign // if the style is UNUM_CURRENCY_ISO @@ -1462,9 +1460,22 @@ NumberFormat::makeInstance(const Locale& desiredLocale, UnicodeString(TRUE, gDoubleCurrencySign, 2)); } - // "new DecimalFormat()" does not adopt the symbols if its memory allocation fails. - DecimalFormatSymbols *syms = symbolsToAdopt.orphan(); - DecimalFormat* df = new DecimalFormat(pattern, syms, style, status); + // "new DecimalFormat()" does not adopt the symbols argument if its memory allocation fails. + // So we can't use adoptInsteadAndCheckErrorCode as we need to know if the 'new' failed. + DecimalFormatSymbols *syms = symbolsToAdopt.getAlias(); + LocalPointer df(new DecimalFormat(pattern, syms, style, status)); + + if (df.isValid()) { + // if the DecimalFormat object was successfully new'ed, then it will own symbolsToAdopt, even if the status is a failure. + symbolsToAdopt.orphan(); + } + else { + status = U_MEMORY_ALLOCATION_ERROR; + } + + if (U_FAILURE(status)) { + return nullptr; + } // if it is cash currency style, setCurrencyUsage with usage if (style == UNUM_CASH_CURRENCY){ @@ -1472,25 +1483,18 @@ NumberFormat::makeInstance(const Locale& desiredLocale, } if (U_FAILURE(status)) { - delete df; - return NULL; + return nullptr; } - f = df; - if (f == NULL) { - delete syms; - status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } + f.adoptInstead(df.orphan()); } f->setLocaleIDs(ures_getLocaleByType(ownedResource.getAlias(), ULOC_VALID_LOCALE, &status), ures_getLocaleByType(ownedResource.getAlias(), ULOC_ACTUAL_LOCALE, &status)); if (U_FAILURE(status)) { - delete f; return NULL; } - return f; + return f.orphan(); } /** diff --git a/deps/icu-small/source/i18n/numparse_currency.cpp b/deps/icu-small/source/i18n/numparse_currency.cpp index ae8196ec483799..598ace56533c36 100644 --- a/deps/icu-small/source/i18n/numparse_currency.cpp +++ b/deps/icu-small/source/i18n/numparse_currency.cpp @@ -111,7 +111,9 @@ bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber int32_t overlap2; if (!fCurrency2.isEmpty()) { - overlap2 = segment.getCaseSensitivePrefixLength(fCurrency2); + // ISO codes should be accepted case-insensitive. + // https://unicode-org.atlassian.net/browse/ICU-13696 + overlap2 = segment.getCommonPrefixLength(fCurrency2); } else { overlap2 = -1; } diff --git a/deps/icu-small/source/i18n/numparse_impl.cpp b/deps/icu-small/source/i18n/numparse_impl.cpp index 5fa52f63351c31..3192a3959389a7 100644 --- a/deps/icu-small/source/i18n/numparse_impl.cpp +++ b/deps/icu-small/source/i18n/numparse_impl.cpp @@ -159,10 +159,10 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern, // and to maintain regressive behavior, divide by 100 even if no percent sign is present. - if (affixProvider->containsSymbolType(AffixPatternType::TYPE_PERCENT, status)) { + if (!isStrict && affixProvider->containsSymbolType(AffixPatternType::TYPE_PERCENT, status)) { parser->addMatcher(parser->fLocalMatchers.percent = {symbols}); } - if (affixProvider->containsSymbolType(AffixPatternType::TYPE_PERMILLE, status)) { + if (!isStrict && affixProvider->containsSymbolType(AffixPatternType::TYPE_PERMILLE, status)) { parser->addMatcher(parser->fLocalMatchers.permille = {symbols}); } diff --git a/deps/icu-small/source/i18n/numparse_scientific.cpp b/deps/icu-small/source/i18n/numparse_scientific.cpp index 611695e57d4743..de38957440817c 100644 --- a/deps/icu-small/source/i18n/numparse_scientific.cpp +++ b/deps/icu-small/source/i18n/numparse_scientific.cpp @@ -56,6 +56,11 @@ bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErr return false; } + // Only accept one exponent per string. + if (0 != (result.flags & FLAG_HAS_EXPONENT)) { + return false; + } + // First match the scientific separator, and then match another number after it. // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again. int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString); diff --git a/deps/icu-small/source/i18n/numrange_fluent.cpp b/deps/icu-small/source/i18n/numrange_fluent.cpp new file mode 100644 index 00000000000000..12b006c8ad5540 --- /dev/null +++ b/deps/icu-small/source/i18n/numrange_fluent.cpp @@ -0,0 +1,472 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "numrange_impl.h" +#include "util.h" +#include "number_utypes.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + + +// This function needs to be declared in this namespace so it can be friended. +// NOTE: In Java, this logic is handled in the resolve() function. +void icu::number::impl::touchRangeLocales(RangeMacroProps& macros) { + macros.formatter1.fMacros.locale = macros.locale; + macros.formatter2.fMacros.locale = macros.locale; +} + + +template +Derived NumberRangeFormatterSettings::numberFormatterBoth(const UnlocalizedNumberFormatter& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter1 = formatter; + copy.fMacros.singleFormatter = true; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterBoth(const UnlocalizedNumberFormatter& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter1 = formatter; + move.fMacros.singleFormatter = true; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterBoth(UnlocalizedNumberFormatter&& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter1 = std::move(formatter); + copy.fMacros.singleFormatter = true; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterBoth(UnlocalizedNumberFormatter&& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter1 = std::move(formatter); + move.fMacros.singleFormatter = true; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterFirst(const UnlocalizedNumberFormatter& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter1 = formatter; + copy.fMacros.singleFormatter = false; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterFirst(const UnlocalizedNumberFormatter& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter1 = formatter; + move.fMacros.singleFormatter = false; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterFirst(UnlocalizedNumberFormatter&& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter1 = std::move(formatter); + copy.fMacros.singleFormatter = false; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterFirst(UnlocalizedNumberFormatter&& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter1 = std::move(formatter); + move.fMacros.singleFormatter = false; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterSecond(const UnlocalizedNumberFormatter& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter2 = formatter; + copy.fMacros.singleFormatter = false; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterSecond(const UnlocalizedNumberFormatter& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter2 = formatter; + move.fMacros.singleFormatter = false; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterSecond(UnlocalizedNumberFormatter&& formatter) const& { + Derived copy(*this); + copy.fMacros.formatter2 = std::move(formatter); + copy.fMacros.singleFormatter = false; + touchRangeLocales(copy.fMacros); + return copy; +} + +template +Derived NumberRangeFormatterSettings::numberFormatterSecond(UnlocalizedNumberFormatter&& formatter) && { + Derived move(std::move(*this)); + move.fMacros.formatter2 = std::move(formatter); + move.fMacros.singleFormatter = false; + touchRangeLocales(move.fMacros); + return move; +} + +template +Derived NumberRangeFormatterSettings::collapse(UNumberRangeCollapse collapse) const& { + Derived copy(*this); + copy.fMacros.collapse = collapse; + return copy; +} + +template +Derived NumberRangeFormatterSettings::collapse(UNumberRangeCollapse collapse) && { + Derived move(std::move(*this)); + move.fMacros.collapse = collapse; + return move; +} + +template +Derived NumberRangeFormatterSettings::identityFallback(UNumberRangeIdentityFallback identityFallback) const& { + Derived copy(*this); + copy.fMacros.identityFallback = identityFallback; + return copy; +} + +template +Derived NumberRangeFormatterSettings::identityFallback(UNumberRangeIdentityFallback identityFallback) && { + Derived move(std::move(*this)); + move.fMacros.identityFallback = identityFallback; + return move; +} + +// Declare all classes that implement NumberRangeFormatterSettings +// See https://stackoverflow.com/a/495056/1407170 +template +class icu::number::NumberRangeFormatterSettings; +template +class icu::number::NumberRangeFormatterSettings; + + +UnlocalizedNumberRangeFormatter NumberRangeFormatter::with() { + UnlocalizedNumberRangeFormatter result; + return result; +} + +LocalizedNumberRangeFormatter NumberRangeFormatter::withLocale(const Locale& locale) { + return with().locale(locale); +} + + +template using NFS = NumberRangeFormatterSettings; +using LNF = LocalizedNumberRangeFormatter; +using UNF = UnlocalizedNumberRangeFormatter; + +UnlocalizedNumberRangeFormatter::UnlocalizedNumberRangeFormatter(const UNF& other) + : UNF(static_cast&>(other)) {} + +UnlocalizedNumberRangeFormatter::UnlocalizedNumberRangeFormatter(const NFS& other) + : NFS(other) { + // No additional fields to assign +} + +// Make default copy constructor call the NumberRangeFormatterSettings copy constructor. +UnlocalizedNumberRangeFormatter::UnlocalizedNumberRangeFormatter(UNF&& src) U_NOEXCEPT + : UNF(static_cast&&>(src)) {} + +UnlocalizedNumberRangeFormatter::UnlocalizedNumberRangeFormatter(NFS&& src) U_NOEXCEPT + : NFS(std::move(src)) { + // No additional fields to assign +} + +UnlocalizedNumberRangeFormatter& UnlocalizedNumberRangeFormatter::operator=(const UNF& other) { + NFS::operator=(static_cast&>(other)); + // No additional fields to assign + return *this; +} + +UnlocalizedNumberRangeFormatter& UnlocalizedNumberRangeFormatter::operator=(UNF&& src) U_NOEXCEPT { + NFS::operator=(static_cast&&>(src)); + // No additional fields to assign + return *this; +} + +// Make default copy constructor call the NumberRangeFormatterSettings copy constructor. +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(const LNF& other) + : LNF(static_cast&>(other)) {} + +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(const NFS& other) + : NFS(other) { + // No additional fields to assign +} + +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(LocalizedNumberRangeFormatter&& src) U_NOEXCEPT + : LNF(static_cast&&>(src)) {} + +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(NFS&& src) U_NOEXCEPT + : NFS(std::move(src)) { + // Steal the compiled formatter + LNF&& _src = static_cast(src); + auto* stolen = _src.fAtomicFormatter.exchange(nullptr); + delete fAtomicFormatter.exchange(stolen); +} + +LocalizedNumberRangeFormatter& LocalizedNumberRangeFormatter::operator=(const LNF& other) { + NFS::operator=(static_cast&>(other)); + // Do not steal; just clear + delete fAtomicFormatter.exchange(nullptr); + return *this; +} + +LocalizedNumberRangeFormatter& LocalizedNumberRangeFormatter::operator=(LNF&& src) U_NOEXCEPT { + NFS::operator=(static_cast&&>(src)); + // Steal the compiled formatter + auto* stolen = src.fAtomicFormatter.exchange(nullptr); + delete fAtomicFormatter.exchange(stolen); + return *this; +} + + +LocalizedNumberRangeFormatter::~LocalizedNumberRangeFormatter() { + delete fAtomicFormatter.exchange(nullptr); +} + +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(const RangeMacroProps& macros, const Locale& locale) { + fMacros = macros; + fMacros.locale = locale; + touchRangeLocales(fMacros); +} + +LocalizedNumberRangeFormatter::LocalizedNumberRangeFormatter(RangeMacroProps&& macros, const Locale& locale) { + fMacros = std::move(macros); + fMacros.locale = locale; + touchRangeLocales(fMacros); +} + +LocalizedNumberRangeFormatter UnlocalizedNumberRangeFormatter::locale(const Locale& locale) const& { + return LocalizedNumberRangeFormatter(fMacros, locale); +} + +LocalizedNumberRangeFormatter UnlocalizedNumberRangeFormatter::locale(const Locale& locale)&& { + return LocalizedNumberRangeFormatter(std::move(fMacros), locale); +} + + +FormattedNumberRange LocalizedNumberRangeFormatter::formatFormattableRange( + const Formattable& first, const Formattable& second, UErrorCode& status) const { + if (U_FAILURE(status)) { + return FormattedNumberRange(U_ILLEGAL_ARGUMENT_ERROR); + } + + auto results = new UFormattedNumberRangeData(); + if (results == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return FormattedNumberRange(status); + } + + first.populateDecimalQuantity(results->quantity1, status); + if (U_FAILURE(status)) { + return FormattedNumberRange(status); + } + + second.populateDecimalQuantity(results->quantity2, status); + if (U_FAILURE(status)) { + return FormattedNumberRange(status); + } + + formatImpl(*results, first == second, status); + + // Do not save the results object if we encountered a failure. + if (U_SUCCESS(status)) { + return FormattedNumberRange(results); + } else { + delete results; + return FormattedNumberRange(status); + } +} + +void LocalizedNumberRangeFormatter::formatImpl( + UFormattedNumberRangeData& results, bool equalBeforeRounding, UErrorCode& status) const { + auto* impl = getFormatter(status); + if (U_FAILURE(status)) { + return; + } + if (impl == nullptr) { + status = U_INTERNAL_PROGRAM_ERROR; + return; + } + impl->format(results, equalBeforeRounding, status); +} + +const impl::NumberRangeFormatterImpl* +LocalizedNumberRangeFormatter::getFormatter(UErrorCode& status) const { + // TODO: Move this into umutex.h? (similar logic also in decimfmt.cpp) + // See ICU-20146 + + if (U_FAILURE(status)) { + return nullptr; + } + + // First try to get the pre-computed formatter + auto* ptr = fAtomicFormatter.load(); + if (ptr != nullptr) { + return ptr; + } + + // Try computing the formatter on our own + auto* temp = new NumberRangeFormatterImpl(fMacros, status); + if (U_FAILURE(status)) { + return nullptr; + } + if (temp == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + + // Note: ptr starts as nullptr; during compare_exchange, + // it is set to what is actually stored in the atomic + // if another thread beat us to computing the formatter object. + auto* nonConstThis = const_cast(this); + if (!nonConstThis->fAtomicFormatter.compare_exchange_strong(ptr, temp)) { + // Another thread beat us to computing the formatter + delete temp; + return ptr; + } else { + // Our copy of the formatter got stored in the atomic + return temp; + } + +} + + +FormattedNumberRange::FormattedNumberRange(FormattedNumberRange&& src) U_NOEXCEPT + : fResults(src.fResults), fErrorCode(src.fErrorCode) { + // Disown src.fResults to prevent double-deletion + src.fResults = nullptr; + src.fErrorCode = U_INVALID_STATE_ERROR; +} + +FormattedNumberRange& FormattedNumberRange::operator=(FormattedNumberRange&& src) U_NOEXCEPT { + delete fResults; + fResults = src.fResults; + fErrorCode = src.fErrorCode; + // Disown src.fResults to prevent double-deletion + src.fResults = nullptr; + src.fErrorCode = U_INVALID_STATE_ERROR; + return *this; +} + +UnicodeString FormattedNumberRange::toString(UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (fResults == nullptr) { + status = fErrorCode; + return ICU_Utility::makeBogusString(); + } + return fResults->string.toUnicodeString(); +} + +Appendable& FormattedNumberRange::appendTo(Appendable& appendable, UErrorCode& status) const { + if (U_FAILURE(status)) { + return appendable; + } + if (fResults == nullptr) { + status = fErrorCode; + return appendable; + } + appendable.appendString(fResults->string.chars(), fResults->string.length()); + return appendable; +} + +UBool FormattedNumberRange::nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const { + if (U_FAILURE(status)) { + return FALSE; + } + if (fResults == nullptr) { + status = fErrorCode; + return FALSE; + } + // NOTE: MSVC sometimes complains when implicitly converting between bool and UBool + return fResults->string.nextFieldPosition(fieldPosition, status) ? TRUE : FALSE; +} + +void FormattedNumberRange::getAllFieldPositions(FieldPositionIterator& iterator, UErrorCode& status) const { + FieldPositionIteratorHandler fpih(&iterator, status); + getAllFieldPositionsImpl(fpih, status); +} + +void FormattedNumberRange::getAllFieldPositionsImpl( + FieldPositionIteratorHandler& fpih, UErrorCode& status) const { + if (U_FAILURE(status)) { + return; + } + if (fResults == nullptr) { + status = fErrorCode; + return; + } + fResults->string.getAllFieldPositions(fpih, status); +} + +UnicodeString FormattedNumberRange::getFirstDecimal(UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (fResults == nullptr) { + status = fErrorCode; + return ICU_Utility::makeBogusString(); + } + return fResults->quantity1.toScientificString(); +} + +UnicodeString FormattedNumberRange::getSecondDecimal(UErrorCode& status) const { + if (U_FAILURE(status)) { + return ICU_Utility::makeBogusString(); + } + if (fResults == nullptr) { + status = fErrorCode; + return ICU_Utility::makeBogusString(); + } + return fResults->quantity2.toScientificString(); +} + +UNumberRangeIdentityResult FormattedNumberRange::getIdentityResult(UErrorCode& status) const { + if (U_FAILURE(status)) { + return UNUM_IDENTITY_RESULT_NOT_EQUAL; + } + if (fResults == nullptr) { + status = fErrorCode; + return UNUM_IDENTITY_RESULT_NOT_EQUAL; + } + return fResults->identityResult; +} + +FormattedNumberRange::~FormattedNumberRange() { + delete fResults; +} + + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/numrange_impl.cpp b/deps/icu-small/source/i18n/numrange_impl.cpp new file mode 100644 index 00000000000000..21365bfc59bca3 --- /dev/null +++ b/deps/icu-small/source/i18n/numrange_impl.cpp @@ -0,0 +1,486 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT + +#include "unicode/numberrangeformatter.h" +#include "numrange_impl.h" +#include "patternprops.h" +#include "uresimp.h" +#include "util.h" + +using namespace icu; +using namespace icu::number; +using namespace icu::number::impl; + +namespace { + +// Helper function for 2-dimensional switch statement +constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) { + return static_cast(a) | (static_cast(b) << 4); +} + + +struct NumberRangeData { + SimpleFormatter rangePattern; + SimpleFormatter approximatelyPattern; +}; + +class NumberRangeDataSink : public ResourceSink { + public: + NumberRangeDataSink(NumberRangeData& data) : fData(data) {} + + void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { + ResourceTable miscTable = value.getTable(status); + if (U_FAILURE(status)) { return; } + for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) { + if (uprv_strcmp(key, "range") == 0) { + if (fData.rangePattern.getArgumentLimit() != 0) { + continue; // have already seen this pattern + } + fData.rangePattern = {value.getUnicodeString(status), status}; + } else if (uprv_strcmp(key, "approximately") == 0) { + if (fData.approximatelyPattern.getArgumentLimit() != 0) { + continue; // have already seen this pattern + } + fData.approximatelyPattern = {value.getUnicodeString(status), status}; + } + } + } + + private: + NumberRangeData& fData; +}; + +void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status)); + if (U_FAILURE(status)) { return; } + NumberRangeDataSink sink(data); + + CharString dataPath; + dataPath.append("NumberElements/", -1, status); + dataPath.append(nsName, -1, status); + dataPath.append("/miscPatterns", -1, status); + ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status); + if (U_FAILURE(status)) { return; } + + // TODO: Is it necessary to manually fall back to latn, or does the data sink take care of that? + + if (data.rangePattern.getArgumentLimit() == 0) { + // No data! + data.rangePattern = {u"{0}–{1}", status}; + } + if (data.approximatelyPattern.getArgumentLimit() == 0) { + // No data! + data.approximatelyPattern = {u"~{0}", status}; + } +} + +class PluralRangesDataSink : public ResourceSink { + public: + PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {} + + void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE { + ResourceArray entriesArray = value.getArray(status); + if (U_FAILURE(status)) { return; } + fOutput.setCapacity(entriesArray.getSize()); + for (int i = 0; entriesArray.getValue(i, value); i++) { + ResourceArray pluralFormsArray = value.getArray(status); + if (U_FAILURE(status)) { return; } + pluralFormsArray.getValue(0, value); + StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + pluralFormsArray.getValue(1, value); + StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + pluralFormsArray.getValue(2, value); + StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status); + if (U_FAILURE(status)) { return; } + fOutput.addPluralRange(first, second, result); + } + } + + private: + StandardPluralRanges& fOutput; +}; + +void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status)); + if (U_FAILURE(status)) { return; } + + CharString dataPath; + dataPath.append("locales/", -1, status); + dataPath.append(locale.getLanguage(), -1, status); + if (U_FAILURE(status)) { return; } + int32_t setLen; + // Not all languages are covered: fail gracefully + UErrorCode internalStatus = U_ZERO_ERROR; + const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus); + if (U_FAILURE(internalStatus)) { return; } + + dataPath.clear(); + dataPath.append("rules/", -1, status); + dataPath.appendInvariantChars(set, setLen, status); + if (U_FAILURE(status)) { return; } + PluralRangesDataSink sink(output); + ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status); + if (U_FAILURE(status)) { return; } +} + +} // namespace + + +void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) { + getPluralRangesData(locale, *this, status); +} + +void StandardPluralRanges::addPluralRange( + StandardPlural::Form first, + StandardPlural::Form second, + StandardPlural::Form result) { + U_ASSERT(fTriplesLen < fTriples.getCapacity()); + fTriples[fTriplesLen] = {first, second, result}; + fTriplesLen++; +} + +void StandardPluralRanges::setCapacity(int32_t length) { + if (length > fTriples.getCapacity()) { + fTriples.resize(length, 0); + } +} + +StandardPlural::Form +StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const { + for (int32_t i=0; isemanticallyEquivalent(*micros2.modInner) + || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle) + || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) { + formatRange(data, micros1, micros2, status); + data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL; + return; + } + + // Check for identity + if (equalBeforeRounding) { + data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING; + } else if (data.quantity1 == data.quantity2) { + data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING; + } else { + data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL; + } + + switch (identity2d(fIdentityFallback, data.identityResult)) { + case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, + UNUM_IDENTITY_RESULT_NOT_EQUAL): + case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, + UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_RANGE, + UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, + UNUM_IDENTITY_RESULT_NOT_EQUAL): + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_NOT_EQUAL): + case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_NOT_EQUAL): + formatRange(data, micros1, micros2, status); + break; + + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, + UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY, + UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): + formatApproximately(data, micros1, micros2, status); + break; + + case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING): + case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, + UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING): + formatSingleValue(data, micros1, micros2, status); + break; + + default: + U_ASSERT(false); + break; + } +} + + +void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const { + if (U_FAILURE(status)) { return; } + if (fSameFormatters) { + int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status); + NumberFormatterImpl::writeAffixes(micros1, data.string, 0, length, status); + } else { + formatRange(data, micros1, micros2, status); + } +} + + +void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const { + if (U_FAILURE(status)) { return; } + if (fSameFormatters) { + int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status); + // HEURISTIC: Desired modifier order: inner, middle, approximately, outer. + length += micros1.modInner->apply(data.string, 0, length, status); + length += micros1.modMiddle->apply(data.string, 0, length, status); + length += fApproximatelyModifier.apply(data.string, 0, length, status); + micros1.modOuter->apply(data.string, 0, length, status); + } else { + formatRange(data, micros1, micros2, status); + } +} + + +void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const { + if (U_FAILURE(status)) { return; } + + // modInner is always notation (scientific); collapsable in ALL. + // modOuter is always units; collapsable in ALL, AUTO, and UNIT. + // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT. + // Never collapse an outer mod but not an inner mod. + bool collapseOuter, collapseMiddle, collapseInner; + switch (fCollapse) { + case UNUM_RANGE_COLLAPSE_ALL: + case UNUM_RANGE_COLLAPSE_AUTO: + case UNUM_RANGE_COLLAPSE_UNIT: + { + // OUTER MODIFIER + collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter); + + if (!collapseOuter) { + // Never collapse inner mods if outer mods are not collapsable + collapseMiddle = false; + collapseInner = false; + break; + } + + // MIDDLE MODIFIER + collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle); + + if (!collapseMiddle) { + // Never collapse inner mods if outer mods are not collapsable + collapseInner = false; + break; + } + + // MIDDLE MODIFIER HEURISTICS + // (could disable collapsing of the middle modifier) + // The modifiers are equal by this point, so we can look at just one of them. + const Modifier* mm = micros1.modMiddle; + if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) { + // Only collapse if the modifier is a unit. + // TODO: Make a better way to check for a unit? + // TODO: Handle case where the modifier has both notation and unit (compact currency)? + if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) { + collapseMiddle = false; + } + } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) { + // Heuristic as of ICU 63: collapse only if the modifier is more than one code point. + if (mm->getCodePointCount() <= 1) { + collapseMiddle = false; + } + } + + if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) { + collapseInner = false; + break; + } + + // INNER MODIFIER + collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner); + + // All done checking for collapsability. + break; + } + + default: + collapseOuter = false; + collapseMiddle = false; + collapseInner = false; + break; + } + + NumberStringBuilder& string = data.string; + int32_t lengthPrefix = 0; + int32_t length1 = 0; + int32_t lengthInfix = 0; + int32_t length2 = 0; + int32_t lengthSuffix = 0; + + // Use #define so that these are evaluated at the call site. + #define UPRV_INDEX_0 (lengthPrefix) + #define UPRV_INDEX_1 (lengthPrefix + length1) + #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix) + #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2) + + int32_t lengthRange = SimpleModifier::formatTwoArgPattern( + fRangeFormatter, + string, + 0, + &lengthPrefix, + &lengthSuffix, + UNUM_FIELD_COUNT, + status); + if (U_FAILURE(status)) { return; } + lengthInfix = lengthRange - lengthPrefix - lengthSuffix; + U_ASSERT(lengthInfix > 0); + + // SPACING HEURISTIC + // Add spacing unless all modifiers are collapsed. + // TODO: add API to control this? + // TODO: Use a data-driven heuristic like currency spacing? + // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications) + { + bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0; + bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0; + bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0; + if (repeatInner || repeatMiddle || repeatOuter) { + // Add spacing if there is not already spacing + if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) { + lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status); + } + if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) { + lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status); + } + } + } + + length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status); + length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status); + + // TODO: Support padding? + + if (collapseInner) { + // Note: this is actually a mix of prefix and suffix, but adding to infix length works + const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner); + lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + } else { + length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); + length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + } + + if (collapseMiddle) { + // Note: this is actually a mix of prefix and suffix, but adding to infix length works + const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle); + lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + } else { + length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); + length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + } + + if (collapseOuter) { + // Note: this is actually a mix of prefix and suffix, but adding to infix length works + const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter); + lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status); + } else { + length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status); + length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status); + } +} + + +const Modifier& +NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const { + Modifier::Parameters parameters; + first.getParameters(parameters); + if (parameters.obj == nullptr) { + // No plural form; return a fallback (e.g., the first) + return first; + } + StandardPlural::Form firstPlural = parameters.plural; + + second.getParameters(parameters); + if (parameters.obj == nullptr) { + // No plural form; return a fallback (e.g., the first) + return first; + } + StandardPlural::Form secondPlural = parameters.plural; + + // Get the required plural form from data + StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural); + + // Get and return the new Modifier + const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural); + U_ASSERT(mod != nullptr); + return *mod; +} + + + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/numrange_impl.h b/deps/icu-small/source/i18n/numrange_impl.h new file mode 100644 index 00000000000000..787fc656860d53 --- /dev/null +++ b/deps/icu-small/source/i18n/numrange_impl.h @@ -0,0 +1,114 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING +#ifndef __SOURCE_NUMRANGE_TYPES_H__ +#define __SOURCE_NUMRANGE_TYPES_H__ + +#include "unicode/numberformatter.h" +#include "unicode/numberrangeformatter.h" +#include "unicode/simpleformatter.h" +#include "number_types.h" +#include "number_decimalquantity.h" +#include "number_formatimpl.h" +#include "number_stringbuilder.h" + +U_NAMESPACE_BEGIN namespace number { +namespace impl { + + +/** + * Class similar to UFormattedNumberData. + * + * Has incomplete magic number logic that will need to be finished + * if this is to be exposed as C API in the future. + */ +struct UFormattedNumberRangeData : public UMemory { + // The magic number to identify incoming objects. + // Reads in ASCII as "FDR" (FormatteDnumberRange with room at the end) + static constexpr int32_t kMagic = 0x46445200; + + // Data members: + int32_t fMagic = kMagic; + DecimalQuantity quantity1; + DecimalQuantity quantity2; + NumberStringBuilder string; + UNumberRangeIdentityResult identityResult = UNUM_IDENTITY_RESULT_COUNT; + + // No C conversion methods (no C API yet) +}; + + +class StandardPluralRanges : public UMemory { + public: + void initialize(const Locale& locale, UErrorCode& status); + StandardPlural::Form resolve(StandardPlural::Form first, StandardPlural::Form second) const; + + /** Used for data loading. */ + void addPluralRange( + StandardPlural::Form first, + StandardPlural::Form second, + StandardPlural::Form result); + + /** Used for data loading. */ + void setCapacity(int32_t length); + + private: + struct StandardPluralRangeTriple { + StandardPlural::Form first; + StandardPlural::Form second; + StandardPlural::Form result; + }; + + // TODO: An array is simple here, but it results in linear lookup time. + // Certain locales have 20-30 entries in this list. + // Consider changing to a smarter data structure. + typedef MaybeStackArray PluralRangeTriples; + PluralRangeTriples fTriples; + int32_t fTriplesLen = 0; +}; + + +class NumberRangeFormatterImpl : public UMemory { + public: + NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status); + + void format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const; + + private: + NumberFormatterImpl formatterImpl1; + NumberFormatterImpl formatterImpl2; + bool fSameFormatters; + + UNumberRangeCollapse fCollapse; + UNumberRangeIdentityFallback fIdentityFallback; + + SimpleFormatter fRangeFormatter; + SimpleModifier fApproximatelyModifier; + + StandardPluralRanges fPluralRanges; + + void formatSingleValue(UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const; + + void formatApproximately(UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const; + + void formatRange(UFormattedNumberRangeData& data, + MicroProps& micros1, MicroProps& micros2, + UErrorCode& status) const; + + const Modifier& resolveModifierPlurals(const Modifier& first, const Modifier& second) const; +}; + + +} // namespace impl +} // namespace number +U_NAMESPACE_END + +#endif //__SOURCE_NUMRANGE_TYPES_H__ +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/numsys.cpp b/deps/icu-small/source/i18n/numsys.cpp index 087dd277eb4dd8..514fe05e5ae6de 100644 --- a/deps/icu-small/source/i18n/numsys.cpp +++ b/deps/icu-small/source/i18n/numsys.cpp @@ -79,43 +79,45 @@ NumberingSystem* U_EXPORT2 NumberingSystem::createInstance(int32_t radix_in, UBool isAlgorithmic_in, const UnicodeString & desc_in, UErrorCode &status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } if ( radix_in < 2 ) { status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } if ( !isAlgorithmic_in ) { if ( desc_in.countChar32() != radix_in ) { status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } } - NumberingSystem *ns = new NumberingSystem(); + LocalPointer ns(new NumberingSystem(), status); + if (U_FAILURE(status)) { + return nullptr; + } ns->setRadix(radix_in); ns->setDesc(desc_in); ns->setAlgorithmic(isAlgorithmic_in); - ns->setName(NULL); - return ns; + ns->setName(nullptr); + return ns.orphan(); } - NumberingSystem* U_EXPORT2 NumberingSystem::createInstance(const Locale & inLocale, UErrorCode& status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } UBool nsResolved = TRUE; UBool usingFallback = FALSE; char buffer[ULOC_KEYWORDS_CAPACITY]; - int32_t count = inLocale.getKeywordValue("numbers",buffer, sizeof(buffer),status); + int32_t count = inLocale.getKeywordValue("numbers", buffer, sizeof(buffer), status); if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { // the "numbers" keyword exceeds ULOC_KEYWORDS_CAPACITY; ignore and use default. count = 0; @@ -129,20 +131,30 @@ NumberingSystem::createInstance(const Locale & inLocale, UErrorCode& status) { nsResolved = FALSE; } } else { - uprv_strcpy(buffer,gDefault); + uprv_strcpy(buffer, gDefault); nsResolved = FALSE; } if (!nsResolved) { // Resolve the numbering system ( default, native, traditional or finance ) into a "real" numbering system UErrorCode localStatus = U_ZERO_ERROR; - UResourceBundle *resource = ures_open(NULL, inLocale.getName(), &localStatus); - UResourceBundle *numberElementsRes = ures_getByKey(resource,gNumberElements,NULL,&localStatus); + LocalUResourceBundlePointer resource(ures_open(nullptr, inLocale.getName(), &localStatus)); + LocalUResourceBundlePointer numberElementsRes(ures_getByKey(resource.getAlias(), gNumberElements, nullptr, &localStatus)); + // Don't stomp on the catastrophic failure of OOM. + if (localStatus == U_MEMORY_ALLOCATION_ERROR) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } while (!nsResolved) { localStatus = U_ZERO_ERROR; count = 0; - const UChar *nsName = ures_getStringByKeyWithFallback(numberElementsRes, buffer, &count, &localStatus); + const UChar *nsName = ures_getStringByKeyWithFallback(numberElementsRes.getAlias(), buffer, &count, &localStatus); + // Don't stomp on the catastrophic failure of OOM. + if (localStatus == U_MEMORY_ALLOCATION_ERROR) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } if ( count > 0 && count < ULOC_KEYWORDS_CAPACITY ) { // numbering system found - u_UCharsToChars(nsName,buffer,count); + u_UCharsToChars(nsName, buffer, count); buffer[count] = '\0'; // Make sure it is null terminated. nsResolved = TRUE; } @@ -158,16 +170,17 @@ NumberingSystem::createInstance(const Locale & inLocale, UErrorCode& status) { } } } - ures_close(numberElementsRes); - ures_close(resource); } if (usingFallback) { status = U_USING_FALLBACK_WARNING; NumberingSystem *ns = new NumberingSystem(); + if (ns == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } return ns; } else { - return NumberingSystem::createInstanceByName(buffer,status); + return NumberingSystem::createInstanceByName(buffer, status); } } @@ -178,36 +191,37 @@ NumberingSystem::createInstance(UErrorCode& status) { NumberingSystem* U_EXPORT2 NumberingSystem::createInstanceByName(const char *name, UErrorCode& status) { - UResourceBundle *numberingSystemsInfo = NULL; - UResourceBundle *nsTop, *nsCurrent; int32_t radix = 10; int32_t algorithmic = 0; - numberingSystemsInfo = ures_openDirect(NULL,gNumberingSystems, &status); - nsCurrent = ures_getByKey(numberingSystemsInfo,gNumberingSystems,NULL,&status); - nsTop = ures_getByKey(nsCurrent,name,NULL,&status); - UnicodeString nsd = ures_getUnicodeStringByKey(nsTop,gDesc,&status); + LocalUResourceBundlePointer numberingSystemsInfo(ures_openDirect(nullptr, gNumberingSystems, &status)); + LocalUResourceBundlePointer nsCurrent(ures_getByKey(numberingSystemsInfo.getAlias(), gNumberingSystems, nullptr, &status)); + LocalUResourceBundlePointer nsTop(ures_getByKey(nsCurrent.getAlias(), name, nullptr, &status)); - ures_getByKey(nsTop,gRadix,nsCurrent,&status); - radix = ures_getInt(nsCurrent,&status); + UnicodeString nsd = ures_getUnicodeStringByKey(nsTop.getAlias(), gDesc, &status); - ures_getByKey(nsTop,gAlgorithmic,nsCurrent,&status); - algorithmic = ures_getInt(nsCurrent,&status); + ures_getByKey(nsTop.getAlias(), gRadix, nsCurrent.getAlias(), &status); + radix = ures_getInt(nsCurrent.getAlias(), &status); - UBool isAlgorithmic = ( algorithmic == 1 ); + ures_getByKey(nsTop.getAlias(), gAlgorithmic, nsCurrent.getAlias(), &status); + algorithmic = ures_getInt(nsCurrent.getAlias(), &status); - ures_close(nsCurrent); - ures_close(nsTop); - ures_close(numberingSystemsInfo); + UBool isAlgorithmic = ( algorithmic == 1 ); if (U_FAILURE(status)) { - status = U_UNSUPPORTED_ERROR; - return NULL; + // Don't stomp on the catastrophic failure of OOM. + if (status != U_MEMORY_ALLOCATION_ERROR) { + status = U_UNSUPPORTED_ERROR; + } + return nullptr; } - NumberingSystem* ns = NumberingSystem::createInstance(radix,isAlgorithmic,nsd,status); + LocalPointer ns(NumberingSystem::createInstance(radix, isAlgorithmic, nsd, status), status); + if (U_FAILURE(status)) { + return nullptr; + } ns->setName(name); - return ns; + return ns.orphan(); } /** @@ -241,11 +255,11 @@ void NumberingSystem::setDesc(const UnicodeString &d) { desc.setTo(d); } void NumberingSystem::setName(const char *n) { - if ( n == NULL ) { + if ( n == nullptr ) { name[0] = (char) 0; } else { uprv_strncpy(name,n,NUMSYS_NAME_CAPACITY); - name[NUMSYS_NAME_CAPACITY] = (char)0; // Make sure it is null terminated. + name[NUMSYS_NAME_CAPACITY] = '\0'; // Make sure it is null terminated. } } UBool NumberingSystem::isAlgorithmic() const { @@ -254,43 +268,57 @@ UBool NumberingSystem::isAlgorithmic() const { StringEnumeration* NumberingSystem::getAvailableNames(UErrorCode &status) { // TODO(ticket #11908): Init-once static cache, with u_cleanup() callback. - static StringEnumeration* availableNames = NULL; + static StringEnumeration* availableNames = nullptr; if (U_FAILURE(status)) { - return NULL; + return nullptr; } - if ( availableNames == NULL ) { + if ( availableNames == nullptr ) { // TODO: Simple array of UnicodeString objects, based on length of table resource? - LocalPointer numsysNames(new UVector(uprv_deleteUObject, NULL, status), status); + LocalPointer numsysNames(new UVector(uprv_deleteUObject, nullptr, status), status); if (U_FAILURE(status)) { - return NULL; + return nullptr; } UErrorCode rbstatus = U_ZERO_ERROR; - UResourceBundle *numberingSystemsInfo = ures_openDirect(NULL, "numberingSystems", &rbstatus); - numberingSystemsInfo = ures_getByKey(numberingSystemsInfo,"numberingSystems",numberingSystemsInfo,&rbstatus); - if(U_FAILURE(rbstatus)) { - status = U_MISSING_RESOURCE_ERROR; + UResourceBundle *numberingSystemsInfo = ures_openDirect(nullptr, "numberingSystems", &rbstatus); + numberingSystemsInfo = ures_getByKey(numberingSystemsInfo, "numberingSystems", numberingSystemsInfo, &rbstatus); + if (U_FAILURE(rbstatus)) { + // Don't stomp on the catastrophic failure of OOM. + if (rbstatus == U_MEMORY_ALLOCATION_ERROR) { + status = rbstatus; + } else { + status = U_MISSING_RESOURCE_ERROR; + } ures_close(numberingSystemsInfo); - return NULL; + return nullptr; } - while ( ures_hasNext(numberingSystemsInfo) ) { - UResourceBundle *nsCurrent = ures_getNextResource(numberingSystemsInfo,NULL,&rbstatus); - const char *nsName = ures_getKey(nsCurrent); - numsysNames->addElement(new UnicodeString(nsName, -1, US_INV),status); - ures_close(nsCurrent); + while ( ures_hasNext(numberingSystemsInfo) && U_SUCCESS(status) ) { + LocalUResourceBundlePointer nsCurrent(ures_getNextResource(numberingSystemsInfo, nullptr, &rbstatus)); + if (rbstatus == U_MEMORY_ALLOCATION_ERROR) { + status = rbstatus; // we want to report OOM failure back to the caller. + break; + } + const char *nsName = ures_getKey(nsCurrent.getAlias()); + LocalPointer newElem(new UnicodeString(nsName, -1, US_INV), status); + if (U_SUCCESS(status)) { + numsysNames->addElement(newElem.getAlias(), status); + if (U_SUCCESS(status)) { + newElem.orphan(); // on success, the numsysNames vector owns newElem. + } + } } ures_close(numberingSystemsInfo); if (U_FAILURE(status)) { - return NULL; + return nullptr; } availableNames = new NumsysNameEnumeration(numsysNames.getAlias(), status); - if (availableNames == NULL) { + if (availableNames == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } numsysNames.orphan(); // The names got adopted. } @@ -305,10 +333,10 @@ NumsysNameEnumeration::NumsysNameEnumeration(UVector *numsysNames, UErrorCode& / const UnicodeString* NumsysNameEnumeration::snext(UErrorCode& status) { - if (U_SUCCESS(status) && pos < fNumsysNames->size()) { + if (U_SUCCESS(status) && (fNumsysNames != nullptr) && (pos < fNumsysNames->size())) { return (const UnicodeString*)fNumsysNames->elementAt(pos++); } - return NULL; + return nullptr; } void @@ -318,7 +346,7 @@ NumsysNameEnumeration::reset(UErrorCode& /*status*/) { int32_t NumsysNameEnumeration::count(UErrorCode& /*status*/) const { - return (fNumsysNames==NULL) ? 0 : fNumsysNames->size(); + return (fNumsysNames==nullptr) ? 0 : fNumsysNames->size(); } NumsysNameEnumeration::~NumsysNameEnumeration() { diff --git a/deps/icu-small/source/i18n/numsys_impl.h b/deps/icu-small/source/i18n/numsys_impl.h index 6385fa5408a152..733e102365369d 100644 --- a/deps/icu-small/source/i18n/numsys_impl.h +++ b/deps/icu-small/source/i18n/numsys_impl.h @@ -37,7 +37,7 @@ class NumsysNameEnumeration : public StringEnumeration { virtual int32_t count(UErrorCode& status) const; private: int32_t pos; - UVector *fNumsysNames; + UVector *fNumsysNames = nullptr; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/olsontz.cpp b/deps/icu-small/source/i18n/olsontz.cpp index df025c0808388e..c3500574662585 100644 --- a/deps/icu-small/source/i18n/olsontz.cpp +++ b/deps/icu-small/source/i18n/olsontz.cpp @@ -140,7 +140,7 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top, // Pre-32bit second transitions ures_getByKey(res, kTRANSPRE32, &r, &ec); transitionTimesPre32 = ures_getIntVector(&r, &len, &ec); - transitionCountPre32 = len >> 1; + transitionCountPre32 = static_cast(len >> 1); if (ec == U_MISSING_RESOURCE_ERROR) { // No pre-32bit transitions transitionTimesPre32 = NULL; @@ -153,7 +153,7 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top, // 32bit second transitions ures_getByKey(res, kTRANS, &r, &ec); transitionTimes32 = ures_getIntVector(&r, &len, &ec); - transitionCount32 = len; + transitionCount32 = static_cast(len); if (ec == U_MISSING_RESOURCE_ERROR) { // No 32bit transitions transitionTimes32 = NULL; @@ -166,7 +166,7 @@ OlsonTimeZone::OlsonTimeZone(const UResourceBundle* top, // Post-32bit second transitions ures_getByKey(res, kTRANSPOST32, &r, &ec); transitionTimesPost32 = ures_getIntVector(&r, &len, &ec); - transitionCountPost32 = len >> 1; + transitionCountPost32 = static_cast(len >> 1); if (ec == U_MISSING_RESOURCE_ERROR) { // No pre-32bit transitions transitionTimesPost32 = NULL; diff --git a/deps/icu-small/source/i18n/plurrule.cpp b/deps/icu-small/source/i18n/plurrule.cpp index 9597e8eb00d023..3caa48a57bb700 100644 --- a/deps/icu-small/source/i18n/plurrule.cpp +++ b/deps/icu-small/source/i18n/plurrule.cpp @@ -65,13 +65,15 @@ UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration) PluralRules::PluralRules(UErrorCode& /*status*/) : UObject(), - mRules(NULL) + mRules(nullptr), + mInternalStatus(U_ZERO_ERROR) { } PluralRules::PluralRules(const PluralRules& other) : UObject(other), - mRules(NULL) + mRules(nullptr), + mInternalStatus(U_ZERO_ERROR) { *this=other; } @@ -86,54 +88,67 @@ SharedPluralRules::~SharedPluralRules() { PluralRules* PluralRules::clone() const { - return new PluralRules(*this); + PluralRules* newObj = new PluralRules(*this); + // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if + // the newly created object was not fully constructed properly (an error occurred). + if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) { + delete newObj; + newObj = nullptr; + } + return newObj; } PluralRules& PluralRules::operator=(const PluralRules& other) { if (this != &other) { delete mRules; - if (other.mRules==NULL) { - mRules = NULL; + mRules = nullptr; + mInternalStatus = other.mInternalStatus; + if (U_FAILURE(mInternalStatus)) { + // bail out early if the object we were copying from was already 'invalid'. + return *this; } - else { + if (other.mRules != nullptr) { mRules = new RuleChain(*other.mRules); + if (mRules == nullptr) { + mInternalStatus = U_MEMORY_ALLOCATION_ERROR; + } + else if (U_FAILURE(mRules->fInternalStatus)) { + // If the RuleChain wasn't fully copied, then set our status to failure as well. + mInternalStatus = mRules->fInternalStatus; + } } } - return *this; } StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) { - StringEnumeration *result = new PluralAvailableLocalesEnumeration(status); - if (result == NULL && U_SUCCESS(status)) { - status = U_MEMORY_ALLOCATION_ERROR; + if (U_FAILURE(status)) { + return nullptr; } + LocalPointer result(new PluralAvailableLocalesEnumeration(status), status); if (U_FAILURE(status)) { - delete result; - result = NULL; + return nullptr; } - return result; + return result.orphan(); } PluralRules* U_EXPORT2 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } - PluralRuleParser parser; - PluralRules *newRules = new PluralRules(status); - if (U_SUCCESS(status) && newRules == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; + LocalPointer newRules(new PluralRules(status), status); + if (U_FAILURE(status)) { + return nullptr; } - parser.parse(description, newRules, status); + parser.parse(description, newRules.getAlias(), status); if (U_FAILURE(status)) { - delete newRules; - newRules = NULL; + newRules.adoptInstead(nullptr); } - return newRules; + return newRules.orphan(); } @@ -149,19 +164,17 @@ template<> U_I18N_API const SharedPluralRules *LocaleCacheKey::createObject( const void * /*unused*/, UErrorCode &status) const { const char *localeId = fLoc.getName(); - PluralRules *pr = PluralRules::internalForLocale( - localeId, UPLURAL_TYPE_CARDINAL, status); + LocalPointer pr(PluralRules::internalForLocale(localeId, UPLURAL_TYPE_CARDINAL, status), status); if (U_FAILURE(status)) { - return NULL; + return nullptr; } - SharedPluralRules *result = new SharedPluralRules(pr); - if (result == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - delete pr; - return NULL; + LocalPointer result(new SharedPluralRules(pr.getAlias()), status); + if (U_FAILURE(status)) { + return nullptr; } + pr.orphan(); // result was successfully created so it nows pr. result->addRef(); - return result; + return result.orphan(); } /* end plural rules cache */ @@ -171,13 +184,13 @@ const SharedPluralRules* U_EXPORT2 PluralRules::createSharedInstance( const Locale& locale, UPluralType type, UErrorCode& status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } if (type != UPLURAL_TYPE_CARDINAL) { status = U_UNSUPPORTED_ERROR; - return NULL; + return nullptr; } - const SharedPluralRules *result = NULL; + const SharedPluralRules *result = nullptr; UnifiedCache::getByLocale(locale, result, status); return result; } @@ -195,11 +208,11 @@ PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& statu const SharedPluralRules *shared = createSharedInstance( locale, type, status); if (U_FAILURE(status)) { - return NULL; + return nullptr; } PluralRules *result = (*shared)->clone(); shared->removeRef(); - if (result == NULL) { + if (result == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; } return result; @@ -208,20 +221,23 @@ PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& statu PluralRules* U_EXPORT2 PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } if (type >= UPLURAL_TYPE_COUNT) { status = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; + return nullptr; } - PluralRules *newObj = new PluralRules(status); - if (newObj==NULL || U_FAILURE(status)) { - delete newObj; - return NULL; + LocalPointer newObj(new PluralRules(status), status); + if (U_FAILURE(status)) { + return nullptr; } UnicodeString locRule = newObj->getRuleFromResource(locale, type, status); - // TODO: which errors, if any, should be returned? + // TODO: which other errors, if any, should be returned? if (locRule.length() == 0) { + // If an out-of-memory error occurred, then stop and report the failure. + if (status == U_MEMORY_ALLOCATION_ERROR) { + return nullptr; + } // Locales with no specific rules (all numbers have the "other" category // will return a U_MISSING_RESOURCE_ERROR at this point. This is not // an error. @@ -229,13 +245,13 @@ PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCod status = U_ZERO_ERROR; } PluralRuleParser parser; - parser.parse(locRule, newObj, status); + parser.parse(locRule, newObj.getAlias(), status); // TODO: should rule parse errors be returned, or // should we silently use default rules? // Original impl used default rules. // Ask the question to ICU Core. - return newObj; + return newObj.orphan(); } UnicodeString @@ -250,7 +266,7 @@ PluralRules::select(double number) const { UnicodeString PluralRules::select(const IFixedDecimal &number) const { - if (mRules == NULL) { + if (mRules == nullptr) { return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1); } else { @@ -262,14 +278,18 @@ PluralRules::select(const IFixedDecimal &number) const { StringEnumeration* PluralRules::getKeywords(UErrorCode& status) const { - if (U_FAILURE(status)) return NULL; - StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status); if (U_FAILURE(status)) { - delete nameEnumerator; - return NULL; + return nullptr; } - - return nameEnumerator; + if (U_FAILURE(mInternalStatus)) { + status = mInternalStatus; + return nullptr; + } + LocalPointer nameEnumerator(new PluralKeywordEnumeration(mRules, status), status); + if (U_FAILURE(status)) { + return nullptr; + } + return nameEnumerator.orphan(); } double @@ -367,8 +387,15 @@ getSamplesFromString(const UnicodeString &samples, double *dest, int32_t PluralRules::getSamples(const UnicodeString &keyword, double *dest, int32_t destCapacity, UErrorCode& status) { + if (destCapacity == 0 || U_FAILURE(status)) { + return 0; + } + if (U_FAILURE(mInternalStatus)) { + status = mInternalStatus; + return 0; + } RuleChain *rc = rulesForKeyword(keyword); - if (rc == NULL || destCapacity == 0 || U_FAILURE(status)) { + if (rc == nullptr) { return 0; } int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status); @@ -381,7 +408,7 @@ PluralRules::getSamples(const UnicodeString &keyword, double *dest, RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const { RuleChain *rc; - for (rc = mRules; rc != NULL; rc = rc->fNext) { + for (rc = mRules; rc != nullptr; rc = rc->fNext) { if (rc->fKeyword == keyword) { break; } @@ -395,7 +422,7 @@ PluralRules::isKeyword(const UnicodeString& keyword) const { if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) { return true; } - return rulesForKeyword(keyword) != NULL; + return rulesForKeyword(keyword) != nullptr; } UnicodeString @@ -421,13 +448,13 @@ PluralRules::operator==(const PluralRules& other) const { return FALSE; } myKeywordList->reset(status); - while ((ptrKeyword=myKeywordList->snext(status))!=NULL) { + while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) { if (!other.isKeyword(*ptrKeyword)) { return FALSE; } } otherKeywordList->reset(status); - while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) { + while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) { if (!this->isKeyword(*ptrKeyword)) { return FALSE; } @@ -460,29 +487,33 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr } switch (type) { case tAnd: - U_ASSERT(curAndConstraint != NULL); - curAndConstraint = curAndConstraint->add(); + U_ASSERT(curAndConstraint != nullptr); + curAndConstraint = curAndConstraint->add(status); break; case tOr: { - U_ASSERT(currentChain != NULL); + U_ASSERT(currentChain != nullptr); OrConstraint *orNode=currentChain->ruleHeader; - while (orNode->next != NULL) { + while (orNode->next != nullptr) { orNode = orNode->next; } orNode->next= new OrConstraint(); + if (orNode->next == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + break; + } orNode=orNode->next; - orNode->next=NULL; - curAndConstraint = orNode->add(); + orNode->next=nullptr; + curAndConstraint = orNode->add(status); } break; case tIs: - U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint != nullptr); U_ASSERT(curAndConstraint->value == -1); - U_ASSERT(curAndConstraint->rangeList == NULL); + U_ASSERT(curAndConstraint->rangeList == nullptr); break; case tNot: - U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint != nullptr); curAndConstraint->negated=TRUE; break; @@ -492,23 +523,29 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr case tIn: case tWithin: case tEqual: - U_ASSERT(curAndConstraint != NULL); - curAndConstraint->rangeList = new UVector32(status); - curAndConstraint->rangeList->addElement(-1, status); // range Low - curAndConstraint->rangeList->addElement(-1, status); // range Hi - rangeLowIdx = 0; - rangeHiIdx = 1; - curAndConstraint->value=PLURAL_RANGE_HIGH; - curAndConstraint->integerOnly = (type != tWithin); + { + U_ASSERT(curAndConstraint != nullptr); + LocalPointer newRangeList(new UVector32(status), status); + if (U_FAILURE(status)) { + break; + } + curAndConstraint->rangeList = newRangeList.orphan(); + curAndConstraint->rangeList->addElement(-1, status); // range Low + curAndConstraint->rangeList->addElement(-1, status); // range Hi + rangeLowIdx = 0; + rangeHiIdx = 1; + curAndConstraint->value=PLURAL_RANGE_HIGH; + curAndConstraint->integerOnly = (type != tWithin); + } break; case tNumber: - U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint != nullptr); if ( (curAndConstraint->op==AndConstraint::MOD)&& (curAndConstraint->opNum == -1 ) ) { curAndConstraint->opNum=getNumberValue(token); } else { - if (curAndConstraint->rangeList == NULL) { + if (curAndConstraint->rangeList == nullptr) { // this is for an 'is' rule curAndConstraint->value = getNumberValue(token); } else { @@ -534,7 +571,7 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr case tComma: // TODO: rule syntax checking is inadequate, can happen with badly formed rules. // Catch cases like "n mod 10, is 1" here instead. - if (curAndConstraint == NULL || curAndConstraint->rangeList == NULL) { + if (curAndConstraint == nullptr || curAndConstraint->rangeList == nullptr) { status = U_UNEXPECTED_TOKEN; break; } @@ -545,7 +582,7 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr curAndConstraint->rangeList->addElement(-1, status); // range Hi break; case tMod: - U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint != nullptr); curAndConstraint->op=AndConstraint::MOD; break; case tVariableN: @@ -553,24 +590,24 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr case tVariableF: case tVariableT: case tVariableV: - U_ASSERT(curAndConstraint != NULL); + U_ASSERT(curAndConstraint != nullptr); curAndConstraint->digitsType = type; break; case tKeyword: { RuleChain *newChain = new RuleChain; - if (newChain == NULL) { + if (newChain == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; break; } newChain->fKeyword = token; - if (prules->mRules == NULL) { + if (prules->mRules == nullptr) { prules->mRules = newChain; } else { // The new rule chain goes at the end of the linked list of rule chains, // unless there is an "other" keyword & chain. "other" must remain last. RuleChain *insertAfter = prules->mRules; - while (insertAfter->fNext!=NULL && + while (insertAfter->fNext!=nullptr && insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){ insertAfter=insertAfter->fNext; } @@ -578,8 +615,12 @@ PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErr insertAfter->fNext = newChain; } OrConstraint *orNode = new OrConstraint(); + if (orNode == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + break; + } newChain->ruleHeader = orNode; - curAndConstraint = orNode->add(); + curAndConstraint = orNode->add(status); currentChain = newChain; } break; @@ -629,7 +670,7 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC if (U_FAILURE(errCode)) { return emptyStr; } - LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &errCode)); + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &errCode)); if(U_FAILURE(errCode)) { return emptyStr; } @@ -646,7 +687,7 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC errCode = U_ILLEGAL_ARGUMENT_ERROR; return emptyStr; } - LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, NULL, &errCode)); + LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, nullptr, &errCode)); if(U_FAILURE(errCode)) { return emptyStr; } @@ -654,25 +695,25 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC const char *curLocaleName=locale.getName(); const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode); - if (s == NULL) { + if (s == nullptr) { // Check parent locales. UErrorCode status = U_ZERO_ERROR; char parentLocaleName[ULOC_FULLNAME_CAPACITY]; - const char *curLocaleName=locale.getName(); - uprv_strcpy(parentLocaleName, curLocaleName); + const char *curLocaleName2=locale.getName(); + uprv_strcpy(parentLocaleName, curLocaleName2); while (uloc_getParent(parentLocaleName, parentLocaleName, ULOC_FULLNAME_CAPACITY, &status) > 0) { resLen=0; s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status); - if (s != NULL) { + if (s != nullptr) { errCode = U_ZERO_ERROR; break; } status = U_ZERO_ERROR; } } - if (s==NULL) { + if (s==nullptr) { return emptyStr; } @@ -680,18 +721,18 @@ PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorC u_UCharsToChars(s, setKey, resLen + 1); // printf("\n PluralRule: %s\n", setKey); - LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", NULL, &errCode)); + LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", nullptr, &errCode)); if(U_FAILURE(errCode)) { return emptyStr; } - LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, NULL, &errCode)); + LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, nullptr, &errCode)); if (U_FAILURE(errCode)) { return emptyStr; } int32_t numberKeys = ures_getSize(setRes.getAlias()); UnicodeString result; - const char *key=NULL; + const char *key=nullptr; for(int32_t i=0; idumpRules(rules); } return rules; } - -AndConstraint::AndConstraint() { - op = AndConstraint::NONE; - opNum=-1; - value = -1; - rangeList = NULL; - negated = FALSE; - integerOnly = FALSE; - digitsType = none; - next=NULL; -} - - AndConstraint::AndConstraint(const AndConstraint& other) { + this->fInternalStatus = other.fInternalStatus; + if (U_FAILURE(fInternalStatus)) { + return; // stop early if the object we are copying from is invalid. + } this->op = other.op; this->opNum=other.opNum; this->value=other.value; - this->rangeList=NULL; - if (other.rangeList != NULL) { - UErrorCode status = U_ZERO_ERROR; - this->rangeList = new UVector32(status); - this->rangeList->assign(*other.rangeList, status); + if (other.rangeList != nullptr) { + LocalPointer newRangeList(new UVector32(fInternalStatus), fInternalStatus); + if (U_FAILURE(fInternalStatus)) { + return; + } + this->rangeList = newRangeList.orphan(); + this->rangeList->assign(*other.rangeList, fInternalStatus); } this->integerOnly=other.integerOnly; this->negated=other.negated; this->digitsType = other.digitsType; - if (other.next==NULL) { - this->next=NULL; - } - else { + if (other.next != nullptr) { this->next = new AndConstraint(*other.next); + if (this->next == nullptr) { + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + } } } AndConstraint::~AndConstraint() { delete rangeList; - if (next!=NULL) { - delete next; - } + rangeList = nullptr; + delete next; + next = nullptr; } - UBool AndConstraint::isFulfilled(const IFixedDecimal &number) { UBool result = TRUE; @@ -776,7 +809,7 @@ AndConstraint::isFulfilled(const IFixedDecimal &number) { if (op == MOD) { n = fmod(n, opNum); } - if (rangeList == NULL) { + if (rangeList == nullptr) { result = value == -1 || // empty rule n == value; // 'is' rule break; @@ -796,53 +829,67 @@ AndConstraint::isFulfilled(const IFixedDecimal &number) { return result; } - AndConstraint* -AndConstraint::add() -{ +AndConstraint::add(UErrorCode& status) { + if (U_FAILURE(fInternalStatus)) { + status = fInternalStatus; + return nullptr; + } this->next = new AndConstraint(); + if (this->next == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } return this->next; } -OrConstraint::OrConstraint() { - childNode=NULL; - next=NULL; -} OrConstraint::OrConstraint(const OrConstraint& other) { - if ( other.childNode == NULL ) { - this->childNode = NULL; + this->fInternalStatus = other.fInternalStatus; + if (U_FAILURE(fInternalStatus)) { + return; // stop early if the object we are copying from is invalid. } - else { + if ( other.childNode != nullptr ) { this->childNode = new AndConstraint(*(other.childNode)); + if (this->childNode == nullptr) { + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } } - if (other.next == NULL ) { - this->next = NULL; - } - else { + if (other.next != nullptr ) { this->next = new OrConstraint(*(other.next)); + if (this->next == nullptr) { + fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (U_FAILURE(this->next->fInternalStatus)) { + this->fInternalStatus = this->next->fInternalStatus; + } } } OrConstraint::~OrConstraint() { - if (childNode!=NULL) { - delete childNode; - } - if (next!=NULL) { - delete next; - } + delete childNode; + childNode = nullptr; + delete next; + next = nullptr; } AndConstraint* -OrConstraint::add() -{ +OrConstraint::add(UErrorCode& status) { + if (U_FAILURE(fInternalStatus)) { + status = fInternalStatus; + return nullptr; + } OrConstraint *curOrConstraint=this; { - while (curOrConstraint->next!=NULL) { + while (curOrConstraint->next!=nullptr) { curOrConstraint = curOrConstraint->next; } - U_ASSERT(curOrConstraint->childNode == NULL); + U_ASSERT(curOrConstraint->childNode == nullptr); curOrConstraint->childNode = new AndConstraint(); + if (curOrConstraint->childNode == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } } return curOrConstraint->childNode; } @@ -852,10 +899,10 @@ OrConstraint::isFulfilled(const IFixedDecimal &number) { OrConstraint* orRule=this; UBool result=FALSE; - while (orRule!=NULL && !result) { + while (orRule!=nullptr && !result) { result=TRUE; AndConstraint* andRule = orRule->childNode; - while (andRule!=NULL && result) { + while (andRule!=nullptr && result) { result = andRule->isFulfilled(number); andRule=andRule->next; } @@ -866,19 +913,33 @@ OrConstraint::isFulfilled(const IFixedDecimal &number) { } -RuleChain::RuleChain(): fKeyword(), fNext(NULL), ruleHeader(NULL), fDecimalSamples(), fIntegerSamples(), - fDecimalSamplesUnbounded(FALSE), fIntegerSamplesUnbounded(FALSE) { -} - RuleChain::RuleChain(const RuleChain& other) : - fKeyword(other.fKeyword), fNext(NULL), ruleHeader(NULL), fDecimalSamples(other.fDecimalSamples), + fKeyword(other.fKeyword), fDecimalSamples(other.fDecimalSamples), fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded), - fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded) { - if (other.ruleHeader != NULL) { + fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded), fInternalStatus(other.fInternalStatus) { + if (U_FAILURE(this->fInternalStatus)) { + return; // stop early if the object we are copying from is invalid. + } + if (other.ruleHeader != nullptr) { this->ruleHeader = new OrConstraint(*(other.ruleHeader)); + if (this->ruleHeader == nullptr) { + this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + } + else if (U_FAILURE(this->ruleHeader->fInternalStatus)) { + // If the OrConstraint wasn't fully copied, then set our status to failure as well. + this->fInternalStatus = this->ruleHeader->fInternalStatus; + return; // exit early. + } } - if (other.fNext != NULL ) { + if (other.fNext != nullptr ) { this->fNext = new RuleChain(*other.fNext); + if (this->fNext == nullptr) { + this->fInternalStatus = U_MEMORY_ALLOCATION_ERROR; + } + else if (U_FAILURE(this->fNext->fInternalStatus)) { + // If the RuleChain wasn't fully copied, then set our status to failure as well. + this->fInternalStatus = this->fNext->fInternalStatus; + } } } @@ -887,11 +948,10 @@ RuleChain::~RuleChain() { delete ruleHeader; } - UnicodeString RuleChain::select(const IFixedDecimal &number) const { if (!number.isNaN() && !number.isInfinite()) { - for (const RuleChain *rules = this; rules != NULL; rules = rules->fNext) { + for (const RuleChain *rules = this; rules != nullptr; rules = rules->fNext) { if (rules->ruleHeader->isFulfilled(number)) { return rules->fKeyword; } @@ -923,17 +983,17 @@ void RuleChain::dumpRules(UnicodeString& result) { UChar digitString[16]; - if ( ruleHeader != NULL ) { + if ( ruleHeader != nullptr ) { result += fKeyword; result += COLON; result += SPACE; OrConstraint* orRule=ruleHeader; - while ( orRule != NULL ) { + while ( orRule != nullptr ) { AndConstraint* andRule=orRule->childNode; - while ( andRule != NULL ) { - if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) && (andRule->value == -1)) { + while ( andRule != nullptr ) { + if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) && (andRule->value == -1)) { // Empty Rules. - } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==NULL) ) { + } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) ) { result += tokenString(andRule->digitsType); result += UNICODE_STRING_SIMPLE(" is "); if (andRule->negated) { @@ -950,7 +1010,7 @@ RuleChain::dumpRules(UnicodeString& result) { uprv_itou(digitString,16, andRule->opNum,10,0); result += UnicodeString(digitString); } - if (andRule->rangeList==NULL) { + if (andRule->rangeList==nullptr) { if (andRule->negated) { result += UNICODE_STRING_SIMPLE(" is not "); uprv_itou(digitString,16, andRule->value,10,0); @@ -993,16 +1053,16 @@ RuleChain::dumpRules(UnicodeString& result) { } } } - if ( (andRule=andRule->next) != NULL) { + if ( (andRule=andRule->next) != nullptr) { result += UNICODE_STRING_SIMPLE(" and "); } } - if ( (orRule = orRule->next) != NULL ) { + if ( (orRule = orRule->next) != nullptr ) { result += UNICODE_STRING_SIMPLE(" or "); } } } - if ( fNext != NULL ) { + if ( fNext != nullptr ) { result += UNICODE_STRING_SIMPLE("; "); fNext->dumpRules(result); } @@ -1011,6 +1071,9 @@ RuleChain::dumpRules(UnicodeString& result) { UErrorCode RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const { + if (U_FAILURE(fInternalStatus)) { + return fInternalStatus; + } if ( arraySize < capacityOfKeywords-1 ) { keywords[arraySize++]=fKeyword; } @@ -1018,7 +1081,7 @@ RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int3 return U_BUFFER_OVERFLOW_ERROR; } - if ( fNext != NULL ) { + if ( fNext != nullptr ) { return fNext->getKeywords(capacityOfKeywords, keywords, arraySize); } else { @@ -1032,7 +1095,7 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const { return TRUE; } - if ( fNext != NULL ) { + if ( fNext != nullptr ) { return fNext->isKeyword(keywordParam); } else { @@ -1043,7 +1106,7 @@ RuleChain::isKeyword(const UnicodeString& keywordParam) const { PluralRuleParser::PluralRuleParser() : ruleIndex(0), token(), type(none), prevType(none), - curAndConstraint(NULL), currentChain(NULL), rangeLowIdx(-1), rangeHiIdx(-1) + curAndConstraint(nullptr), currentChain(nullptr), rangeLowIdx(-1), rangeHiIdx(-1) { } @@ -1341,21 +1404,36 @@ PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode return; } fKeywordNames.setDeleter(uprv_deleteUObject); - UBool addKeywordOther=TRUE; - RuleChain *node=header; - while(node!=NULL) { - fKeywordNames.addElement(new UnicodeString(node->fKeyword), status); + UBool addKeywordOther = TRUE; + RuleChain *node = header; + while (node != nullptr) { + auto newElem = new UnicodeString(node->fKeyword); + if (newElem == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fKeywordNames.addElement(newElem, status); if (U_FAILURE(status)) { + delete newElem; return; } if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) { - addKeywordOther= FALSE; + addKeywordOther = FALSE; } - node=node->fNext; + node = node->fNext; } if (addKeywordOther) { - fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status); + auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER); + if (newElem == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + fKeywordNames.addElement(newElem, status); + if (U_FAILURE(status)) { + delete newElem; + return; + } } } @@ -1364,7 +1442,7 @@ PluralKeywordEnumeration::snext(UErrorCode& status) { if (U_SUCCESS(status) && pos < fKeywordNames.size()) { return (const UnicodeString*)fKeywordNames.elementAt(pos++); } - return NULL; + return nullptr; } void @@ -1374,7 +1452,7 @@ PluralKeywordEnumeration::reset(UErrorCode& /*status*/) { int32_t PluralKeywordEnumeration::count(UErrorCode& /*status*/) const { - return fKeywordNames.size(); + return fKeywordNames.size(); } PluralKeywordEnumeration::~PluralKeywordEnumeration() { @@ -1634,41 +1712,39 @@ int32_t FixedDecimal::getVisibleFractionDigitCount() const { PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) { - fLocales = NULL; - fRes = NULL; fOpenStatus = status; if (U_FAILURE(status)) { return; } - fOpenStatus = U_ZERO_ERROR; - LocalUResourceBundlePointer rb(ures_openDirect(NULL, "plurals", &fOpenStatus)); - fLocales = ures_getByKey(rb.getAlias(), "locales", NULL, &fOpenStatus); + fOpenStatus = U_ZERO_ERROR; // clear any warnings. + LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &fOpenStatus)); + fLocales = ures_getByKey(rb.getAlias(), "locales", nullptr, &fOpenStatus); } PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() { ures_close(fLocales); ures_close(fRes); - fLocales = NULL; - fRes = NULL; + fLocales = nullptr; + fRes = nullptr; } const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } if (U_FAILURE(fOpenStatus)) { status = fOpenStatus; - return NULL; + return nullptr; } fRes = ures_getNextResource(fLocales, fRes, &status); - if (fRes == NULL || U_FAILURE(status)) { + if (fRes == nullptr || U_FAILURE(status)) { if (status == U_INDEX_OUTOFBOUNDS_ERROR) { status = U_ZERO_ERROR; } - return NULL; + return nullptr; } const char *result = ures_getKey(fRes); - if (resultLength != NULL) { + if (resultLength != nullptr) { *resultLength = static_cast(uprv_strlen(result)); } return result; diff --git a/deps/icu-small/source/i18n/plurrule_impl.h b/deps/icu-small/source/i18n/plurrule_impl.h index 3ab445d5843c8b..f23ae168569d66 100644 --- a/deps/icu-small/source/i18n/plurrule_impl.h +++ b/deps/icu-small/source/i18n/plurrule_impl.h @@ -181,7 +181,6 @@ class PluralRuleParser: public UMemory { kRangeList, kSamples }; - }; enum PluralOperand { @@ -311,32 +310,36 @@ class AndConstraint : public UMemory { NONE, MOD } RuleOp; - RuleOp op; - int32_t opNum; // for mod expressions, the right operand of the mod. - int32_t value; // valid for 'is' rules only. - UVector32 *rangeList; // for 'in', 'within' rules. Null otherwise. - UBool negated; // TRUE for negated rules. - UBool integerOnly; // TRUE for 'within' rules. - tokenType digitsType; // n | i | v | f constraint. - AndConstraint *next; - - AndConstraint(); + RuleOp op = AndConstraint::NONE; + int32_t opNum = -1; // for mod expressions, the right operand of the mod. + int32_t value = -1; // valid for 'is' rules only. + UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. + UBool negated = FALSE; // TRUE for negated rules. + UBool integerOnly = FALSE; // TRUE for 'within' rules. + tokenType digitsType = none; // n | i | v | f constraint. + AndConstraint *next = nullptr; + // Internal error status, used for errors that occur during the copy constructor. + UErrorCode fInternalStatus = U_ZERO_ERROR; + + AndConstraint() = default; AndConstraint(const AndConstraint& other); virtual ~AndConstraint(); - AndConstraint* add(); + AndConstraint* add(UErrorCode& status); // UBool isFulfilled(double number); UBool isFulfilled(const IFixedDecimal &number); }; class OrConstraint : public UMemory { public: - AndConstraint *childNode; - OrConstraint *next; - OrConstraint(); + AndConstraint *childNode = nullptr; + OrConstraint *next = nullptr; + // Internal error status, used for errors that occur during the copy constructor. + UErrorCode fInternalStatus = U_ZERO_ERROR; + OrConstraint() = default; OrConstraint(const OrConstraint& other); virtual ~OrConstraint(); - AndConstraint* add(); + AndConstraint* add(UErrorCode& status); // UBool isFulfilled(double number); UBool isFulfilled(const IFixedDecimal &number); }; @@ -344,15 +347,16 @@ class OrConstraint : public UMemory { class RuleChain : public UMemory { public: UnicodeString fKeyword; - RuleChain *fNext; - OrConstraint *ruleHeader; + RuleChain *fNext = nullptr; + OrConstraint *ruleHeader = nullptr; UnicodeString fDecimalSamples; // Samples strings from rule source UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. - UBool fDecimalSamplesUnbounded; - UBool fIntegerSamplesUnbounded; - + UBool fDecimalSamplesUnbounded = FALSE; + UBool fIntegerSamplesUnbounded = FALSE; + // Internal error status, used for errors that occur during the copy constructor. + UErrorCode fInternalStatus = U_ZERO_ERROR; - RuleChain(); + RuleChain() = default; RuleChain(const RuleChain& other); virtual ~RuleChain(); @@ -386,8 +390,8 @@ class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { virtual int32_t count(UErrorCode& status) const; private: UErrorCode fOpenStatus; - UResourceBundle *fLocales; - UResourceBundle *fRes; + UResourceBundle *fLocales = nullptr; + UResourceBundle *fRes = nullptr; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/rbnf.cpp b/deps/icu-small/source/i18n/rbnf.cpp index ab9ad15e8c720d..74707ccd22d2ac 100644 --- a/deps/icu-small/source/i18n/rbnf.cpp +++ b/deps/icu-small/source/i18n/rbnf.cpp @@ -680,7 +680,7 @@ StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) c RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, const UnicodeString& locs, const Locale& alocale, UParseError& perror, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -689,7 +689,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -705,7 +705,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, const UnicodeString& locs, UParseError& perror, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -714,7 +714,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -730,7 +730,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, LocalizationInfo* info, const Locale& alocale, UParseError& perror, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -739,7 +739,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -754,7 +754,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, UParseError& perror, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -763,7 +763,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -779,7 +779,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, const Locale& aLocale, UParseError& perror, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -788,7 +788,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -801,7 +801,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description, } RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status) - : ruleSets(NULL) + : fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -810,7 +810,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -868,7 +868,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) : NumberFormat(rhs) - , ruleSets(NULL) + , fRuleSets(NULL) , ruleSetDescriptions(NULL) , numRuleSets(0) , defaultRuleSet(NULL) @@ -877,7 +877,7 @@ RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs) , decimalFormatSymbols(NULL) , defaultInfinityRule(NULL) , defaultNaNRule(NULL) - , roundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) + , fRoundingMode(DecimalFormat::ERoundingMode::kRoundUnnecessary) , lenient(FALSE) , lenientParseRules(NULL) , localizations(NULL) @@ -950,8 +950,8 @@ RuleBasedNumberFormat::operator==(const Format& other) const ? FALSE : *localizations == rhs.localizations))) { - NFRuleSet** p = ruleSets; - NFRuleSet** q = rhs.ruleSets; + NFRuleSet** p = fRuleSets; + NFRuleSet** q = rhs.fRuleSets; if (p == NULL) { return q == NULL; } else if (q == NULL) { @@ -972,8 +972,8 @@ UnicodeString RuleBasedNumberFormat::getRules() const { UnicodeString result; - if (ruleSets != NULL) { - for (NFRuleSet** p = ruleSets; *p; ++p) { + if (fRuleSets != NULL) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { (*p)->appendRules(result); } } @@ -987,9 +987,9 @@ RuleBasedNumberFormat::getRuleSetName(int32_t index) const UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1); return string; } - else if (ruleSets) { + else if (fRuleSets) { UnicodeString result; - for (NFRuleSet** p = ruleSets; *p; ++p) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { NFRuleSet* rs = *p; if (rs->isPublic()) { if (--index == -1) { @@ -1010,8 +1010,8 @@ RuleBasedNumberFormat::getNumberOfRuleSetNames() const if (localizations) { result = localizations->getNumberOfRuleSets(); } - else if (ruleSets) { - for (NFRuleSet** p = ruleSets; *p; ++p) { + else if (fRuleSets) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { if ((**p).isPublic()) { ++result; } @@ -1098,8 +1098,8 @@ RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, c NFRuleSet* RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const { - if (U_SUCCESS(status) && ruleSets) { - for (NFRuleSet** p = ruleSets; *p; ++p) { + if (U_SUCCESS(status) && fRuleSets) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { NFRuleSet* rs = *p; if (rs->isNamed(name)) { return rs; @@ -1132,11 +1132,17 @@ RuleBasedNumberFormat::format(const DecimalQuantity &number, // The DecimalFormat will provide more accurate results. // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. - NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + LocalPointer decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status); + if (decimalFormat.isNull()) { + return appendTo; + } Formattable f; - f.adoptDecimalQuantity(new DecimalQuantity(number)); + LocalPointer decimalQuantity(new DecimalQuantity(number), status); + if (decimalQuantity.isNull()) { + return appendTo; + } + f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity. decimalFormat->format(f, appendTo, posIter, status); - delete decimalFormat; } } return appendTo; @@ -1165,11 +1171,17 @@ RuleBasedNumberFormat::format(const DecimalQuantity &number, // The DecimalFormat will provide more accurate results. // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. - NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + LocalPointer decimalFormat(NumberFormat::createInstance(locale, UNUM_DECIMAL, status), status); + if (decimalFormat.isNull()) { + return appendTo; + } Formattable f; - f.adoptDecimalQuantity(new DecimalQuantity(number)); + LocalPointer decimalQuantity(new DecimalQuantity(number), status); + if (decimalQuantity.isNull()) { + return appendTo; + } + f.adoptDecimalQuantity(decimalQuantity.orphan()); // f now owns decimalQuantity. decimalFormat->format(f, appendTo, pos, status); - delete decimalFormat; } } return appendTo; @@ -1312,11 +1324,19 @@ RuleBasedNumberFormat::format(int64_t number, NFRuleSet *ruleSet, UnicodeString& // TODO this section should probably be optimized. The DecimalFormat is shared in ICU4J. NumberFormat *decimalFormat = NumberFormat::createInstance(locale, UNUM_DECIMAL, status); + if (decimalFormat == nullptr) { + return toAppendTo; + } Formattable f; FieldPosition pos(FieldPosition::DONT_CARE); - DecimalQuantity *digitList = new DecimalQuantity(); - digitList->setToLong(number); - f.adoptDecimalQuantity(digitList); + DecimalQuantity *decimalQuantity = new DecimalQuantity(); + if (decimalQuantity == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + delete decimalFormat; + return toAppendTo; + } + decimalQuantity->setToLong(number); + f.adoptDecimalQuantity(decimalQuantity); // f now owns decimalQuantity. decimalFormat->format(f, toAppendTo, pos, status); delete decimalFormat; } @@ -1358,7 +1378,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, Formattable& result, ParsePosition& parsePosition) const { - if (!ruleSets) { + if (!fRuleSets) { parsePosition.setErrorIndex(0); return; } @@ -1369,7 +1389,7 @@ RuleBasedNumberFormat::parse(const UnicodeString& text, ParsePosition high_pp(0); Formattable high_result; - for (NFRuleSet** p = ruleSets; *p; ++p) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { NFRuleSet *rp = *p; if (rp->isPublic() && rp->isParseable()) { ParsePosition working_pp(0); @@ -1457,7 +1477,7 @@ void RuleBasedNumberFormat::initDefaultRuleSet() { defaultRuleSet = NULL; - if (!ruleSets) { + if (!fRuleSets) { return; } @@ -1465,7 +1485,7 @@ RuleBasedNumberFormat::initDefaultRuleSet() const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal")); const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration")); - NFRuleSet**p = &ruleSets[0]; + NFRuleSet**p = &fRuleSets[0]; while (*p) { if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) { defaultRuleSet = *p; @@ -1477,7 +1497,7 @@ RuleBasedNumberFormat::initDefaultRuleSet() defaultRuleSet = *--p; if (!defaultRuleSet->isPublic()) { - while (p != ruleSets) { + while (p != fRuleSets) { if ((*--p)->isPublic()) { defaultRuleSet = *p; break; @@ -1547,7 +1567,7 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali // from the description lenientParseRules = new UnicodeString(); /* test for NULL */ - if (lenientParseRules == 0) { + if (lenientParseRules == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1568,15 +1588,15 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali ++numRuleSets; // our rule list is an array of the appropriate size - ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); + fRuleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *)); /* test for NULL */ - if (ruleSets == 0) { + if (fRuleSets == 0) { status = U_MEMORY_ALLOCATION_ERROR; return; } for (int i = 0; i <= numRuleSets; ++i) { - ruleSets[i] = NULL; + fRuleSets[i] = NULL; } // divide up the descriptions into individual rule-set descriptions @@ -1592,7 +1612,7 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali } ruleSetDescriptions = new UnicodeString[numRuleSets]; - if (ruleSetDescriptions == 0) { + if (ruleSetDescriptions == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1602,8 +1622,8 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali int32_t start = 0; for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) { ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start); - ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); - if (ruleSets[curRuleSet] == 0) { + fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); + if (fRuleSets[curRuleSet] == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1611,8 +1631,8 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali start = p + 1; } ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start); - ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); - if (ruleSets[curRuleSet] == 0) { + fRuleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status); + if (fRuleSets[curRuleSet] == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1630,11 +1650,11 @@ RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* locali initDefaultRuleSet(); // finally, we can go back through the temporary descriptions - // list and finish seting up the substructure (and we throw + // list and finish setting up the substructure (and we throw // away the temporary descriptions as we go) { for (int i = 0; i < numRuleSets; i++) { - ruleSets[i]->parseRules(ruleSetDescriptions[i], status); + fRuleSets[i]->parseRules(ruleSetDescriptions[i], status); } } @@ -1680,7 +1700,7 @@ RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status) if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) || (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) { - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status); if (U_FAILURE(status)) { delete capitalizationBrkIter; @@ -1704,8 +1724,8 @@ RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale) int32_t len = 0; const int32_t * intVector = ures_getIntVector(rb, &len, &status); if (U_SUCCESS(status) && intVector != NULL && len >= 2) { - capitalizationForUIListMenu = intVector[0]; - capitalizationForStandAlone = intVector[1]; + capitalizationForUIListMenu = static_cast(intVector[0]); + capitalizationForStandAlone = static_cast(intVector[1]); } } ures_close(rb); @@ -1740,7 +1760,7 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) start = p + 1; } - // when we get here, we've seeked off the end of the sring, and + // when we get here, we've seeked off the end of the string, and // we terminate the loop (we continue until *start* is -1 rather // than until *p* is -1, because otherwise we'd miss the last // rule in the description) @@ -1756,12 +1776,12 @@ RuleBasedNumberFormat::stripWhitespace(UnicodeString& description) void RuleBasedNumberFormat::dispose() { - if (ruleSets) { - for (NFRuleSet** p = ruleSets; *p; ++p) { + if (fRuleSets) { + for (NFRuleSet** p = fRuleSets; *p; ++p) { delete *p; } - uprv_free(ruleSets); - ruleSets = NULL; + uprv_free(fRuleSets); + fRuleSets = NULL; } if (ruleSetDescriptions) { @@ -1811,7 +1831,7 @@ const RuleBasedCollator* RuleBasedNumberFormat::getCollator() const { #if !UCONFIG_NO_COLLATION - if (!ruleSets) { + if (!fRuleSets) { return NULL; } @@ -1820,7 +1840,7 @@ RuleBasedNumberFormat::getCollator() const // create a default collator based on the formatter's locale, // then pull out that collator's rules, append any additional // rules specified in the description, and create a _new_ - // collator based on the combinaiton of those rules + // collator based on the combination of those rules UErrorCode status = U_ZERO_ERROR; @@ -1863,13 +1883,10 @@ RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status) // lazy-evaluate the DecimalFormatSymbols object. This object // is shared by all DecimalFormat instances belonging to this // formatter - if (decimalFormatSymbols == NULL) { - DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status); + if (decimalFormatSymbols == nullptr) { + LocalPointer temp(new DecimalFormatSymbols(locale, status), status); if (U_SUCCESS(status)) { - decimalFormatSymbols = temp; - } - else { - delete temp; + decimalFormatSymbols = temp.orphan(); } } return decimalFormatSymbols; @@ -1889,17 +1906,14 @@ NFRule* RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } if (defaultInfinityRule == NULL) { UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: ")); rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol)); - NFRule* temp = new NFRule(this, rule, status); + LocalPointer temp(new NFRule(this, rule, status), status); if (U_SUCCESS(status)) { - defaultInfinityRule = temp; - } - else { - delete temp; + defaultInfinityRule = temp.orphan(); } } return defaultInfinityRule; @@ -1915,17 +1929,14 @@ NFRule* RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status) { if (U_FAILURE(status)) { - return NULL; + return nullptr; } - if (defaultNaNRule == NULL) { + if (defaultNaNRule == nullptr) { UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: ")); rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol)); - NFRule* temp = new NFRule(this, rule, status); + LocalPointer temp(new NFRule(this, rule, status), status); if (U_SUCCESS(status)) { - defaultNaNRule = temp; - } - else { - delete temp; + defaultNaNRule = temp.orphan(); } } return defaultNaNRule; @@ -1963,15 +1974,15 @@ RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsTo defaultNaNRule = NULL; initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols - if (ruleSets) { + if (fRuleSets) { for (int32_t i = 0; i < numRuleSets; i++) { - ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); + fRuleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status); } } } } -// Setting the symbols is equlivalent to adopting a newly created localized symbols. +// Setting the symbols is equivalent to adopting a newly created localized symbols. void RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols) { @@ -1983,7 +1994,11 @@ RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, const UnicodeString &pattern, UErrorCode& status) const { - return new PluralFormat(locale, pluralType, pattern, status); + auto *pf = new PluralFormat(locale, pluralType, pattern, status); + if (pf == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + return pf; } /** @@ -1991,7 +2006,7 @@ RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType, * @return A rounding mode */ DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const { - return roundingMode; + return fRoundingMode; } /** @@ -2000,7 +2015,7 @@ DecimalFormat::ERoundingMode RuleBasedNumberFormat::getRoundingMode() const { * @param roundingMode A rounding mode */ void RuleBasedNumberFormat::setRoundingMode(DecimalFormat::ERoundingMode roundingMode) { - this->roundingMode = roundingMode; + fRoundingMode = roundingMode; } U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/rbt.h b/deps/icu-small/source/i18n/rbt.h index 005fb853847b11..a18452ab2ecb49 100644 --- a/deps/icu-small/source/i18n/rbt.h +++ b/deps/icu-small/source/i18n/rbt.h @@ -29,261 +29,9 @@ class TransliterationRuleData; /** * RuleBasedTransliterator is a transliterator - * that reads a set of rules in order to determine how to perform - * translations. Rule sets are stored in resource bundles indexed by - * name. Rules within a rule set are separated by semicolons (';'). - * To include a literal semicolon, prefix it with a backslash ('\'). - * Whitespace, as defined by Character.isWhitespace(), - * is ignored. If the first non-blank character on a line is '#', - * the entire line is ignored as a comment.

    - * - *

    Each set of rules consists of two groups, one forward, and one - * reverse. This is a convention that is not enforced; rules for one - * direction may be omitted, with the result that translations in - * that direction will not modify the source text. In addition, - * bidirectional forward-reverse rules may be specified for - * symmetrical transformations.

    - * - *

    Rule syntax

    - * - *

    Rule statements take one of the following forms:

    - * - *
    - *
    $alefmadda=\u0622;
    - *
    Variable definition. The name on the - * left is assigned the text on the right. In this example, - * after this statement, instances of the left hand name, - * "$alefmadda", will be replaced by - * the Unicode character U+0622. Variable names must begin - * with a letter and consist only of letters, digits, and - * underscores. Case is significant. Duplicate names cause - * an exception to be thrown, that is, variables cannot be - * redefined. The right hand side may contain well-formed - * text of any length, including no text at all ("$empty=;"). - * The right hand side may contain embedded UnicodeSet - * patterns, for example, "$softvowel=[eiyEIY]".
    - *
     
    - *
    ai>$alefmadda;
    - *
    Forward translation rule. This rule - * states that the string on the left will be changed to the - * string on the right when performing forward - * transliteration.
    - *
     
    - *
    ai<$alefmadda;
    - *
    Reverse translation rule. This rule - * states that the string on the right will be changed to - * the string on the left when performing reverse - * transliteration.
    - *
    - * - *
    - *
    ai<>$alefmadda;
    - *
    Bidirectional translation rule. This - * rule states that the string on the right will be changed - * to the string on the left when performing forward - * transliteration, and vice versa when performing reverse - * transliteration.
    - *
    - * - *

    Translation rules consist of a match pattern and an output - * string. The match pattern consists of literal characters, - * optionally preceded by context, and optionally followed by - * context. Context characters, like literal pattern characters, - * must be matched in the text being transliterated. However, unlike - * literal pattern characters, they are not replaced by the output - * text. For example, the pattern "abc{def}" - * indicates the characters "def" must be - * preceded by "abc" for a successful match. - * If there is a successful match, "def" will - * be replaced, but not "abc". The final '}' - * is optional, so "abc{def" is equivalent to - * "abc{def}". Another example is "{123}456" - * (or "123}456") in which the literal - * pattern "123" must be followed by "456". - *

    - * - *

    The output string of a forward or reverse rule consists of - * characters to replace the literal pattern characters. If the - * output string contains the character '|', this is - * taken to indicate the location of the cursor after - * replacement. The cursor is the point in the text at which the - * next replacement, if any, will be applied. The cursor is usually - * placed within the replacement text; however, it can actually be - * placed into the precending or following context by using the - * special character '@'. Examples:

    - * - *
    - *

    a {foo} z > | @ bar; # foo -> bar, move cursor - * before a
    - * {foo} xyz > bar @@|; # foo -> bar, cursor between - * y and z

    - *
    - * - *

    UnicodeSet

    - * - *

    UnicodeSet patterns may appear anywhere that - * makes sense. They may appear in variable definitions. - * Contrariwise, UnicodeSet patterns may themselves - * contain variable references, such as "$a=[a-z];$not_a=[^$a]", - * or "$range=a-z;$ll=[$range]".

    - * - *

    UnicodeSet patterns may also be embedded directly - * into rule strings. Thus, the following two rules are equivalent:

    - * - *
    - *

    $vowel=[aeiou]; $vowel>'*'; # One way to do this
    - * [aeiou]>'*'; - *                # - * Another way

    - *
    - * - *

    See {@link UnicodeSet} for more documentation and examples.

    - * - *

    Segments

    - * - *

    Segments of the input string can be matched and copied to the - * output string. This makes certain sets of rules simpler and more - * general, and makes reordering possible. For example:

    - * - *
    - *

    ([a-z]) > $1 $1; - *           # - * double lowercase letters
    - * ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs

    - *
    - * - *

    The segment of the input string to be copied is delimited by - * "(" and ")". Up to - * nine segments may be defined. Segments may not overlap. In the - * output string, "$1" through "$9" - * represent the input string segments, in left-to-right order of - * definition.

    - * - *

    Anchors

    - * - *

    Patterns can be anchored to the beginning or the end of the text. This is done with the - * special characters '^' and '$'. For example:

    - * - *
    - *

    ^ a   > 'BEG_A';   # match 'a' at start of text
    - *   a   > 'A';       # match other instances - * of 'a'
    - *   z $ > 'END_Z';   # match 'z' at end of text
    - *   z   > 'Z';       # match other instances - * of 'z'

    - *
    - * - *

    It is also possible to match the beginning or the end of the text using a UnicodeSet. - * This is done by including a virtual anchor character '$' at the end of the - * set pattern. Although this is usually the match chafacter for the end anchor, the set will - * match either the beginning or the end of the text, depending on its placement. For - * example:

    - * - *
    - *

    $x = [a-z$];   # match 'a' through 'z' OR anchor
    - * $x 1    > 2;   # match '1' after a-z or at the start
    - *    3 $x > 4;   # match '3' before a-z or at the end

    - *
    - * - *

    Example

    - * - *

    The following example rules illustrate many of the features of - * the rule language.

    - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
    Rule 1.abc{def}>x|y
    Rule 2.xyz>r
    Rule 3.yz>q
    - * - *

    Applying these rules to the string "adefabcdefz" - * yields the following results:

    - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - * - *
    |adefabcdefzInitial state, no rules match. Advance - * cursor.
    a|defabcdefzStill no match. Rule 1 does not match - * because the preceding context is not present.
    ad|efabcdefzStill no match. Keep advancing until - * there is a match...
    ade|fabcdefz...
    adef|abcdefz...
    adefa|bcdefz...
    adefab|cdefz...
    adefabc|defzRule 1 matches; replace "def" - * with "xy" and back up the cursor - * to before the 'y'.
    adefabcx|yzAlthough "xyz" is - * present, rule 2 does not match because the cursor is - * before the 'y', not before the 'x'. - * Rule 3 does match. Replace "yz" - * with "q".
    adefabcxq|The cursor is at the end; - * transliteration is complete.
    - * - *

    The order of rules is significant. If multiple rules may match - * at some point, the first matching rule is applied.

    - * - *

    Forward and reverse rules may have an empty output string. - * Otherwise, an empty left or right hand side of any statement is a - * syntax error.

    - * - *

    Single quotes are used to quote any character other than a - * digit or letter. To specify a single quote itself, inside or - * outside of quotes, use two single quotes in a row. For example, - * the rule "'>'>o''clock" changes the - * string ">" to the string "o'clock". - *

    - * - *

    Notes

    - * - *

    While a RuleBasedTransliterator is being built, it checks that - * the rules are added in proper order. For example, if the rule - * "a>x" is followed by the rule "ab>y", - * then the second rule will throw an exception. The reason is that - * the second rule can never be triggered, since the first rule - * always matches anything it matches. In other words, the first - * rule masks the second rule.

    + * built from a set of rules as defined for + * Transliterator::createFromRules(). + * See the C++ class Transliterator documentation for the rule syntax. * * @author Alan Liu * @internal Use transliterator factory methods instead since this class will be removed in that release. diff --git a/deps/icu-small/source/i18n/rbt_pars.cpp b/deps/icu-small/source/i18n/rbt_pars.cpp index 8e49a8473a5ab0..e07cc8b63a408c 100644 --- a/deps/icu-small/source/i18n/rbt_pars.cpp +++ b/deps/icu-small/source/i18n/rbt_pars.cpp @@ -194,9 +194,9 @@ const UnicodeFunctor* ParseData::lookupMatcher(UChar32 ch) const { const UnicodeFunctor* set = NULL; int32_t i = ch - data->variablesBase; if (i >= 0 && i < variablesVector->size()) { - int32_t i = ch - data->variablesBase; - set = (i < variablesVector->size()) ? - (UnicodeFunctor*) variablesVector->elementAt(i) : 0; + int32_t j = ch - data->variablesBase; + set = (j < variablesVector->size()) ? + (UnicodeFunctor*) variablesVector->elementAt(j) : 0; } return set; } @@ -1108,8 +1108,8 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, } data->variableNames.removeAll(); - int32_t pos = UHASH_FIRST; - const UHashElement* he = variableNames.nextElement(pos); + int32_t p = UHASH_FIRST; + const UHashElement* he = variableNames.nextElement(p); while (he != NULL) { UnicodeString* tempus = (UnicodeString*)(((UnicodeString*)(he->value.pointer))->clone()); if (tempus == NULL) { @@ -1118,7 +1118,7 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, } data->variableNames.put(*((UnicodeString*)(he->key.pointer)), tempus, status); - he = variableNames.nextElement(pos); + he = variableNames.nextElement(p); } } variablesVector.removeAllElements(); // keeps them from getting deleted when we succeed diff --git a/deps/icu-small/source/i18n/regexcmp.cpp b/deps/icu-small/source/i18n/regexcmp.cpp index 410ff9513bb82a..0c5fca6f67b3f9 100644 --- a/deps/icu-small/source/i18n/regexcmp.cpp +++ b/deps/icu-small/source/i18n/regexcmp.cpp @@ -28,6 +28,7 @@ #include "patternprops.h" #include "putilimp.h" #include "cmemory.h" +#include "cstr.h" #include "cstring.h" #include "uvectr32.h" #include "uvectr64.h" @@ -3892,7 +3893,7 @@ void RegexCompile::stripNOPs() { // //------------------------------------------------------------------------------ void RegexCompile::error(UErrorCode e) { - if (U_SUCCESS(*fStatus)) { + if (U_SUCCESS(*fStatus) || e == U_MEMORY_ALLOCATION_ERROR) { *fStatus = e; // Hmm. fParseErr (UParseError) line & offset fields are int32_t in public // API (see common/unicode/parseerr.h), while fLineNum and fCharNum are @@ -4370,209 +4371,209 @@ static inline void addIdentifierIgnorable(UnicodeSet *set, UErrorCode& ec) { // Includes trying the Java "properties" that aren't supported as // normal ICU UnicodeSet properties // -static const UChar posSetPrefix[] = {0x5b, 0x5c, 0x70, 0x7b, 0}; // "[\p{" -static const UChar negSetPrefix[] = {0x5b, 0x5c, 0x50, 0x7b, 0}; // "[\P{" UnicodeSet *RegexCompile::createSetForProperty(const UnicodeString &propName, UBool negated) { - UnicodeString setExpr; - UnicodeSet *set; - uint32_t usetFlags = 0; if (U_FAILURE(*fStatus)) { - return NULL; + return nullptr; } + LocalPointer set; + UErrorCode status = U_ZERO_ERROR; - // - // First try the property as we received it - // - if (negated) { - setExpr.append(negSetPrefix, -1); - } else { - setExpr.append(posSetPrefix, -1); - } - setExpr.append(propName); - setExpr.append(chRBrace); - setExpr.append(chRBracket); - if (fModeFlags & UREGEX_CASE_INSENSITIVE) { - usetFlags |= USET_CASE_INSENSITIVE; - } - set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus); - if (U_SUCCESS(*fStatus)) { - return set; - } - delete set; - set = NULL; - - // - // The property as it was didn't work. - - // Do [:word:]. It is not recognized as a property by UnicodeSet. "word" not standard POSIX - // or standard Java, but many other regular expression packages do recognize it. - - if (propName.caseCompare(UNICODE_STRING_SIMPLE("word"), 0) == 0) { - *fStatus = U_ZERO_ERROR; - set = new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET])); - if (set == NULL) { - *fStatus = U_MEMORY_ALLOCATION_ERROR; - return set; + do { // non-loop, exists to allow breaks from the block. + // + // First try the property as we received it + // + UnicodeString setExpr; + uint32_t usetFlags = 0; + setExpr.append(u"[\\p{", -1); + setExpr.append(propName); + setExpr.append(u"}]", -1); + if (fModeFlags & UREGEX_CASE_INSENSITIVE) { + usetFlags |= USET_CASE_INSENSITIVE; } - if (negated) { - set->complement(); + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(setExpr, usetFlags, NULL, status), status); + if (U_SUCCESS(status) || status == U_MEMORY_ALLOCATION_ERROR) { + break; } - return set; - } + // + // The incoming property wasn't directly recognized by ICU. - // Do Java fixes - - // InGreek -> InGreek or Coptic, that being the official Unicode name for that block. - // InCombiningMarksforSymbols -> InCombiningDiacriticalMarksforSymbols. - // - // Note on Spaces: either "InCombiningMarksForSymbols" or "InCombining Marks for Symbols" - // is accepted by Java. The property part of the name is compared - // case-insenstively. The spaces must be exactly as shown, either - // all there, or all omitted, with exactly one at each position - // if they are present. From checking against JDK 1.6 - // - // This code should be removed when ICU properties support the Java compatibility names - // (ICU 4.0?) - // - UnicodeString mPropName = propName; - if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InGreek"), 0) == 0) { - mPropName = UNICODE_STRING_SIMPLE("InGreek and Coptic"); - } - if (mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombining Marks for Symbols"), 0) == 0 || - mPropName.caseCompare(UNICODE_STRING_SIMPLE("InCombiningMarksforSymbols"), 0) == 0) { - mPropName = UNICODE_STRING_SIMPLE("InCombining Diacritical Marks for Symbols"); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) { - mPropName = UNICODE_STRING_SIMPLE("javaValidCodePoint"); - } + // Check [:word:] and [:all:]. These are not recognized as a properties by ICU UnicodeSet. + // Java accepts 'word' with mixed case. + // Java accepts 'all' only in all lower case. - // See if the property looks like a Java "InBlockName", which - // we will recast as "Block=BlockName" - // - if (mPropName.startsWith(u"In", 2) && propName.length()>=3) { - setExpr.truncate(4); // Leaves "[\p{", or "[\P{" - setExpr.append(u"Block=", -1); - setExpr.append(UnicodeString(mPropName, 2)); // Property with the leading "In" removed. - setExpr.append(chRBrace); - setExpr.append(chRBracket); - *fStatus = U_ZERO_ERROR; - set = new UnicodeSet(setExpr, usetFlags, NULL, *fStatus); - if (U_SUCCESS(*fStatus)) { - return set; + status = U_ZERO_ERROR; + if (propName.caseCompare(u"word", -1, 0) == 0) { + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(*(fRXPat->fStaticSets[URX_ISWORD_SET])), status); + break; + } + if (propName.compare(u"all", -1) == 0) { + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(0, 0x10ffff), status); + break; } - delete set; - set = NULL; - } - if (propName.startsWith(UNICODE_STRING_SIMPLE("java")) || - propName.compare(UNICODE_STRING_SIMPLE("all")) == 0) - { - UErrorCode localStatus = U_ZERO_ERROR; - //setExpr.remove(); - set = new UnicodeSet(); - // - // Try the various Java specific properties. - // These all begin with "java" + + // Do Java InBlock expressions // - if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDefined")) == 0) { - addCategory(set, U_GC_CN_MASK, localStatus); - set->complement(); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaDigit")) == 0) { - addCategory(set, U_GC_ND_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaIdentifierIgnorable")) == 0) { - addIdentifierIgnorable(set, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaISOControl")) == 0) { - set->add(0, 0x1F).add(0x7F, 0x9F); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierPart")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - addCategory(set, U_GC_SC_MASK, localStatus); - addCategory(set, U_GC_PC_MASK, localStatus); - addCategory(set, U_GC_ND_MASK, localStatus); - addCategory(set, U_GC_NL_MASK, localStatus); - addCategory(set, U_GC_MC_MASK, localStatus); - addCategory(set, U_GC_MN_MASK, localStatus); - addIdentifierIgnorable(set, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaJavaIdentifierStart")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - addCategory(set, U_GC_NL_MASK, localStatus); - addCategory(set, U_GC_SC_MASK, localStatus); - addCategory(set, U_GC_PC_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetter")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLetterOrDigit")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - addCategory(set, U_GC_ND_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaLowerCase")) == 0) { - addCategory(set, U_GC_LL_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaMirrored")) == 0) { - set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSpaceChar")) == 0) { - addCategory(set, U_GC_Z_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaSupplementaryCodePoint")) == 0) { - set->add(0x10000, UnicodeSet::MAX_VALUE); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaTitleCase")) == 0) { - addCategory(set, U_GC_LT_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierStart")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - addCategory(set, U_GC_NL_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUnicodeIdentifierPart")) == 0) { - addCategory(set, U_GC_L_MASK, localStatus); - addCategory(set, U_GC_PC_MASK, localStatus); - addCategory(set, U_GC_ND_MASK, localStatus); - addCategory(set, U_GC_NL_MASK, localStatus); - addCategory(set, U_GC_MC_MASK, localStatus); - addCategory(set, U_GC_MN_MASK, localStatus); - addIdentifierIgnorable(set, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaUpperCase")) == 0) { - addCategory(set, U_GC_LU_MASK, localStatus); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaValidCodePoint")) == 0) { - set->add(0, UnicodeSet::MAX_VALUE); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("javaWhitespace")) == 0) { - addCategory(set, U_GC_Z_MASK, localStatus); - set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f)); - set->add(9, 0x0d).add(0x1c, 0x1f); - } - else if (mPropName.compare(UNICODE_STRING_SIMPLE("all")) == 0) { - set->add(0, UnicodeSet::MAX_VALUE); + UnicodeString mPropName = propName; + if (mPropName.startsWith(u"In", 2) && mPropName.length() >= 3) { + status = U_ZERO_ERROR; + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status); + if (U_FAILURE(status)) { + break; + } + UnicodeString blockName(mPropName, 2); // Property with the leading "In" removed. + set->applyPropertyAlias(UnicodeString(u"Block"), blockName, status); + break; } - if (U_SUCCESS(localStatus) && !set->isEmpty()) { - *fStatus = U_ZERO_ERROR; - if (usetFlags & USET_CASE_INSENSITIVE) { + // Check for the Java form "IsBooleanPropertyValue", which we will recast + // as "BooleanPropertyValue". The property value can be either a + // a General Category or a Script Name. + + if (propName.startsWith(u"Is", 2) && propName.length()>=3) { + mPropName.remove(0, 2); // Strip the "Is" + if (mPropName.indexOf(u'=') >= 0) { + // Reject any "Is..." property expression containing an '=', that is, + // any non-binary property expression. + status = U_REGEX_PROPERTY_SYNTAX; + break; + } + + if (mPropName.caseCompare(u"assigned", -1, 0) == 0) { + mPropName.setTo(u"unassigned", -1); + negated = !negated; + } else if (mPropName.caseCompare(u"TitleCase", -1, 0) == 0) { + mPropName.setTo(u"Titlecase_Letter", -1); + } + + mPropName.insert(0, u"[\\p{", -1); + mPropName.append(u"}]", -1); + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(mPropName, *fStatus), status); + + if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) { set->closeOver(USET_CASE_INSENSITIVE); } - if (negated) { + break; + + } + + if (propName.startsWith(u"java", -1)) { + status = U_ZERO_ERROR; + set.adoptInsteadAndCheckErrorCode(new UnicodeSet(), status); + if (U_FAILURE(status)) { + break; + } + // + // Try the various Java specific properties. + // These all begin with "java" + // + if (propName.compare(u"javaDefined", -1) == 0) { + addCategory(set.getAlias(), U_GC_CN_MASK, status); set->complement(); } - return set; + else if (propName.compare(u"javaDigit", -1) == 0) { + addCategory(set.getAlias(), U_GC_ND_MASK, status); + } + else if (propName.compare(u"javaIdentifierIgnorable", -1) == 0) { + addIdentifierIgnorable(set.getAlias(), status); + } + else if (propName.compare(u"javaISOControl", -1) == 0) { + set->add(0, 0x1F).add(0x7F, 0x9F); + } + else if (propName.compare(u"javaJavaIdentifierPart", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + addCategory(set.getAlias(), U_GC_SC_MASK, status); + addCategory(set.getAlias(), U_GC_PC_MASK, status); + addCategory(set.getAlias(), U_GC_ND_MASK, status); + addCategory(set.getAlias(), U_GC_NL_MASK, status); + addCategory(set.getAlias(), U_GC_MC_MASK, status); + addCategory(set.getAlias(), U_GC_MN_MASK, status); + addIdentifierIgnorable(set.getAlias(), status); + } + else if (propName.compare(u"javaJavaIdentifierStart", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + addCategory(set.getAlias(), U_GC_NL_MASK, status); + addCategory(set.getAlias(), U_GC_SC_MASK, status); + addCategory(set.getAlias(), U_GC_PC_MASK, status); + } + else if (propName.compare(u"javaLetter", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + } + else if (propName.compare(u"javaLetterOrDigit", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + addCategory(set.getAlias(), U_GC_ND_MASK, status); + } + else if (propName.compare(u"javaLowerCase", -1) == 0) { + addCategory(set.getAlias(), U_GC_LL_MASK, status); + } + else if (propName.compare(u"javaMirrored", -1) == 0) { + set->applyIntPropertyValue(UCHAR_BIDI_MIRRORED, 1, status); + } + else if (propName.compare(u"javaSpaceChar", -1) == 0) { + addCategory(set.getAlias(), U_GC_Z_MASK, status); + } + else if (propName.compare(u"javaSupplementaryCodePoint", -1) == 0) { + set->add(0x10000, UnicodeSet::MAX_VALUE); + } + else if (propName.compare(u"javaTitleCase", -1) == 0) { + addCategory(set.getAlias(), U_GC_LT_MASK, status); + } + else if (propName.compare(u"javaUnicodeIdentifierStart", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + addCategory(set.getAlias(), U_GC_NL_MASK, status); + } + else if (propName.compare(u"javaUnicodeIdentifierPart", -1) == 0) { + addCategory(set.getAlias(), U_GC_L_MASK, status); + addCategory(set.getAlias(), U_GC_PC_MASK, status); + addCategory(set.getAlias(), U_GC_ND_MASK, status); + addCategory(set.getAlias(), U_GC_NL_MASK, status); + addCategory(set.getAlias(), U_GC_MC_MASK, status); + addCategory(set.getAlias(), U_GC_MN_MASK, status); + addIdentifierIgnorable(set.getAlias(), status); + } + else if (propName.compare(u"javaUpperCase", -1) == 0) { + addCategory(set.getAlias(), U_GC_LU_MASK, status); + } + else if (propName.compare(u"javaValidCodePoint", -1) == 0) { + set->add(0, UnicodeSet::MAX_VALUE); + } + else if (propName.compare(u"javaWhitespace", -1) == 0) { + addCategory(set.getAlias(), U_GC_Z_MASK, status); + set->removeAll(UnicodeSet().add(0xa0).add(0x2007).add(0x202f)); + set->add(9, 0x0d).add(0x1c, 0x1f); + } else { + status = U_REGEX_PROPERTY_SYNTAX; + } + + if (U_SUCCESS(status) && !set->isEmpty() && (usetFlags & USET_CASE_INSENSITIVE)) { + set->closeOver(USET_CASE_INSENSITIVE); + } + break; + } + + // Unrecognized property. ICU didn't like it as it was, and none of the Java compatibility + // extensions matched it. + status = U_REGEX_PROPERTY_SYNTAX; + } while (false); // End of do loop block. Code above breaks out of the block on success or hard failure. + + if (U_SUCCESS(status)) { + U_ASSERT(set.isValid()); + if (negated) { + set->complement(); } - delete set; - set = NULL; + return set.orphan(); + } else { + if (status == U_ILLEGAL_ARGUMENT_ERROR) { + status = U_REGEX_PROPERTY_SYNTAX; + } + error(status); + return nullptr; } - error(*fStatus); - return NULL; } - // // SetEval Part of the evaluation of [set expressions]. // Perform any pending (stacked) operations with precedence diff --git a/deps/icu-small/source/i18n/region.cpp b/deps/icu-small/source/i18n/region.cpp index 66f9ef35ded9ee..f182f61486e6d3 100644 --- a/deps/icu-small/source/i18n/region.cpp +++ b/deps/icu-small/source/i18n/region.cpp @@ -168,10 +168,18 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { continents->addElement(continentName,status); } + UResourceBundle *groupingBundle = nullptr; while ( ures_hasNext(groupingContainment.getAlias()) ) { - UnicodeString *groupingName = new UnicodeString(ures_getNextUnicodeString(groupingContainment.getAlias(),NULL,&status)); - groupings->addElement(groupingName,status); + groupingBundle = ures_getNextResource(groupingContainment.getAlias(), groupingBundle, &status); + if (U_FAILURE(status)) { + break; + } + UnicodeString *groupingName = new UnicodeString(ures_getKey(groupingBundle), -1, US_INV); + if (groupingName) { + groupings->addElement(groupingName,status); + } } + ures_close(groupingBundle); for ( int32_t i = 0 ; i < allRegions->size() ; i++ ) { LocalPointer r(new Region(), status); @@ -182,7 +190,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { r->idStr = *regionName; r->idStr.extract(0,r->idStr.length(),r->id,sizeof(r->id),US_INV); - r->type = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. + r->fType = URGN_TERRITORY; // Only temporary - figure out the real type later once the aliases are known. Formattable result; UErrorCode ps = U_ZERO_ERROR; @@ -190,7 +198,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { if ( U_SUCCESS(ps) ) { r->code = result.getLong(); // Convert string to number uhash_iput(newNumericCodeMap.getAlias(),r->code,(void *)(r.getAlias()),&status); - r->type = URGN_SUBCONTINENT; + r->fType = URGN_SUBCONTINENT; } else { r->code = -1; } @@ -231,9 +239,9 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { } else { aliasFromRegion->code = -1; } - aliasFromRegion->type = URGN_DEPRECATED; + aliasFromRegion->fType = URGN_DEPRECATED; } else { - aliasFromRegion->type = URGN_DEPRECATED; + aliasFromRegion->fType = URGN_DEPRECATED; } { @@ -290,26 +298,26 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { UnicodeString WORLD_ID_STRING(WORLD_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&WORLD_ID_STRING); if ( r ) { - r->type = URGN_WORLD; + r->fType = URGN_WORLD; } UnicodeString UNKNOWN_REGION_ID_STRING(UNKNOWN_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&UNKNOWN_REGION_ID_STRING); if ( r ) { - r->type = URGN_UNKNOWN; + r->fType = URGN_UNKNOWN; } for ( int32_t i = 0 ; i < continents->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)continents->elementAt(i)); if ( r ) { - r->type = URGN_CONTINENT; + r->fType = URGN_CONTINENT; } } for ( int32_t i = 0 ; i < groupings->size() ; i++ ) { r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)groupings->elementAt(i)); if ( r ) { - r->type = URGN_GROUPING; + r->fType = URGN_GROUPING; } } @@ -319,7 +327,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { UnicodeString OUTLYING_OCEANIA_REGION_ID_STRING(OUTLYING_OCEANIA_REGION_ID); r = (Region *) uhash_get(newRegionIDMap.getAlias(),(void *)&OUTLYING_OCEANIA_REGION_ID_STRING); if ( r ) { - r->type = URGN_SUBCONTINENT; + r->fType = URGN_SUBCONTINENT; } // Load territory containment info from the supplemental data. @@ -356,7 +364,7 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { // Set the parent region to be the containing region of the child. // Regions of type GROUPING can't be set as the parent, since another region // such as a SUBCONTINENT, CONTINENT, or WORLD must always be the parent. - if ( parentRegion->type != URGN_GROUPING) { + if ( parentRegion->fType != URGN_GROUPING) { childRegion->containingRegion = parentRegion; } } @@ -367,15 +375,15 @@ void U_CALLCONV Region::loadRegionData(UErrorCode &status) { int32_t pos = UHASH_FIRST; while ( const UHashElement* element = uhash_nextElement(newRegionIDMap.getAlias(),&pos)) { Region *ar = (Region *)element->value.pointer; - if ( availableRegions[ar->type] == NULL ) { + if ( availableRegions[ar->fType] == NULL ) { LocalPointer newAr(new UVector(uprv_deleteUObject, uhash_compareUnicodeString, status), status); - availableRegions[ar->type] = newAr.orphan(); + availableRegions[ar->fType] = newAr.orphan(); } LocalPointer arString(new UnicodeString(ar->idStr), status); if( U_FAILURE(status) ) { return; // error out } - availableRegions[ar->type]->addElement((void *)arString.orphan(),status); + availableRegions[ar->fType]->addElement((void *)arString.orphan(),status); } ucln_i18n_registerCleanup(UCLN_I18N_REGION, region_cleanup); @@ -416,7 +424,7 @@ void Region::cleanupRegionData() { Region::Region () : code(-1), - type(URGN_UNKNOWN), + fType(URGN_UNKNOWN), containingRegion(NULL), containedRegions(NULL), preferredValues(NULL) { @@ -481,7 +489,7 @@ Region::getInstance(const char *region_code, UErrorCode &status) { return NULL; } - if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) { + if ( r->fType == URGN_DEPRECATED && r->preferredValues->size() == 1) { StringEnumeration *pv = r->getPreferredValues(status); pv->reset(status); const UnicodeString *ustr = pv->snext(status); @@ -529,7 +537,7 @@ Region::getInstance (int32_t code, UErrorCode &status) { return NULL; } - if ( r->type == URGN_DEPRECATED && r->preferredValues->size() == 1) { + if ( r->fType == URGN_DEPRECATED && r->preferredValues->size() == 1) { StringEnumeration *pv = r->getPreferredValues(status); pv->reset(status); const UnicodeString *ustr = pv->snext(status); @@ -580,7 +588,7 @@ Region::getContainingRegion(URegionType type) const { return NULL; } - return ( containingRegion->type == type )? containingRegion: containingRegion->getContainingRegion(type); + return ( containingRegion->fType == type)? containingRegion: containingRegion->getContainingRegion(type); } /** @@ -618,9 +626,9 @@ Region::getContainedRegions( URegionType type, UErrorCode &status ) const { StringEnumeration *cr = getContainedRegions(status); for ( int32_t i = 0 ; i < cr->count(status) ; i++ ) { - const char *id = cr->next(NULL,status); - const Region *r = Region::getInstance(id,status); - if ( r->getType() == type ) { + const char *regionId = cr->next(NULL,status); + const Region *r = Region::getInstance(regionId,status); + if ( r->getType() == type) { result->addElement((void *)&r->idStr,status); } else { StringEnumeration *children = r->getContainedRegions(type, status); @@ -672,7 +680,7 @@ Region::contains(const Region &other) const { StringEnumeration* Region::getPreferredValues(UErrorCode &status) const { umtx_initOnce(gRegionDataInitOnce, &loadRegionData, status); // returns immediately if U_FAILURE(status) - if (U_FAILURE(status) || type != URGN_DEPRECATED) { + if (U_FAILURE(status) || fType != URGN_DEPRECATED) { return NULL; } return new RegionNameEnumeration(preferredValues,status); @@ -697,7 +705,7 @@ Region::getNumericCode() const { */ URegionType Region::getType() const { - return type; + return fType; } RegionNameEnumeration::RegionNameEnumeration(UVector *fNameList, UErrorCode& status) { diff --git a/deps/icu-small/source/i18n/reldatefmt.cpp b/deps/icu-small/source/i18n/reldatefmt.cpp index 42ede7ae4d9903..ae251ed20b46a3 100644 --- a/deps/icu-small/source/i18n/reldatefmt.cpp +++ b/deps/icu-small/source/i18n/reldatefmt.cpp @@ -51,13 +51,13 @@ U_NAMESPACE_BEGIN // RelativeDateTimeFormatter specific data for a single locale class RelativeDateTimeCacheData: public SharedObject { public: - RelativeDateTimeCacheData() : combinedDateAndTime(NULL) { + RelativeDateTimeCacheData() : combinedDateAndTime(nullptr) { // Initialize the cache arrays for (int32_t style = 0; style < UDAT_STYLE_COUNT; ++style) { - for (int32_t relUnit = 0; relUnit < UDAT_RELATIVE_UNIT_COUNT; ++relUnit) { + for (int32_t relUnit = 0; relUnit < UDAT_REL_UNIT_COUNT; ++relUnit) { for (int32_t pl = 0; pl < StandardPlural::COUNT; ++pl) { - relativeUnitsFormatters[style][relUnit][0][pl] = NULL; - relativeUnitsFormatters[style][relUnit][1][pl] = NULL; + relativeUnitsFormatters[style][relUnit][0][pl] = nullptr; + relativeUnitsFormatters[style][relUnit][1][pl] = nullptr; } } } @@ -74,7 +74,7 @@ class RelativeDateTimeCacheData: public SharedObject { // e.g., Next Tuesday; Yesterday; etc. For third index, 0 // means past, e.g., 5 days ago; 1 means future, e.g., in 5 days. SimpleFormatter *relativeUnitsFormatters[UDAT_STYLE_COUNT] - [UDAT_RELATIVE_UNIT_COUNT][2][StandardPlural::COUNT]; + [UDAT_REL_UNIT_COUNT][2][StandardPlural::COUNT]; const UnicodeString& getAbsoluteUnitString(int32_t fStyle, UDateAbsoluteUnit unit, @@ -83,6 +83,10 @@ class RelativeDateTimeCacheData: public SharedObject { UDateRelativeUnit unit, int32_t pastFutureIndex, int32_t pluralUnit) const; + const SimpleFormatter* getRelativeDateTimeUnitFormatter(int32_t fStyle, + URelativeDateTimeUnit unit, + int32_t pastFutureIndex, + int32_t pluralUnit) const; const UnicodeString emptyString; @@ -107,7 +111,7 @@ class RelativeDateTimeCacheData: public SharedObject { RelativeDateTimeCacheData::~RelativeDateTimeCacheData() { // clear out the cache arrays for (int32_t style = 0; style < UDAT_STYLE_COUNT; ++style) { - for (int32_t relUnit = 0; relUnit < UDAT_RELATIVE_UNIT_COUNT; ++relUnit) { + for (int32_t relUnit = 0; relUnit < UDAT_REL_UNIT_COUNT; ++relUnit) { for (int32_t pl = 0; pl < StandardPlural::COUNT; ++pl) { delete relativeUnitsFormatters[style][relUnit][0][pl]; delete relativeUnitsFormatters[style][relUnit][1][pl]; @@ -131,20 +135,41 @@ const UnicodeString& RelativeDateTimeCacheData::getAbsoluteUnitString( return emptyString; } - // Use fallback cache for SimpleFormatter relativeUnits. const SimpleFormatter* RelativeDateTimeCacheData::getRelativeUnitFormatter( int32_t fStyle, UDateRelativeUnit unit, int32_t pastFutureIndex, int32_t pluralUnit) const { + URelativeDateTimeUnit rdtunit = UDAT_REL_UNIT_COUNT; + switch (unit) { + case UDAT_RELATIVE_YEARS: rdtunit = UDAT_REL_UNIT_YEAR; break; + case UDAT_RELATIVE_MONTHS: rdtunit = UDAT_REL_UNIT_MONTH; break; + case UDAT_RELATIVE_WEEKS: rdtunit = UDAT_REL_UNIT_WEEK; break; + case UDAT_RELATIVE_DAYS: rdtunit = UDAT_REL_UNIT_DAY; break; + case UDAT_RELATIVE_HOURS: rdtunit = UDAT_REL_UNIT_HOUR; break; + case UDAT_RELATIVE_MINUTES: rdtunit = UDAT_REL_UNIT_MINUTE; break; + case UDAT_RELATIVE_SECONDS: rdtunit = UDAT_REL_UNIT_SECOND; break; + default: // a unit that the above method does not handle + return nullptr; + } + + return getRelativeDateTimeUnitFormatter(fStyle, rdtunit, pastFutureIndex, pluralUnit); + } + + // Use fallback cache for SimpleFormatter relativeUnits. + const SimpleFormatter* RelativeDateTimeCacheData::getRelativeDateTimeUnitFormatter( + int32_t fStyle, + URelativeDateTimeUnit unit, + int32_t pastFutureIndex, + int32_t pluralUnit) const { int32_t style = fStyle; do { - if (relativeUnitsFormatters[style][unit][pastFutureIndex][pluralUnit] != NULL) { + if (relativeUnitsFormatters[style][unit][pastFutureIndex][pluralUnit] != nullptr) { return relativeUnitsFormatters[style][unit][pastFutureIndex][pluralUnit]; } style = fallBackCache[style]; } while (style != -1); - return NULL; // No formatter found. + return nullptr; // No formatter found. } static UBool getStringWithFallback( @@ -217,23 +242,35 @@ struct RelDateTimeFmtDataSink : public ResourceSink { // Converts the generic units to UDAT_RELATIVE version. switch (genUnit) { case SECOND: - return UDAT_RELATIVE_SECONDS; + return UDAT_REL_UNIT_SECOND; case MINUTE: - return UDAT_RELATIVE_MINUTES; + return UDAT_REL_UNIT_MINUTE; case HOUR: - return UDAT_RELATIVE_HOURS; + return UDAT_REL_UNIT_HOUR; case DAY: - return UDAT_RELATIVE_DAYS; + return UDAT_REL_UNIT_DAY; case WEEK: - return UDAT_RELATIVE_WEEKS; + return UDAT_REL_UNIT_WEEK; case MONTH: - return UDAT_RELATIVE_MONTHS; - /* - * case QUARTER: - * return UDATE_RELATIVE_QUARTERS; - */ + return UDAT_REL_UNIT_MONTH; + case QUARTER: + return UDAT_REL_UNIT_QUARTER; case YEAR: - return UDAT_RELATIVE_YEARS; + return UDAT_REL_UNIT_YEAR; + case SUNDAY: + return UDAT_REL_UNIT_SUNDAY; + case MONDAY: + return UDAT_REL_UNIT_MONDAY; + case TUESDAY: + return UDAT_REL_UNIT_TUESDAY; + case WEDNESDAY: + return UDAT_REL_UNIT_WEDNESDAY; + case THURSDAY: + return UDAT_REL_UNIT_THURSDAY; + case FRIDAY: + return UDAT_REL_UNIT_FRIDAY; + case SATURDAY: + return UDAT_REL_UNIT_SATURDAY; default: return -1; } @@ -248,10 +285,8 @@ struct RelDateTimeFmtDataSink : public ResourceSink { return UDAT_ABSOLUTE_WEEK; case MONTH: return UDAT_ABSOLUTE_MONTH; - /* TODO: Add in QUARTER - * case QUARTER: - * return UDAT_ABSOLUTE_QUARTER; - */ + case QUARTER: + return UDAT_ABSOLUTE_QUARTER; case YEAR: return UDAT_ABSOLUTE_YEAR; case SUNDAY: @@ -312,7 +347,7 @@ struct RelDateTimeFmtDataSink : public ResourceSink { // Utility functions static UDateRelativeDateTimeFormatterStyle styleFromString(const char *s) { - int32_t len = uprv_strlen(s); + int32_t len = static_cast(uprv_strlen(s)); if (len >= 7 && uprv_strcmp(s + len - 7, "-narrow") == 0) { return UDAT_STYLE_NARROW; } @@ -430,7 +465,7 @@ struct RelDateTimeFmtDataSink : public ResourceSink { } int32_t relUnitIndex = relUnitFromGeneric(genericUnit); - if (relUnitIndex == UDAT_RELATIVE_SECONDS && uprv_strcmp(key, "0") == 0 && + if (relUnitIndex == UDAT_REL_UNIT_SECOND && uprv_strcmp(key, "0") == 0 && outputData.absoluteUnits[style][UDAT_ABSOLUTE_NOW][UDAT_DIRECTION_PLAIN].isEmpty()) { // Handle "NOW" outputData.absoluteUnits[style][UDAT_ABSOLUTE_NOW] @@ -463,10 +498,10 @@ struct RelDateTimeFmtDataSink : public ResourceSink { outputData.relativeUnitsFormatters[style][relUnitIndex] [pastFutureIndex]; // Only set if not already established. - if (patterns[pluralIndex] == NULL) { + if (patterns[pluralIndex] == nullptr) { patterns[pluralIndex] = new SimpleFormatter( value.getUnicodeString(errorCode), 0, 1, errorCode); - if (patterns[pluralIndex] == NULL) { + if (patterns[pluralIndex] == nullptr) { errorCode = U_MEMORY_ALLOCATION_ERROR; } } @@ -546,7 +581,7 @@ struct RelDateTimeFmtDataSink : public ResourceSink { consumeAlias(key, value, errorCode); } else { style = styleFromString(key); - int32_t unitSize = uprv_strlen(key) - styleSuffixLength(style); + int32_t unitSize = static_cast(uprv_strlen(key)) - styleSuffixLength(style); genericUnit = unitOrNegativeFromString(key, unitSize); if (style >= 0 && genericUnit != INVALID_UNIT) { consumeTimeUnit(key, value, errorCode); @@ -570,8 +605,14 @@ static void loadWeekdayNames(UnicodeString absoluteUnits[UDAT_STYLE_COUNT] [UDAT_ABSOLUTE_UNIT_COUNT][UDAT_DIRECTION_COUNT], const char* localeId, UErrorCode& status) { + if (U_FAILURE(status)) { + return; + } Locale locale(localeId); DateFormatSymbols dfSym(locale, status); + if (U_FAILURE(status)) { + return; + } for (int32_t style = 0; style < UDAT_STYLE_COUNT; ++style) { DateFormatSymbols::DtWidthType dtfmtWidth = styleToDateFormatSymbolWidth[style]; int32_t count; @@ -595,6 +636,9 @@ static UBool loadUnitData( RelDateTimeFmtDataSink sink(cacheData); ures_getAllItemsWithFallback(resource, "fields", sink, status); + if (U_FAILURE(status)) { + return false; + } // Get the weekday names from DateFormatSymbols. loadWeekdayNames(cacheData.absoluteUnits, localeId, status); @@ -619,7 +663,7 @@ static UBool getDateTimePattern( .append("/DateTimePatterns", status); LocalUResourceBundlePointer topLevel( ures_getByKeyWithFallback( - resource, pathBuffer.data(), NULL, &status)); + resource, pathBuffer.data(), nullptr, &status)); if (U_FAILURE(status)) { return FALSE; } @@ -636,68 +680,68 @@ static UBool getDateTimePattern( template<> U_I18N_API const RelativeDateTimeCacheData *LocaleCacheKey::createObject(const void * /*unused*/, UErrorCode &status) const { const char *localeId = fLoc.getName(); - LocalUResourceBundlePointer topLevel(ures_open(NULL, localeId, &status)); + LocalUResourceBundlePointer topLevel(ures_open(nullptr, localeId, &status)); if (U_FAILURE(status)) { - return NULL; + return nullptr; } LocalPointer result( new RelativeDateTimeCacheData()); if (result.isNull()) { status = U_MEMORY_ALLOCATION_ERROR; - return NULL; + return nullptr; } if (!loadUnitData( topLevel.getAlias(), *result, localeId, status)) { - return NULL; + return nullptr; } UnicodeString dateTimePattern; if (!getDateTimePattern(topLevel.getAlias(), dateTimePattern, status)) { - return NULL; + return nullptr; } result->adoptCombinedDateAndTime( new SimpleFormatter(dateTimePattern, 2, 2, status)); if (U_FAILURE(status)) { - return NULL; + return nullptr; } result->addRef(); return result.orphan(); } RelativeDateTimeFormatter::RelativeDateTimeFormatter(UErrorCode& status) : - fCache(NULL), - fNumberFormat(NULL), - fPluralRules(NULL), + fCache(nullptr), + fNumberFormat(nullptr), + fPluralRules(nullptr), fStyle(UDAT_STYLE_LONG), fContext(UDISPCTX_CAPITALIZATION_NONE), - fOptBreakIterator(NULL) { - init(NULL, NULL, status); + fOptBreakIterator(nullptr) { + init(nullptr, nullptr, status); } RelativeDateTimeFormatter::RelativeDateTimeFormatter( const Locale& locale, UErrorCode& status) : - fCache(NULL), - fNumberFormat(NULL), - fPluralRules(NULL), + fCache(nullptr), + fNumberFormat(nullptr), + fPluralRules(nullptr), fStyle(UDAT_STYLE_LONG), fContext(UDISPCTX_CAPITALIZATION_NONE), - fOptBreakIterator(NULL), + fOptBreakIterator(nullptr), fLocale(locale) { - init(NULL, NULL, status); + init(nullptr, nullptr, status); } RelativeDateTimeFormatter::RelativeDateTimeFormatter( const Locale& locale, NumberFormat *nfToAdopt, UErrorCode& status) : - fCache(NULL), - fNumberFormat(NULL), - fPluralRules(NULL), + fCache(nullptr), + fNumberFormat(nullptr), + fPluralRules(nullptr), fStyle(UDAT_STYLE_LONG), fContext(UDISPCTX_CAPITALIZATION_NONE), - fOptBreakIterator(NULL), + fOptBreakIterator(nullptr), fLocale(locale) { - init(nfToAdopt, NULL, status); + init(nfToAdopt, nullptr, status); } RelativeDateTimeFormatter::RelativeDateTimeFormatter( @@ -706,12 +750,12 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter( UDateRelativeDateTimeFormatterStyle styl, UDisplayContext capitalizationContext, UErrorCode& status) : - fCache(NULL), - fNumberFormat(NULL), - fPluralRules(NULL), + fCache(nullptr), + fNumberFormat(nullptr), + fPluralRules(nullptr), fStyle(styl), fContext(capitalizationContext), - fOptBreakIterator(NULL), + fOptBreakIterator(nullptr), fLocale(locale) { if (U_FAILURE(status)) { return; @@ -727,7 +771,7 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter( } init(nfToAdopt, bi, status); } else { - init(nfToAdopt, NULL, status); + init(nfToAdopt, nullptr, status); } } @@ -744,7 +788,7 @@ RelativeDateTimeFormatter::RelativeDateTimeFormatter( fCache->addRef(); fNumberFormat->addRef(); fPluralRules->addRef(); - if (fOptBreakIterator != NULL) { + if (fOptBreakIterator != nullptr) { fOptBreakIterator->addRef(); } } @@ -764,16 +808,16 @@ RelativeDateTimeFormatter& RelativeDateTimeFormatter::operator=( } RelativeDateTimeFormatter::~RelativeDateTimeFormatter() { - if (fCache != NULL) { + if (fCache != nullptr) { fCache->removeRef(); } - if (fNumberFormat != NULL) { + if (fNumberFormat != nullptr) { fNumberFormat->removeRef(); } - if (fPluralRules != NULL) { + if (fPluralRules != nullptr) { fPluralRules->removeRef(); } - if (fOptBreakIterator != NULL) { + if (fOptBreakIterator != nullptr) { fOptBreakIterator->removeRef(); } } @@ -812,7 +856,7 @@ UnicodeString& RelativeDateTimeFormatter::format( const SimpleFormatter* formatter = fCache->getRelativeUnitFormatter(fStyle, unit, bFuture, pluralIndex); - if (formatter == NULL) { + if (formatter == nullptr) { // TODO: WARN - look at quantity formatter's action with an error. status = U_INVALID_FORMAT_ERROR; return appendTo; @@ -828,33 +872,35 @@ UnicodeString& RelativeDateTimeFormatter::formatNumeric( if (U_FAILURE(status)) { return appendTo; } - // TODO: - // The full implementation of this depends on CLDR data that is not yet available, - // see: http://unicode.org/cldr/trac/ticket/9165 Add more relative field data. - // In the meantime do a quick bring-up by calling the old format method; this - // leaves some holes (even for data that is currently available, such as quarter). - // When the new CLDR data is available, update the data storage accordingly, - // rewrite this to use it directly, and rewrite the old format method to call this - // new one; that is covered by http://bugs.icu-project.org/trac/ticket/12171. - UDateRelativeUnit relunit = UDAT_RELATIVE_UNIT_COUNT; - switch (unit) { - case UDAT_REL_UNIT_YEAR: relunit = UDAT_RELATIVE_YEARS; break; - case UDAT_REL_UNIT_MONTH: relunit = UDAT_RELATIVE_MONTHS; break; - case UDAT_REL_UNIT_WEEK: relunit = UDAT_RELATIVE_WEEKS; break; - case UDAT_REL_UNIT_DAY: relunit = UDAT_RELATIVE_DAYS; break; - case UDAT_REL_UNIT_HOUR: relunit = UDAT_RELATIVE_HOURS; break; - case UDAT_REL_UNIT_MINUTE: relunit = UDAT_RELATIVE_MINUTES; break; - case UDAT_REL_UNIT_SECOND: relunit = UDAT_RELATIVE_SECONDS; break; - default: // a unit that the above method does not handle - status = U_UNSUPPORTED_ERROR; - return appendTo; - } UDateDirection direction = UDAT_DIRECTION_NEXT; if (std::signbit(offset)) { // needed to handle -0.0 direction = UDAT_DIRECTION_LAST; offset = -offset; } - return format(offset, direction, relunit, appendTo, status); + if (direction != UDAT_DIRECTION_LAST && direction != UDAT_DIRECTION_NEXT) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return appendTo; + } + int32_t bFuture = direction == UDAT_DIRECTION_NEXT ? 1 : 0; + FieldPosition pos(FieldPosition::DONT_CARE); + + UnicodeString result; + UnicodeString formattedNumber; + + StandardPlural::Form pluralIndex = QuantityFormatter::selectPlural( + offset, **fNumberFormat, **fPluralRules, formattedNumber, pos, + status); + + const SimpleFormatter* formatter = + fCache->getRelativeDateTimeUnitFormatter(fStyle, unit, bFuture, pluralIndex); + if (formatter == nullptr) { + // TODO: WARN - look at quantity formatter's action with an error. + status = U_INVALID_FORMAT_ERROR; + return appendTo; + } + formatter->format(formattedNumber, result, status); + adjustForContext(result); + return appendTo.append(result); } UnicodeString& RelativeDateTimeFormatter::format( @@ -871,7 +917,7 @@ UnicodeString& RelativeDateTimeFormatter::format( // Get string using fallback. UnicodeString result; result.fastCopyFrom(fCache->getAbsoluteUnitString(fStyle, unit, direction)); - if (fOptBreakIterator != NULL) { + if (fOptBreakIterator != nullptr) { adjustForContext(result); } return appendTo.append(result); @@ -908,6 +954,7 @@ UnicodeString& RelativeDateTimeFormatter::format( UDateAbsoluteUnit absunit = UDAT_ABSOLUTE_UNIT_COUNT; switch (unit) { case UDAT_REL_UNIT_YEAR: absunit = UDAT_ABSOLUTE_YEAR; break; + case UDAT_REL_UNIT_QUARTER: absunit = UDAT_ABSOLUTE_QUARTER; break; case UDAT_REL_UNIT_MONTH: absunit = UDAT_ABSOLUTE_MONTH; break; case UDAT_REL_UNIT_WEEK: absunit = UDAT_ABSOLUTE_WEEK; break; case UDAT_REL_UNIT_DAY: absunit = UDAT_ABSOLUTE_DAY; break; @@ -930,7 +977,7 @@ UnicodeString& RelativeDateTimeFormatter::format( const UnicodeString &unitFormatString = fCache->getAbsoluteUnitString(fStyle, absunit, direction); if (!unitFormatString.isEmpty()) { - if (fOptBreakIterator != NULL) { + if (fOptBreakIterator != nullptr) { UnicodeString result(unitFormatString); adjustForContext(result); return appendTo.append(result); @@ -951,7 +998,7 @@ UnicodeString& RelativeDateTimeFormatter::combineDateAndTime( } void RelativeDateTimeFormatter::adjustForContext(UnicodeString &str) const { - if (fOptBreakIterator == NULL + if (fOptBreakIterator == nullptr || str.length() == 0 || !u_islower(str.char32At(0))) { return; } @@ -992,7 +1039,7 @@ void RelativeDateTimeFormatter::init( shared->removeRef(); } else { SharedNumberFormat *shared = new SharedNumberFormat(nf.getAlias()); - if (shared == NULL) { + if (shared == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1003,7 +1050,7 @@ void RelativeDateTimeFormatter::init( SharedObject::clearPtr(fOptBreakIterator); } else { SharedBreakIterator *shared = new SharedBreakIterator(bi.getAlias()); - if (shared == NULL) { + if (shared == nullptr) { status = U_MEMORY_ALLOCATION_ERROR; return; } @@ -1026,13 +1073,13 @@ ureldatefmt_open( const char* locale, UErrorCode* status ) { if (U_FAILURE(*status)) { - return NULL; + return nullptr; } LocalPointer formatter(new RelativeDateTimeFormatter(Locale(locale), (NumberFormat*)nfToAdopt, width, capitalizationContext, *status), *status); if (U_FAILURE(*status)) { - return NULL; + return nullptr; } return (URelativeDateTimeFormatter*)formatter.orphan(); } @@ -1054,13 +1101,13 @@ ureldatefmt_formatNumeric( const URelativeDateTimeFormatter* reldatefmt, if (U_FAILURE(*status)) { return 0; } - if (result == NULL ? resultCapacity != 0 : resultCapacity < 0) { + if (result == nullptr ? resultCapacity != 0 : resultCapacity < 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } UnicodeString res; - if (result != NULL) { - // NULL destination for pure preflighting: empty dummy string + if (result != nullptr) { + // nullptr destination for pure preflighting: empty dummy string // otherwise, alias the destination buffer (copied from udat_format) res.setTo(result, 0, resultCapacity); } @@ -1082,13 +1129,13 @@ ureldatefmt_format( const URelativeDateTimeFormatter* reldatefmt, if (U_FAILURE(*status)) { return 0; } - if (result == NULL ? resultCapacity != 0 : resultCapacity < 0) { + if (result == nullptr ? resultCapacity != 0 : resultCapacity < 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } UnicodeString res; - if (result != NULL) { - // NULL destination for pure preflighting: empty dummy string + if (result != nullptr) { + // nullptr destination for pure preflighting: empty dummy string // otherwise, alias the destination buffer (copied from udat_format) res.setTo(result, 0, resultCapacity); } @@ -1112,9 +1159,9 @@ ureldatefmt_combineDateAndTime( const URelativeDateTimeFormatter* reldatefmt, if (U_FAILURE(*status)) { return 0; } - if (result == NULL ? resultCapacity != 0 : resultCapacity < 0 || - (relativeDateString == NULL ? relativeDateStringLen != 0 : relativeDateStringLen < -1) || - (timeString == NULL ? timeStringLen != 0 : timeStringLen < -1)) { + if (result == nullptr ? resultCapacity != 0 : resultCapacity < 0 || + (relativeDateString == nullptr ? relativeDateStringLen != 0 : relativeDateStringLen < -1) || + (timeString == nullptr ? timeStringLen != 0 : timeStringLen < -1)) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; } diff --git a/deps/icu-small/source/i18n/reldtfmt.cpp b/deps/icu-small/source/i18n/reldtfmt.cpp index d3ab45dc631ba0..753672d905f16b 100644 --- a/deps/icu-small/source/i18n/reldtfmt.cpp +++ b/deps/icu-small/source/i18n/reldtfmt.cpp @@ -430,7 +430,7 @@ RelativeDateFormat::setContext(UDisplayContext value, UErrorCode& status) if ( fCapitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && fCapitalizationOfRelativeUnitsForUIListMenu) || (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && fCapitalizationOfRelativeUnitsForStandAlone)) ) { - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status); if (U_FAILURE(status)) { delete fCapitalizationBrkIter; @@ -456,8 +456,8 @@ RelativeDateFormat::initCapitalizationContextInfo(const Locale& thelocale) const int32_t * intVector = ures_getIntVector(rb.getAlias(), &len, &status); if (U_SUCCESS(status) && intVector != NULL && len >= 2) { - fCapitalizationOfRelativeUnitsForUIListMenu = intVector[0]; - fCapitalizationOfRelativeUnitsForStandAlone = intVector[1]; + fCapitalizationOfRelativeUnitsForUIListMenu = static_cast(intVector[0]); + fCapitalizationOfRelativeUnitsForStandAlone = static_cast(intVector[1]); } } #endif diff --git a/deps/icu-small/source/i18n/reldtfmt.h b/deps/icu-small/source/i18n/reldtfmt.h index 5063a6388f63b0..0403da11efda76 100644 --- a/deps/icu-small/source/i18n/reldtfmt.h +++ b/deps/icu-small/source/i18n/reldtfmt.h @@ -235,7 +235,6 @@ class RelativeDateFormat : public DateFormat { */ virtual const DateFormatSymbols* getDateFormatSymbols(void) const; - /* Cannot use #ifndef U_HIDE_DRAFT_API for the following draft method since it is virtual */ /** * Set a particular UDisplayContext value in the formatter, such as * UDISPCTX_CAPITALIZATION_FOR_STANDALONE. Note: For getContext, see diff --git a/deps/icu-small/source/i18n/rematch.cpp b/deps/icu-small/source/i18n/rematch.cpp index f2521822078169..95fd0d2240af95 100644 --- a/deps/icu-small/source/i18n/rematch.cpp +++ b/deps/icu-small/source/i18n/rematch.cpp @@ -801,7 +801,7 @@ UBool RegexMatcher::find(UErrorCode &status) { case START_LINE: { - UChar32 c; + UChar32 ch; if (startPos == fAnchorStart) { MatchAt(startPos, FALSE, status); if (U_FAILURE(status)) { @@ -811,17 +811,17 @@ UBool RegexMatcher::find(UErrorCode &status) { return TRUE; } UTEXT_SETNATIVEINDEX(fInputText, startPos); - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); } else { UTEXT_SETNATIVEINDEX(fInputText, startPos); - c = UTEXT_PREVIOUS32(fInputText); + ch = UTEXT_PREVIOUS32(fInputText); UTEXT_SETNATIVEINDEX(fInputText, startPos); } if (fPattern->fFlags & UREGEX_UNIX_LINES) { for (;;) { - if (c == 0x0a) { + if (ch == 0x0a) { MatchAt(startPos, FALSE, status); if (U_FAILURE(status)) { return FALSE; @@ -836,7 +836,7 @@ UBool RegexMatcher::find(UErrorCode &status) { fHitEnd = TRUE; return FALSE; } - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop @@ -846,8 +846,8 @@ UBool RegexMatcher::find(UErrorCode &status) { } } else { for (;;) { - if (isLineTerminator(c)) { - if (c == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { + if (isLineTerminator(ch)) { + if (ch == 0x0d && startPos < fActiveLimit && UTEXT_CURRENT32(fInputText) == 0x0a) { (void)UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); } @@ -865,7 +865,7 @@ UBool RegexMatcher::find(UErrorCode &status) { fHitEnd = TRUE; return FALSE; } - c = UTEXT_NEXT32(fInputText); + ch = UTEXT_NEXT32(fInputText); startPos = UTEXT_GETNATIVEINDEX(fInputText); // Note that it's perfectly OK for a pattern to have a zero-length // match at the end of a string, so we must make sure that the loop @@ -1034,7 +1034,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { return FALSE; } } - U_ASSERT(FALSE); + U_ASSERT(FALSE); case START_STRING: case START_CHAR: @@ -1067,7 +1067,7 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { case START_LINE: { - UChar32 c; + UChar32 ch; if (startPos == fAnchorStart) { MatchChunkAt(startPos, FALSE, status); if (U_FAILURE(status)) { @@ -1081,8 +1081,8 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { if (fPattern->fFlags & UREGEX_UNIX_LINES) { for (;;) { - c = inputBuf[startPos-1]; - if (c == 0x0a) { + ch = inputBuf[startPos-1]; + if (ch == 0x0a) { MatchChunkAt(startPos, FALSE, status); if (U_FAILURE(status)) { return FALSE; @@ -1105,9 +1105,9 @@ UBool RegexMatcher::findUsingChunk(UErrorCode &status) { } } else { for (;;) { - c = inputBuf[startPos-1]; - if (isLineTerminator(c)) { - if (c == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { + ch = inputBuf[startPos-1]; + if (isLineTerminator(ch)) { + if (ch == 0x0d && startPos < fActiveLimit && inputBuf[startPos] == 0x0a) { startPos++; } MatchChunkAt(startPos, FALSE, status); @@ -2774,7 +2774,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { int64_t *pat = fPattern->fCompiledPat->getBuffer(); const UChar *litText = fPattern->fLiteralText.getBuffer(); - UVector *sets = fPattern->fSets; + UVector *fSets = fPattern->fSets; fFrameSize = fPattern->fFrameSize; REStackFrame *fp = resetStack(); @@ -3376,7 +3376,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // There is input left. Pick up one char and test it for set membership. UChar32 c = UTEXT_NEXT32(fInputText); - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); if (c<256) { Regex8BitSet *s8 = &fPattern->fSets8[opValue]; if (s8->contains(c)) { @@ -3384,7 +3384,7 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { break; } } else { - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); if (s->contains(c)) { // The character is in the set. A Match. fp->fInputIdx = UTEXT_GETNATIVEINDEX(fInputText); @@ -3671,9 +3671,9 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { if (newFP == (int64_t *)fp) { break; } - int32_t i; - for (i=0; isetSize(newStackSize); @@ -3830,9 +3830,9 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // This makes the capture groups from within the look-ahead // expression available. int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; - int32_t i; - for (i=0; isetSize(newStackSize); @@ -4123,9 +4123,9 @@ void RegexMatcher::MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status) { // This op scans through all matching input. // The following LOOP_C op emulates stack unwinding if the following pattern fails. { - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); Regex8BitSet *s8 = &fPattern->fSets8[opValue]; - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); // Loop through input, until either the input is exhausted or // we reach a character that is not a member of the set. @@ -4350,7 +4350,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu int64_t *pat = fPattern->fCompiledPat->getBuffer(); const UChar *litText = fPattern->fLiteralText.getBuffer(); - UVector *sets = fPattern->fSets; + UVector *fSets = fPattern->fSets; const UChar *inputBuf = fInputText->chunkContents; @@ -4928,7 +4928,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu break; } - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); // There is input left. Pick up one char and test it for set membership. UChar32 c; @@ -4940,7 +4940,7 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu break; } } else { - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); if (s->contains(c)) { // The character is in the set. A Match. break; @@ -5214,9 +5214,9 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu if (newFP == (int64_t *)fp) { break; } - int32_t i; - for (i=0; isetSize(newStackSize); @@ -5361,9 +5361,9 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu // This makes the capture groups from within the look-ahead // expression available. int64_t *newFP = fStack->getBuffer() + newStackSize - fFrameSize; - int32_t i; - for (i=0; isetSize(newStackSize); @@ -5623,9 +5623,9 @@ void RegexMatcher::MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &statu // This op scans through all matching input. // The following LOOP_C op emulates stack unwinding if the following pattern fails. { - U_ASSERT(opValue > 0 && opValue < sets->size()); + U_ASSERT(opValue > 0 && opValue < fSets->size()); Regex8BitSet *s8 = &fPattern->fSets8[opValue]; - UnicodeSet *s = (UnicodeSet *)sets->elementAt(opValue); + UnicodeSet *s = (UnicodeSet *)fSets->elementAt(opValue); // Loop through input, until either the input is exhausted or // we reach a character that is not a member of the set. diff --git a/deps/icu-small/source/i18n/rulebasedcollator.cpp b/deps/icu-small/source/i18n/rulebasedcollator.cpp index ab65f10a3bdfa8..b057b6bbd5a12f 100644 --- a/deps/icu-small/source/i18n/rulebasedcollator.cpp +++ b/deps/icu-small/source/i18n/rulebasedcollator.cpp @@ -764,9 +764,9 @@ RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength, // Make sure both or neither strings have a known length. // We do not optimize for mixed length/termination. if(leftLength >= 0) { - if(rightLength < 0) { rightLength = uprv_strlen(right); } + if(rightLength < 0) { rightLength = static_cast(uprv_strlen(right)); } } else { - if(rightLength >= 0) { leftLength = uprv_strlen(left); } + if(rightLength >= 0) { leftLength = static_cast(uprv_strlen(left)); } } return doCompare(reinterpret_cast(left), leftLength, reinterpret_cast(right), rightLength, errorCode); @@ -862,9 +862,9 @@ class FCDUTF16NFDIterator : public UTF16NFDIterator { } else { str.setTo(text, (int32_t)(spanLimit - text)); { - ReorderingBuffer buffer(nfcImpl, str); - if(buffer.init(str.length(), errorCode)) { - nfcImpl.makeFCD(spanLimit, textLimit, &buffer, errorCode); + ReorderingBuffer r_buffer(nfcImpl, str); + if(r_buffer.init(str.length(), errorCode)) { + nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode); } } if(U_SUCCESS(errorCode)) { diff --git a/deps/icu-small/source/i18n/scriptset.cpp b/deps/icu-small/source/i18n/scriptset.cpp index 9358e63b9ee403..558d178e2f97ca 100644 --- a/deps/icu-small/source/i18n/scriptset.cpp +++ b/deps/icu-small/source/i18n/scriptset.cpp @@ -298,7 +298,7 @@ uhash_compareScriptSet(UElement key0, UElement key1) { icu::ScriptSet *s0 = static_cast(key0.pointer); icu::ScriptSet *s1 = static_cast(key1.pointer); int32_t diff = s0->countMembers() - s1->countMembers(); - if (diff != 0) return diff; + if (diff != 0) return static_cast(diff); int32_t i0 = s0->nextSetBit(0); int32_t i1 = s1->nextSetBit(0); while ((diff = i0-i1) == 0 && i0 > 0) { diff --git a/deps/icu-small/source/i18n/shareddateformatsymbols.h b/deps/icu-small/source/i18n/shareddateformatsymbols.h index ca9a2108190c4d..66a06ecae5476a 100644 --- a/deps/icu-small/source/i18n/shareddateformatsymbols.h +++ b/deps/icu-small/source/i18n/shareddateformatsymbols.h @@ -12,6 +12,9 @@ #define __SHARED_DATEFORMATSYMBOLS_H__ #include "unicode/utypes.h" + +#if !UCONFIG_NO_FORMATTING + #include "sharedobject.h" #include "unicode/dtfmtsym.h" @@ -33,4 +36,6 @@ class U_I18N_API SharedDateFormatSymbols : public SharedObject { U_NAMESPACE_END +#endif /* !UCONFIG_NO_FORMATTING */ + #endif diff --git a/deps/icu-small/source/i18n/smpdtfmt.cpp b/deps/icu-small/source/i18n/smpdtfmt.cpp index b1b90882fceead..2bc8e49625fe29 100644 --- a/deps/icu-small/source/i18n/smpdtfmt.cpp +++ b/deps/icu-small/source/i18n/smpdtfmt.cpp @@ -244,9 +244,9 @@ SimpleDateFormat::NSOverride::~NSOverride() { void SimpleDateFormat::NSOverride::free() { NSOverride *cur = this; while (cur) { - NSOverride *next = cur->next; + NSOverride *next_temp = cur->next; delete cur; - cur = next; + cur = next_temp; } } @@ -1304,15 +1304,15 @@ SimpleDateFormat::processOverrideString(const Locale &locale, const UnicodeStrin int32_t nsNameHash = nsName.hashCode(); // See if the numbering system is in the override list, if not, then add it. - NSOverride *cur = overrideList; + NSOverride *curr = overrideList; const SharedNumberFormat *snf = NULL; UBool found = FALSE; - while ( cur && !found ) { - if ( cur->hash == nsNameHash ) { - snf = cur->snf; + while ( curr && !found ) { + if ( curr->hash == nsNameHash ) { + snf = curr->snf; found = TRUE; } - cur = cur->next; + curr = curr->next; } if (!found) { @@ -1824,14 +1824,14 @@ SimpleDateFormat::subFormat(UnicodeString &appendTo, // Stealing am/pm value to use as our array index. // It works out: am/midnight are both 0, pm/noon are both 1, // 12 am is 12 midnight, and 12 pm is 12 noon. - int32_t value = cal.get(UCAL_AM_PM, status); + int32_t val = cal.get(UCAL_AM_PM, status); if (count <= 3) { - toAppend = &fSymbols->fAbbreviatedDayPeriods[value]; + toAppend = &fSymbols->fAbbreviatedDayPeriods[val]; } else if (count == 4 || count > 5) { - toAppend = &fSymbols->fWideDayPeriods[value]; + toAppend = &fSymbols->fWideDayPeriods[val]; } else { // count == 5 - toAppend = &fSymbols->fNarrowDayPeriods[value]; + toAppend = &fSymbols->fNarrowDayPeriods[val]; } } @@ -2281,10 +2281,10 @@ SimpleDateFormat::parse(const UnicodeString& text, Calendar& cal, ParsePosition& if (i+1 < fPattern.length()) { // move to next pattern character - UChar ch = fPattern.charAt(i+1); + UChar c = fPattern.charAt(i+1); // check for whitespace - if (PatternProps::isWhiteSpace(ch)) { + if (PatternProps::isWhiteSpace(c)) { i++; // Advance over run in pattern while ((i+1)isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) { + UErrorCode monthStatus = U_ZERO_ERROR; + if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && value >= 6) { cal.set(UCAL_MONTH, value); } else { cal.set(UCAL_MONTH, value - 1); @@ -3571,21 +3571,21 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC static const UChar alt_sep = DateFormatSymbols::ALTERNATE_TIME_SEPARATOR; // Try matching a time separator. - int32_t count = 1; + int32_t count_sep = 1; UnicodeString data[3]; fSymbols->getTimeSeparatorString(data[0]); // Add the default, if different from the locale. if (data[0].compare(&def_sep, 1) != 0) { - data[count++].setTo(def_sep); + data[count_sep++].setTo(def_sep); } // If lenient, add also the alternate, if different from the locale. if (isLenient() && data[0].compare(&alt_sep, 1) != 0) { - data[count++].setTo(alt_sep); + data[count_sep++].setTo(alt_sep); } - return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count, NULL, cal); + return matchString(text, start, UCAL_FIELD_COUNT /* => nothing to set */, data, count_sep, NULL, cal); } case UDAT_AM_PM_MIDNIGHT_NOON_FIELD: @@ -3674,7 +3674,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC } parseInt(*src, number, pos, allowNegative,currentNumberFormat); if (pos.getIndex() != parseStart) { - int32_t value = number.getLong(); + int32_t val = number.getLong(); // Don't need suffix processing here (as in number processing at the beginning of the function); // the new fields being handled as numeric values (month, weekdays, quarters) should not have suffixes. @@ -3682,7 +3682,7 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if (!getBooleanAttribute(UDAT_PARSE_ALLOW_NUMERIC, status)) { // Check the range of the value int32_t bias = gFieldRangeBias[patternCharIndex]; - if (bias >= 0 && (value > cal.getMaximum(field) + bias || value < cal.getMinimum(field) + bias)) { + if (bias >= 0 && (val > cal.getMaximum(field) + bias || val < cal.getMinimum(field) + bias)) { return -start; } } @@ -3696,35 +3696,35 @@ int32_t SimpleDateFormat::subParse(const UnicodeString& text, int32_t& start, UC if (!strcmp(cal.getType(),"hebrew")) { HebrewCalendar *hc = (HebrewCalendar*)&cal; if (cal.isSet(UCAL_YEAR)) { - UErrorCode status = U_ZERO_ERROR; - if (!hc->isLeapYear(hc->get(UCAL_YEAR,status)) && value >= 6) { - cal.set(UCAL_MONTH, value); + UErrorCode monthStatus = U_ZERO_ERROR; + if (!hc->isLeapYear(hc->get(UCAL_YEAR, monthStatus)) && val >= 6) { + cal.set(UCAL_MONTH, val); } else { - cal.set(UCAL_MONTH, value - 1); + cal.set(UCAL_MONTH, val - 1); } } else { - saveHebrewMonth = value; + saveHebrewMonth = val; } } else { - cal.set(UCAL_MONTH, value - 1); + cal.set(UCAL_MONTH, val - 1); } break; case UDAT_STANDALONE_MONTH_FIELD: - cal.set(UCAL_MONTH, value - 1); + cal.set(UCAL_MONTH, val - 1); break; case UDAT_DOW_LOCAL_FIELD: case UDAT_STANDALONE_DAY_FIELD: - cal.set(UCAL_DOW_LOCAL, value); + cal.set(UCAL_DOW_LOCAL, val); break; case UDAT_QUARTER_FIELD: case UDAT_STANDALONE_QUARTER_FIELD: - cal.set(UCAL_MONTH, (value - 1) * 3); + cal.set(UCAL_MONTH, (val - 1) * 3); break; case UDAT_RELATED_YEAR_FIELD: - cal.setRelatedYear(value); + cal.setRelatedYear(val); break; default: - cal.set(field, value); + cal.set(field, val); break; } return pos.getIndex(); @@ -3971,7 +3971,7 @@ SimpleDateFormat::setContext(UDisplayContext value, UErrorCode& status) if (U_SUCCESS(status)) { if ( fCapitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE || value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE) ) { - UErrorCode status = U_ZERO_ERROR; + status = U_ZERO_ERROR; fCapitalizationBrkIter = BreakIterator::createSentenceInstance(fLocale, status); if (U_FAILURE(status)) { delete fCapitalizationBrkIter; diff --git a/deps/icu-small/source/i18n/timezone.cpp b/deps/icu-small/source/i18n/timezone.cpp index f7f45c7d3eeffd..dbf614469e8311 100644 --- a/deps/icu-small/source/i18n/timezone.cpp +++ b/deps/icu-small/source/i18n/timezone.cpp @@ -1031,8 +1031,8 @@ TimeZone::getEquivalentID(const UnicodeString& id, int32_t index) { UResourceBundle *ares = ures_getByKey(top, kNAMES, NULL, &ec); // dereference Zones section if (U_SUCCESS(ec)) { int32_t idLen = 0; - const UChar* id = ures_getStringByIndex(ares, zone, &idLen, &ec); - result.fastCopyFrom(UnicodeString(TRUE, id, idLen)); + const UChar* id2 = ures_getStringByIndex(ares, zone, &idLen, &ec); + result.fastCopyFrom(UnicodeString(TRUE, id2, idLen)); U_DEBUG_TZ_MSG(("gei(%d) -> %d, len%d, %s\n", index, zone, result.length(), u_errorName(ec))); } ures_close(ares); @@ -1199,7 +1199,7 @@ TimeZone::getDisplayName(UBool daylight, EDisplayType style, const Locale& local { UErrorCode status = U_ZERO_ERROR; UDate date = Calendar::getNow(); - UTimeZoneFormatTimeType timeType; + UTimeZoneFormatTimeType timeType = UTZFMT_TIME_TYPE_UNKNOWN; int32_t offset; if (style == GENERIC_LOCATION || style == LONG_GENERIC || style == SHORT_GENERIC) { @@ -1612,7 +1612,7 @@ TimeZone::getWindowsID(const UnicodeString& id, UnicodeString& winid, UErrorCode end = tzids + len; hasNext = FALSE; } - if (canonicalID.compare(start, end - start) == 0) { + if (canonicalID.compare(start, static_cast(end - start)) == 0) { winid = UnicodeString(ures_getKey(winzone), -1 , US_INV); found = TRUE; break; @@ -1673,7 +1673,7 @@ TimeZone::getIDForWindowsID(const UnicodeString& winid, const char* region, Unic if (end == NULL) { id.setTo(tzids, -1); } else { - id.setTo(tzids, end - tzids); + id.setTo(tzids, static_cast(end - tzids)); } gotID = TRUE; } diff --git a/deps/icu-small/source/i18n/transreg.cpp b/deps/icu-small/source/i18n/transreg.cpp index 331f4efdeb339a..4884773faf5ac3 100644 --- a/deps/icu-small/source/i18n/transreg.cpp +++ b/deps/icu-small/source/i18n/transreg.cpp @@ -1330,12 +1330,12 @@ Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID int32_t passNumber = 1; for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) { // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")? - Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), + Transliterator* tl = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++), (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE); - if (t == 0) + if (tl == 0) status = U_MEMORY_ALLOCATION_ERROR; else - rbts->addElement(t, status); + rbts->addElement(tl, status); } if (U_FAILURE(status)) { delete rbts; diff --git a/deps/icu-small/source/i18n/tzfmt.cpp b/deps/icu-small/source/i18n/tzfmt.cpp index 69da4667bc89ac..df4dec1febf60a 100644 --- a/deps/icu-small/source/i18n/tzfmt.cpp +++ b/deps/icu-small/source/i18n/tzfmt.cpp @@ -2648,12 +2648,12 @@ TimeZoneFormat::checkAbuttingHoursAndMinutes() { UVector *items = fGMTOffsetPatternItems[type]; for (int32_t i = 0; i < items->size(); i++) { const GMTOffsetField* item = (GMTOffsetField*)items->elementAt(i); - GMTOffsetField::FieldType type = item->getType(); - if (type != GMTOffsetField::TEXT) { + GMTOffsetField::FieldType fieldType = item->getType(); + if (fieldType != GMTOffsetField::TEXT) { if (afterH) { fAbuttingOffsetHoursAndMinutes = TRUE; break; - } else if (type == GMTOffsetField::HOUR) { + } else if (fieldType == GMTOffsetField::HOUR) { afterH = TRUE; } } else if (afterH) { diff --git a/deps/icu-small/source/i18n/tzgnames.cpp b/deps/icu-small/source/i18n/tzgnames.cpp index c2e685272e9b36..5f5b7db30227cc 100644 --- a/deps/icu-small/source/i18n/tzgnames.cpp +++ b/deps/icu-small/source/i18n/tzgnames.cpp @@ -407,7 +407,7 @@ TZGNCore::initialize(const Locale& locale, UErrorCode& status) { // target region const char* region = fLocale.getCountry(); - int32_t regionLen = uprv_strlen(region); + int32_t regionLen = static_cast(uprv_strlen(region)); if (regionLen == 0) { char loc[ULOC_FULLNAME_CAPACITY]; uloc_addLikelySubtags(fLocale.getName(), loc, sizeof(loc), &status); diff --git a/deps/icu-small/source/i18n/tznames.cpp b/deps/icu-small/source/i18n/tznames.cpp index 689fdeb0915300..5a79c22aacf8f9 100644 --- a/deps/icu-small/source/i18n/tznames.cpp +++ b/deps/icu-small/source/i18n/tznames.cpp @@ -87,7 +87,7 @@ static void sweepCache() { const UHashElement* elem; double now = (double)uprv_getUTCtime(); - while ((elem = uhash_nextElement(gTimeZoneNamesCache, &pos))) { + while ((elem = uhash_nextElement(gTimeZoneNamesCache, &pos)) != 0) { TimeZoneNamesCacheEntry *entry = (TimeZoneNamesCacheEntry *)elem->value.pointer; if (entry->refCount <= 0 && (now - entry->lastAccess) > CACHE_EXPIRATION) { // delete this entry diff --git a/deps/icu-small/source/i18n/tznames_impl.cpp b/deps/icu-small/source/i18n/tznames_impl.cpp index ef04b31c1357f4..6a303ea4a0110f 100644 --- a/deps/icu-small/source/i18n/tznames_impl.cpp +++ b/deps/icu-small/source/i18n/tznames_impl.cpp @@ -1285,7 +1285,7 @@ static void mergeTimeZoneKey(const UnicodeString& mzID, char* result) { char mzIdChar[ZID_KEY_MAX + 1]; int32_t keyLen; - int32_t prefixLen = uprv_strlen(gMZPrefix); + int32_t prefixLen = static_cast(uprv_strlen(gMZPrefix)); keyLen = mzID.extract(0, mzID.length(), mzIdChar, ZID_KEY_MAX + 1, US_INV); uprv_memcpy((void *)result, (void *)gMZPrefix, prefixLen); uprv_memcpy((void *)(result + prefixLen), (void *)mzIdChar, keyLen); @@ -1453,7 +1453,7 @@ struct TimeZoneNamesImpl::ZoneStringsLoader : public ResourceSink { virtual ~ZoneStringsLoader(); void* createKey(const char* key, UErrorCode& status) { - int32_t len = sizeof(char) * (uprv_strlen(key) + 1); + int32_t len = sizeof(char) * (static_cast(uprv_strlen(key)) + 1); char* newKey = (char*) uprv_malloc(len); if (newKey == NULL) { status = U_MEMORY_ALLOCATION_ERROR; @@ -1469,7 +1469,7 @@ struct TimeZoneNamesImpl::ZoneStringsLoader : public ResourceSink { } UnicodeString mzIDFromKey(const char* key) { - return UnicodeString(key + MZ_PREFIX_LEN, uprv_strlen(key) - MZ_PREFIX_LEN, US_INV); + return UnicodeString(key + MZ_PREFIX_LEN, static_cast(uprv_strlen(key)) - MZ_PREFIX_LEN, US_INV); } UnicodeString tzIDFromKey(const char* key) { @@ -1944,8 +1944,8 @@ TZDBNameSearchHandler::handleMatch(int32_t matchLength, const CharacterNode *nod // metazone mapping for "CST" is America_Central, // but if region is one of CN/MO/TW, "CST" is parsed // as metazone China (China Standard Time). - for (int32_t i = 0; i < ninfo->nRegions; i++) { - const char *region = ninfo->parseRegions[i]; + for (int32_t j = 0; j < ninfo->nRegions; j++) { + const char *region = ninfo->parseRegions[j]; if (uprv_strcmp(fRegion, region) == 0) { match = ninfo; matchRegion = TRUE; @@ -2059,7 +2059,7 @@ static void U_CALLCONV prepareFind(UErrorCode &status) { const UnicodeString *mzID; StringEnumeration *mzIDs = TimeZoneNamesImpl::_getAvailableMetaZoneIDs(status); if (U_SUCCESS(status)) { - while ((mzID = mzIDs->snext(status)) && U_SUCCESS(status)) { + while ((mzID = mzIDs->snext(status)) != 0 && U_SUCCESS(status)) { const TZDBNames *names = TZDBTimeZoneNames::getMetaZoneNames(*mzID, status); if (U_FAILURE(status)) { break; @@ -2128,7 +2128,7 @@ TZDBTimeZoneNames::TZDBTimeZoneNames(const Locale& locale) : fLocale(locale) { UBool useWorld = TRUE; const char* region = fLocale.getCountry(); - int32_t regionLen = uprv_strlen(region); + int32_t regionLen = static_cast(uprv_strlen(region)); if (regionLen == 0) { UErrorCode status = U_ZERO_ERROR; char loc[ULOC_FULLNAME_CAPACITY]; diff --git a/deps/icu-small/source/i18n/ucln_in.h b/deps/icu-small/source/i18n/ucln_in.h index 318eafc143c968..4c13b9ffcb539a 100644 --- a/deps/icu-small/source/i18n/ucln_in.h +++ b/deps/icu-small/source/i18n/ucln_in.h @@ -32,6 +32,7 @@ typedef enum ECleanupI18NType { UCLN_I18N_SPOOFDATA, UCLN_I18N_TRANSLITERATOR, UCLN_I18N_REGEX, + UCLN_I18N_JAPANESE_CALENDAR, UCLN_I18N_ISLAMIC_CALENDAR, UCLN_I18N_CHINESE_CALENDAR, UCLN_I18N_HEBREW_CALENDAR, @@ -58,6 +59,7 @@ typedef enum ECleanupI18NType { UCLN_I18N_GENDERINFO, UCLN_I18N_CDFINFO, UCLN_I18N_REGION, + UCLN_I18N_LIST_FORMATTER, UCLN_I18N_COUNT /* This must be last */ } ECleanupI18NType; diff --git a/deps/icu-small/source/i18n/ucol_res.cpp b/deps/icu-small/source/i18n/ucol_res.cpp index 76975ecc01de63..56ed5b3c19caca 100644 --- a/deps/icu-small/source/i18n/ucol_res.cpp +++ b/deps/icu-small/source/i18n/ucol_res.cpp @@ -400,11 +400,11 @@ CollationLoader::loadFromData(UErrorCode &errorCode) { // Try to fetch the optional rules string. { UErrorCode internalErrorCode = U_ZERO_ERROR; - int32_t length; - const UChar *s = ures_getStringByKey(data, "Sequence", &length, + int32_t len; + const UChar *s = ures_getStringByKey(data, "Sequence", &len, &internalErrorCode); if(U_SUCCESS(internalErrorCode)) { - t->rules.setTo(TRUE, s, length); + t->rules.setTo(TRUE, s, len); } } @@ -426,10 +426,10 @@ CollationLoader::loadFromData(UErrorCode &errorCode) { LocalUResourceBundlePointer def( ures_getByKeyWithFallback(actualBundle.getAlias(), "collations/default", NULL, &internalErrorCode)); - int32_t length; - const UChar *s = ures_getString(def.getAlias(), &length, &internalErrorCode); - if(U_SUCCESS(internalErrorCode) && length < UPRV_LENGTHOF(defaultType)) { - u_UCharsToChars(s, defaultType, length + 1); + int32_t len; + const UChar *s = ures_getString(def.getAlias(), &len, &internalErrorCode); + if(U_SUCCESS(internalErrorCode) && len < UPRV_LENGTHOF(defaultType)) { + u_UCharsToChars(s, defaultType, len + 1); } else { uprv_strcpy(defaultType, "standard"); } diff --git a/deps/icu-small/source/i18n/udat.cpp b/deps/icu-small/source/i18n/udat.cpp index d086067c034da6..b7d85cc179ef9a 100644 --- a/deps/icu-small/source/i18n/udat.cpp +++ b/deps/icu-small/source/i18n/udat.cpp @@ -603,7 +603,7 @@ udat_getSymbols(const UDateFormat *fmt, } else { return -1; } - int32_t count; + int32_t count = 0; const UnicodeString *res = NULL; switch(type) { diff --git a/deps/icu-small/source/common/ulistformatter.cpp b/deps/icu-small/source/i18n/ulistformatter.cpp similarity index 100% rename from deps/icu-small/source/common/ulistformatter.cpp rename to deps/icu-small/source/i18n/ulistformatter.cpp diff --git a/deps/icu-small/source/i18n/ulocdata.cpp b/deps/icu-small/source/i18n/ulocdata.cpp index 551f6c64ed5668..f651fee6fc4efa 100644 --- a/deps/icu-small/source/i18n/ulocdata.cpp +++ b/deps/icu-small/source/i18n/ulocdata.cpp @@ -372,7 +372,7 @@ ulocdata_getLocaleSeparator(ULocaleData *uld, p1=u_strstr(separator, sub1); if (p0!=NULL && p1!=NULL && p0<=p1) { separator = (const UChar *)p0 + subLen; - len = p1 - separator; + len = static_cast(p1 - separator); /* Desired separator is no longer zero-terminated; handle that if necessary */ if (len < resultCapacity) { u_strncpy(result, separator, len); diff --git a/deps/icu-small/source/i18n/unicode/alphaindex.h b/deps/icu-small/source/i18n/unicode/alphaindex.h index 54bd29ff88668a..4ebdf1cc56a1a8 100644 --- a/deps/icu-small/source/i18n/unicode/alphaindex.h +++ b/deps/icu-small/source/i18n/unicode/alphaindex.h @@ -266,6 +266,8 @@ class U_I18N_API AlphabeticIndex: public UObject { * Use getBucket() to get the bucket's properties. * * @param name the string to be sorted into an index bucket + * @param errorCode Error code, will be set with the reason if the + * operation fails. * @return the bucket number for the name * @stable ICU 51 */ @@ -377,9 +379,10 @@ class U_I18N_API AlphabeticIndex: public UObject { /** - * Get the default label used for abbreviated buckets between other index characters. - * For example, consider the labels when Latin and Greek are used: - * X Y Z ... Α Β Γ. + * Get the default label used for abbreviated buckets *between* other index characters. + * For example, consider the labels when Latin (X Y Z) and Greek (Α Β Γ) are used: + * + * X Y Z ... Α Β Γ. * * @return inflow label * @stable ICU 4.8 @@ -700,6 +703,7 @@ class U_I18N_API AlphabeticIndex: public UObject { /** * A (name, data) pair, to be sorted by name into one of the index buckets. * The user data is not used by the index implementation. + * \cond * @internal */ struct Record: public UMemory { @@ -708,6 +712,7 @@ class U_I18N_API AlphabeticIndex: public UObject { Record(const UnicodeString &name, const void *data); ~Record(); }; + /** \endcond */ #endif /* U_HIDE_INTERNAL_API */ private: diff --git a/deps/icu-small/source/i18n/unicode/calendar.h b/deps/icu-small/source/i18n/unicode/calendar.h index 48021534b422f6..023cf053f254fb 100644 --- a/deps/icu-small/source/i18n/unicode/calendar.h +++ b/deps/icu-small/source/i18n/unicode/calendar.h @@ -52,83 +52,64 @@ typedef int32_t UFieldResolutionTable[12][8]; class BasicTimeZone; /** - * Calendar is an abstract base class for converting between - * a UDate object and a set of integer fields such as - * YEAR, MONTH, DAY, HOUR, - * and so on. (A UDate object represents a specific instant in + * `Calendar` is an abstract base class for converting between + * a `UDate` object and a set of integer fields such as + * `YEAR`, `MONTH`, `DAY`, `HOUR`, and so on. + * (A `UDate` object represents a specific instant in * time with millisecond precision. See UDate - * for information about the UDate class.) + * for information about the `UDate` class.) * - *

    - * Subclasses of Calendar interpret a UDate + * Subclasses of `Calendar` interpret a `UDate` * according to the rules of a specific calendar system. - * The most commonly used subclass of Calendar is - * GregorianCalendar. Other subclasses could represent + * The most commonly used subclass of `Calendar` is + * `GregorianCalendar`. Other subclasses could represent * the various types of lunar calendars in use in many parts of the world. * - *

    - * NOTE: (ICU 2.6) The subclass interface should be considered unstable - * - it WILL change. + * **NOTE**: (ICU 2.6) The subclass interface should be considered unstable - + * it WILL change. * - *

    - * Like other locale-sensitive classes, Calendar provides a - * static method, createInstance, for getting a generally useful - * object of this type. Calendar's createInstance method - * returns the appropriate Calendar subclass whose + * Like other locale-sensitive classes, `Calendar` provides a + * static method, `createInstance`, for getting a generally useful + * object of this type. `Calendar`'s `createInstance` method + * returns the appropriate `Calendar` subclass whose * time fields have been initialized with the current date and time: - * \htmlonly

    \endhtmlonly - *
    - * Calendar *rightNow = Calendar::createInstance(errCode);
    - * 
    - * \htmlonly
    \endhtmlonly * - *

    - * A Calendar object can produce all the time field values + * Calendar *rightNow = Calendar::createInstance(errCode); + * + * A `Calendar` object can produce all the time field values * needed to implement the date-time formatting for a particular language * and calendar style (for example, Japanese-Gregorian, Japanese-Traditional). * - *

    - * When computing a UDate from time fields, some special circumstances + * When computing a `UDate` from time fields, some special circumstances * may arise: there may be insufficient information to compute the - * UDate (such as only year and month but no day in the month), + * `UDate` (such as only year and month but no day in the month), * there may be inconsistent information (such as "Tuesday, July 15, 1996" * -- July 15, 1996 is actually a Monday), or the input time might be ambiguous * because of time zone transition. * - *

    - * Insufficient information. The calendar will use default + * **Insufficient information.** The calendar will use default * information to specify the missing fields. This may vary by calendar; for * the Gregorian calendar, the default for a field is the same as that of the * start of the epoch: i.e., YEAR = 1970, MONTH = JANUARY, DATE = 1, etc. * - *

    - * Inconsistent information. If fields conflict, the calendar + * **Inconsistent information.** If fields conflict, the calendar * will give preference to fields set more recently. For example, when * determining the day, the calendar will look for one of the following * combinations of fields. The most recent combination, as determined by the * most recently set single field, will be used. * - * \htmlonly

    \endhtmlonly - *
    - * MONTH + DAY_OF_MONTH
    - * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK
    - * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK
    - * DAY_OF_YEAR
    - * DAY_OF_WEEK + WEEK_OF_YEAR
    - * 
    - * \htmlonly
    \endhtmlonly + * MONTH + DAY_OF_MONTH + * MONTH + WEEK_OF_MONTH + DAY_OF_WEEK + * MONTH + DAY_OF_WEEK_IN_MONTH + DAY_OF_WEEK + * DAY_OF_YEAR + * DAY_OF_WEEK + WEEK_OF_YEAR * * For the time of day: * - * \htmlonly
    \endhtmlonly - *
    - * HOUR_OF_DAY
    - * AM_PM + HOUR
    - * 
    - * \htmlonly
    \endhtmlonly + * HOUR_OF_DAY + * AM_PM + HOUR * - *

    - * Ambiguous Wall Clock Time. When time offset from UTC has + * **Ambiguous Wall Clock Time.** When time offset from UTC has * changed, it produces an ambiguous time slot around the transition. For example, * many US locations observe daylight saving time. On the date switching to daylight * saving time in US, wall clock time jumps from 12:59 AM (standard) to 2:00 AM @@ -138,64 +119,66 @@ class BasicTimeZone; * In this example, 1:30 AM is interpreted as 1:30 AM standard time (non-exist), * so the final result will be 2:30 AM daylight time. * - *

    On the date switching back to standard time, wall clock time is moved back one + * On the date switching back to standard time, wall clock time is moved back one * hour at 2:00 AM. So wall clock time from 1:00 AM to 1:59 AM occur twice. In this * case, the ICU Calendar resolves the time using the UTC offset after the transition * by default. For example, 1:30 AM on the date is resolved as 1:30 AM standard time. * - *

    Ambiguous wall clock time resolution behaviors can be customized by Calendar APIs + * Ambiguous wall clock time resolution behaviors can be customized by Calendar APIs * {@link #setRepeatedWallTimeOption} and {@link #setSkippedWallTimeOption}. * These methods are available in ICU 49 or later versions. * - *

    - * Note: for some non-Gregorian calendars, different + * **Note:** for some non-Gregorian calendars, different * fields may be necessary for complete disambiguation. For example, a full - * specification of the historial Arabic astronomical calendar requires year, - * month, day-of-month and day-of-week in some cases. + * specification of the historical Arabic astronomical calendar requires year, + * month, day-of-month *and* day-of-week in some cases. * - *

    - * Note: There are certain possible ambiguities in + * **Note:** There are certain possible ambiguities in * interpretation of certain singular times, which are resolved in the * following ways: - *

      - *
    1. 24:00:00 "belongs" to the following day. That is, - * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970 * - *
    2. Although historically not precise, midnight also belongs to "am", - * and noon belongs to "pm", so on the same day, - * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm - *
    + * 1. 24:00:00 "belongs" to the following day. That is, + * 23:59 on Dec 31, 1969 < 24:00 on Jan 1, 1970 < 24:01:00 on Jan 1, 1970 + * 2. Although historically not precise, midnight also belongs to "am", + * and noon belongs to "pm", so on the same day, + * 12:00 am (midnight) < 12:01 am, and 12:00 pm (noon) < 12:01 pm * - *

    * The date or time format strings are not part of the definition of a * calendar, as those must be modifiable or overridable by the user at - * runtime. Use {@link DateFormat} - * to format dates. + * runtime. Use `DateFormat` to format dates. * - *

    - * Calendar provides an API for field "rolling", where fields + * `Calendar` provides an API for field "rolling", where fields * can be incremented or decremented, but wrap around. For example, rolling the - * month up in the date December 12, 1996 results in - * January 12, 1996. + * month up in the date December 12, **1996** results in + * January 12, **1996**. * - *

    - * Calendar also provides a date arithmetic function for + * `Calendar` also provides a date arithmetic function for * adding the specified (signed) amount of time to a particular time field. - * For example, subtracting 5 days from the date September 12, 1996 - * results in September 7, 1996. + * For example, subtracting 5 days from the date `September 12, 1996` + * results in `September 7, 1996`. * - *

    Supported range + * ***Supported range*** * - *

    The allowable range of Calendar has been - * narrowed. GregorianCalendar used to attempt to support - * the range of dates with millisecond values from - * Long.MIN_VALUE to Long.MAX_VALUE. - * The new Calendar protocol specifies the + * The allowable range of `Calendar` has been narrowed. `GregorianCalendar` used + * to attempt to support the range of dates with millisecond values from + * `Long.MIN_VALUE` to `Long.MAX_VALUE`. The new `Calendar` protocol specifies the * maximum range of supportable dates as those having Julian day numbers - * of -0x7F000000 to +0x7F000000. This - * corresponds to years from ~5,800,000 BCE to ~5,800,000 CE. Programmers - * should use the protected constants in Calendar to - * specify an extremely early or extremely late date.

    + * of `-0x7F000000` to `+0x7F000000`. This corresponds to years from ~5,800,000 BCE + * to ~5,800,000 CE. Programmers should use the protected constants in `Calendar` to + * specify an extremely early or extremely late date. + * + *

    + * The Japanese calendar uses a combination of era name and year number. + * When an emperor of Japan abdicates and a new emperor ascends the throne, + * a new era is declared and year number is reset to 1. Even if the date of + * abdication is scheduled ahead of time, the new era name might not be + * announced until just before the date. In such case, ICU4C may include + * a start date of future era without actual era name, but not enabled + * by default. ICU4C users who want to test the behavior of the future era + * can enable the tentative era by: + *

      + *
    • Environment variable ICU_ENABLE_TENTATIVE_ERA=true.
    • + *
    * * @stable ICU 2.0 */ @@ -903,7 +886,7 @@ class U_I18N_API Calendar : public UObject { /** * Sets the behavior for handling wall time repeating multiple times * at negative time zone offset transitions. For example, 1:30 AM on - * November 6, 2011 in US Eastern time (Ameirca/New_York) occurs twice; + * November 6, 2011 in US Eastern time (America/New_York) occurs twice; * 1:30 AM EDT, then 1:30 AM EST one hour later. When UCAL_WALLTIME_FIRST * is used, the wall time 1:30AM in this example will be interpreted as 1:30 AM EDT * (first occurrence). When UCAL_WALLTIME_LAST is used, it will be @@ -1718,9 +1701,7 @@ class U_I18N_API Calendar : public UObject { /** * Validate a single field of this calendar. Subclasses should * override this method to validate any calendar-specific fields. - * Generic fields can be handled by - * Calendar::validateField(). - * @see #validateField(int, int, int, int&) + * Generic fields can be handled by `Calendar::validateField()`. * @internal */ virtual void validateField(UCalendarDateFields field, UErrorCode &status); @@ -2171,7 +2152,7 @@ class U_I18N_API Calendar : public UObject { TimeZone* fZone; /** - * Option for rpeated wall time + * Option for repeated wall time * @see #setRepeatedWallTimeOption */ UCalendarWallTimeOption fRepeatedWallTime; @@ -2456,7 +2437,7 @@ class U_I18N_API Calendar : public UObject { BasicTimeZone* getBasicTimeZone() const; /** - * Find the previous zone transtion near the given time. + * Find the previous zone transition near the given time. * @param base The base time, inclusive * @param transitionTime Receives the result time * @param status The error status diff --git a/deps/icu-small/source/i18n/unicode/coll.h b/deps/icu-small/source/i18n/unicode/coll.h index d03570509ecebb..653434f54ca664 100644 --- a/deps/icu-small/source/i18n/unicode/coll.h +++ b/deps/icu-small/source/i18n/unicode/coll.h @@ -235,16 +235,16 @@ class U_I18N_API Collator : public UObject { * Returns TRUE if "other" is the same as "this". * * The base class implementation returns TRUE if "other" has the same type/class as "this": - * typeid(*this) == typeid(other). + * `typeid(*this) == typeid(other)`. * * Subclass implementations should do something like the following: - *
    -     *   if (this == &other) { return TRUE; }
    -     *   if (!Collator::operator==(other)) { return FALSE; }  // not the same class
          *
    -     *   const MyCollator &o = (const MyCollator&)other;
    -     *   (compare this vs. o's subclass fields)
    -     * 
    + * if (this == &other) { return TRUE; } + * if (!Collator::operator==(other)) { return FALSE; } // not the same class + * + * const MyCollator &o = (const MyCollator&)other; + * (compare this vs. o's subclass fields) + * * @param other Collator object to be compared * @return TRUE if other is the same as this. * @stable ICU 2.0 diff --git a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h index 7dc92f610062f4..9c1e9183f4657b 100644 --- a/deps/icu-small/source/i18n/unicode/compactdecimalformat.h +++ b/deps/icu-small/source/i18n/unicode/compactdecimalformat.h @@ -30,30 +30,31 @@ U_NAMESPACE_BEGIN class PluralRules; /** - *

    IMPORTANT: New users are strongly encouraged to see if + * **IMPORTANT:** New users are strongly encouraged to see if * numberformatter.h fits their use case. Although not deprecated, this header * is provided for backwards compatibility only. - *


    + * + * ----------------------------------------------------------------------------- * * The CompactDecimalFormat produces abbreviated numbers, suitable for display in * environments will limited real estate. For example, 'Hits: 1.2B' instead of * 'Hits: 1,200,000,000'. The format will be appropriate for the given language, * such as "1,2 Mrd." for German. - *

    + * * For numbers under 1000 trillion (under 10^15, such as 123,456,789,012,345), * the result will be short for supported languages. However, the result may * sometimes exceed 7 characters, such as when there are combining marks or thin * characters. In such cases, the visual width in fonts should still be short. - *

    + * * By default, there are 3 significant digits. After creation, if more than * three significant digits are set (with setMaximumSignificantDigits), or if a * fixed number of digits are set (with setMaximumIntegerDigits or * setMaximumFractionDigits), then result may be wider. - *

    + * * At this time, parsing is not supported, and will produce a U_UNSUPPORTED_ERROR. * Resetting the pattern prefixes or suffixes is not supported; the method calls * are ignored. - *

    + * * @stable ICU 51 */ class U_I18N_API CompactDecimalFormat : public DecimalFormat { @@ -61,9 +62,9 @@ class U_I18N_API CompactDecimalFormat : public DecimalFormat { /** * Returns a compact decimal instance for specified locale. - *

    - * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * + * **NOTE:** New users are strongly encouraged to use + * `number::NumberFormatter` instead of NumberFormat. * @param inLocale the given locale. * @param style whether to use short or long style. * @param status error code returned here. diff --git a/deps/icu-small/source/i18n/unicode/currpinf.h b/deps/icu-small/source/i18n/unicode/currpinf.h index 1a327c5bae0033..80b046251323e9 100644 --- a/deps/icu-small/source/i18n/unicode/currpinf.h +++ b/deps/icu-small/source/i18n/unicode/currpinf.h @@ -2,7 +2,7 @@ // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* - * Copyright (C) 2009-2015, International Business Machines Corporation and * + * Copyright (C) 2009-2015, International Business Machines Corporation and * * others. All Rights Reserved. * ******************************************************************************* */ @@ -240,18 +240,27 @@ class U_I18N_API CurrencyPluralInfo : public UObject { /* * The plural rule is used to format currency plural name, * for example: "3.00 US Dollars". - * If there are 3 currency signs in the currency patttern, + * If there are 3 currency signs in the currency pattern, * the 3 currency signs will be replaced by currency plural name. */ PluralRules* fPluralRules; // locale Locale* fLocale; + +private: + /** + * An internal status variable used to indicate that the object is in an 'invalid' state. + * Used by copy constructor, the assignment operator and the clone method. + */ + UErrorCode fInternalStatus; }; inline UBool -CurrencyPluralInfo::operator!=(const CurrencyPluralInfo& info) const { return !operator==(info); } +CurrencyPluralInfo::operator!=(const CurrencyPluralInfo& info) const { + return !operator==(info); +} U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/unicode/currunit.h b/deps/icu-small/source/i18n/unicode/currunit.h index d5bc4aa6d6d482..48cadc10b704af 100644 --- a/deps/icu-small/source/i18n/unicode/currunit.h +++ b/deps/icu-small/source/i18n/unicode/currunit.h @@ -38,7 +38,7 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { public: /** * Default constructor. Initializes currency code to "XXX" (no currency). - * @draft ICU 60 + * @stable ICU 60 */ CurrencyUnit(); @@ -59,17 +59,15 @@ class U_I18N_API CurrencyUnit: public MeasureUnit { */ CurrencyUnit(const CurrencyUnit& other); -#ifndef U_HIDE_DRAFT_API /** * Copy constructor from MeasureUnit. This constructor allows you to * restore a CurrencyUnit that was sliced to MeasureUnit. * * @param measureUnit The MeasureUnit to copy from. * @param ec Set to a failing value if the MeasureUnit is not a currency. - * @draft ICU 60 + * @stable ICU 60 */ CurrencyUnit(const MeasureUnit& measureUnit, UErrorCode &ec); -#endif /* U_HIDE_DRAFT_API */ /** * Assignment operator diff --git a/deps/icu-small/source/i18n/unicode/datefmt.h b/deps/icu-small/source/i18n/unicode/datefmt.h index c895183931546e..13c63d937675ea 100644 --- a/deps/icu-small/source/i18n/unicode/datefmt.h +++ b/deps/icu-small/source/i18n/unicode/datefmt.h @@ -43,13 +43,17 @@ U_NAMESPACE_BEGIN class TimeZone; class DateTimePatternGenerator; -// explicit template instantiation. see digitlst.h -// (When building DLLs for Windows this is required.) -#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +/** + * \cond + * Export an explicit template instantiation. (See digitlst.h, datefmt.h, and others.) + * (When building DLLs for Windows this is required.) + */ +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) template class U_I18N_API EnumSet; #endif +/** \endcond */ /** * DateFormat is an abstract class for a family of classes that convert dates and diff --git a/deps/icu-small/source/i18n/unicode/dcfmtsym.h b/deps/icu-small/source/i18n/unicode/dcfmtsym.h index 2f824cec3087f5..55e3d8a6b3b5ec 100644 --- a/deps/icu-small/source/i18n/unicode/dcfmtsym.h +++ b/deps/icu-small/source/i18n/unicode/dcfmtsym.h @@ -181,7 +181,6 @@ class U_I18N_API DecimalFormatSymbols : public UObject { */ DecimalFormatSymbols(const Locale& locale, UErrorCode& status); -#ifndef U_HIDE_DRAFT_API /** * Creates a DecimalFormatSymbols instance for the given locale with digits and symbols * corresponding to the given NumberingSystem. @@ -196,10 +195,9 @@ class U_I18N_API DecimalFormatSymbols : public UObject { * @param ns The numbering system. * @param status Input/output parameter, set to success or * failure code upon return. - * @draft ICU 60 + * @stable ICU 60 */ DecimalFormatSymbols(const Locale& locale, const NumberingSystem& ns, UErrorCode& status); -#endif /* U_HIDE_DRAFT_API */ /** * Create a DecimalFormatSymbols object for the default locale. @@ -406,7 +404,7 @@ class U_I18N_API DecimalFormatSymbols : public UObject { * returning a const reference to one of the symbol strings. * The returned reference becomes invalid when the symbol is changed * or when the DecimalFormatSymbols are destroyed. - * Note: moved #ifndef U_HIDE_INTERNAL_API after this, since this is needed for inline in DecimalFormat + * Note: moved \#ifndef U_HIDE_INTERNAL_API after this, since this is needed for inline in DecimalFormat * * This is not currently stable API, but if you think it should be stable, * post a comment on the following ticket and the ICU team will take a look: @@ -531,7 +529,7 @@ inline const UnicodeString& DecimalFormatSymbols::getConstDigitSymbol(int32_t di ENumberFormatSymbol key = static_cast(kOneDigitSymbol + digit - 1); return fSymbols[key]; } -#endif +#endif /* U_HIDE_INTERNAL_API */ // ------------------------------------- diff --git a/deps/icu-small/source/i18n/unicode/decimfmt.h b/deps/icu-small/source/i18n/unicode/decimfmt.h index 3747f510f79c79..b3a5cc0495f144 100644 --- a/deps/icu-small/source/i18n/unicode/decimfmt.h +++ b/deps/icu-small/source/i18n/unicode/decimfmt.h @@ -63,19 +63,22 @@ class NumberParserImpl; } } -// explicit template instantiation. see digitlst.h -// (When building DLLs for Windows this is required.) -#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN +/** + * \cond + * explicit template instantiation. see digitlst.h + * (When building DLLs for Windows this is required.) + */ +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) template class U_I18N_API EnumSet; #endif +/** \endcond */ /** - *

    IMPORTANT: New users are strongly encouraged to see if + * **IMPORTANT:** New users are strongly encouraged to see if * numberformatter.h fits their use case. Although not deprecated, this header * is provided for backwards compatibility only. - *


    * * DecimalFormat is a concrete subclass of NumberFormat that formats decimal * numbers. It has a variety of features designed to make it possible to parse @@ -85,13 +88,13 @@ template class U_I18N_API EnumSetTo obtain a NumberFormat for a specific locale (including the default + * To obtain a NumberFormat for a specific locale (including the default * locale) call one of NumberFormat's factory methods such as * createInstance(). Do not call the DecimalFormat constructors directly, unless * you know what you are doing, since the NumberFormat factory methods may * return subclasses other than DecimalFormat. * - *

    Example Usage + * **Example Usage** * * \code * // Normally we would have a GUI with a menu for this @@ -135,11 +138,11 @@ template class U_I18N_API EnumSet - * Another example use createInstance(style) - *

    - *

    - * // Print out a number using the localized number, currency,
    + *
    + * **Another example use createInstance(style)**
    + *
    + * \code
    + * // Print out a number using the localized number, currency,
      * // percent, scientific, integer, iso currency, and plural currency
      * // format for each locale
      * Locale* locale = new Locale("en", "US");
    @@ -150,11 +153,13 @@ template class U_I18N_API    EnumSetformat(myNumber, str) << endl;
      *     format->parse(form->format(myNumber, str), fmtable, success);
    - * }
    + * delete form; + * } + * \endcode * * *

    Patterns @@ -690,7 +695,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of DecimalFormat. + * #icu::number::NumberFormatter instead of DecimalFormat. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. * @stable ICU 2.0 @@ -708,7 +713,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of DecimalFormat. + * #icu::number::NumberFormatter instead of DecimalFormat. * @param pattern A non-localized pattern string. * @param status Output param set to success/failure code. If the * pattern is invalid this will be set to a failure code. @@ -728,7 +733,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * a NumberFormat factory method. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of DecimalFormat. + * #icu::number::NumberFormatter instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -782,7 +787,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * May return U_UNSUPPORTED_ERROR if this instance does not support * the specified attribute. * @param attr the attribute to set - * @param newvalue new value + * @param newValue new value * @param status the error type * @return *this - for chaining (example: format.setAttribute(...).setAttribute(...) ) * @stable ICU 51 @@ -839,7 +844,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * a NumberFormat factory method. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of DecimalFormat. + * #icu::number::NumberFormatter instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbolsToAdopt the set of symbols to be used. The caller should not @@ -864,7 +869,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * a NumberFormat factory method. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of DecimalFormat. + * #icu::number::NumberFormatter instead of DecimalFormat. * * @param pattern a non-localized pattern string * @param symbols the set of symbols to be used @@ -986,6 +991,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * Result is appended to existing contents. * @param pos On input: an alignment field, if desired. * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. * @return Reference to 'appendTo' parameter. * @internal */ @@ -1031,6 +1037,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { * Result is appended to existing contents. * @param pos On input: an alignment field, if desired. * On output: the offsets of the alignment field. + * @param status Output param filled with success/failure status. * @return Reference to 'appendTo' parameter. * @internal */ @@ -1726,7 +1733,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual void setDecimalPatternMatchRequired(UBool newValue); /** - * {@icu} Returns whether to ignore exponents when parsing. + * Returns whether to ignore exponents when parsing. * * @see #setParseNoExponent * @internal This API is a technical preview. It may change in an upcoming release. @@ -1734,7 +1741,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual UBool isParseNoExponent() const; /** - * {@icu} Specifies whether to stop parsing when an exponent separator is encountered. For + * Specifies whether to stop parsing when an exponent separator is encountered. For * example, parses "123E4" to 123 (with parse position 3) instead of 1230000 (with parse position * 5). * @@ -1744,7 +1751,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual void setParseNoExponent(UBool value); /** - * {@icu} Returns whether parsing is sensitive to case (lowercase/uppercase). + * Returns whether parsing is sensitive to case (lowercase/uppercase). * * @see #setParseCaseSensitive * @internal This API is a technical preview. It may change in an upcoming release. @@ -1752,7 +1759,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual UBool isParseCaseSensitive() const; /** - * {@icu} Whether to pay attention to case when parsing; default is to ignore case (perform + * Whether to pay attention to case when parsing; default is to ignore case (perform * case-folding). For example, "A" == "a" in case-insensitive but not case-sensitive mode. * * Currency symbols are never case-folded. For example, "us$1.00" will not parse in case-insensitive @@ -1763,7 +1770,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual void setParseCaseSensitive(UBool value); /** - * {@icu} Returns whether truncation of high-order integer digits should result in an error. + * Returns whether truncation of high-order integer digits should result in an error. * By default, setMaximumIntegerDigits truncates high-order digits silently. * * @see setFormatFailIfMoreThanMaxDigits @@ -1772,7 +1779,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual UBool isFormatFailIfMoreThanMaxDigits() const; /** - * {@icu} Sets whether truncation of high-order integer digits should result in an error. + * Sets whether truncation of high-order integer digits should result in an error. * By default, setMaximumIntegerDigits truncates high-order digits silently. * * @internal This API is a technical preview. It may change in an upcoming release. @@ -2017,16 +2024,17 @@ class U_I18N_API DecimalFormat : public NumberFormat { virtual void setCurrency(const char16_t* theCurrency); /** - * Sets the Currency Context object used to display currency. + * Sets the `Currency Usage` object used to display currency. * This takes effect immediately, if this format is a * currency format. - * @param currencyContext new currency context object to use. + * @param newUsage new currency usage object to use. + * @param ec input-output error code * @stable ICU 54 */ void setCurrencyUsage(UCurrencyUsage newUsage, UErrorCode* ec); /** - * Returns the Currency Context object used to display currency + * Returns the `Currency Usage` object used to display currency * @stable ICU 54 */ UCurrencyUsage getCurrencyUsage() const; @@ -2050,7 +2058,7 @@ class U_I18N_API DecimalFormat : public NumberFormat { void formatToDecimalQuantity(const Formattable& number, number::impl::DecimalQuantity& output, UErrorCode& status) const; -#endif +#endif /* U_HIDE_INTERNAL_API */ #ifndef U_HIDE_DRAFT_API /** @@ -2072,7 +2080,6 @@ class U_I18N_API DecimalFormat : public NumberFormat { * FormattedNumber result = df->toNumberFormatter().formatDouble(123, status); * * - * @param output The variable into which to store the LocalizedNumberFormatter. * @return The output variable, for chaining. * @draft ICU 62 */ diff --git a/deps/icu-small/source/i18n/unicode/dtptngen.h b/deps/icu-small/source/i18n/unicode/dtptngen.h index feb465e7997401..26ccc64060f15a 100644 --- a/deps/icu-small/source/i18n/unicode/dtptngen.h +++ b/deps/icu-small/source/i18n/unicode/dtptngen.h @@ -498,27 +498,23 @@ class U_I18N_API DateTimePatternGenerator : public UObject { private: /** * Constructor. - * @stable ICU 3.8 */ DateTimePatternGenerator(UErrorCode & status); /** * Constructor. - * @stable ICU 3.8 */ DateTimePatternGenerator(const Locale& locale, UErrorCode & status); /** * Copy constructor. * @param other DateTimePatternGenerator to copy - * @stable ICU 3.8 */ DateTimePatternGenerator(const DateTimePatternGenerator& other); /** * Default assignment operator. * @param other DateTimePatternGenerator to copy - * @stable ICU 3.8 */ DateTimePatternGenerator& operator=(const DateTimePatternGenerator& other); @@ -542,6 +538,11 @@ class U_I18N_API DateTimePatternGenerator : public UObject { int32_t fAllowedHourFormats[7]; // Actually an array of AllowedHourFormat enum type, ending with UNKNOWN. + // Internal error code used for recording/reporting errors that occur during methods that do not + // have a UErrorCode parameter. For example: the Copy Constructor, or the ::clone() method. + // When this is set to an error the object is in an invalid state. + UErrorCode internalErrorCode; + /* internal flags masks for adjustFieldTypes etc. */ enum { kDTPGNoFlags = 0, @@ -569,11 +570,10 @@ class U_I18N_API DateTimePatternGenerator : public UObject { #endif // U_HIDE_DRAFT_API void getAppendName(UDateTimePatternField field, UnicodeString& value); UnicodeString mapSkeletonMetacharacters(const UnicodeString& patternForm, int32_t* flags, UErrorCode& status); - int32_t getCanonicalIndex(const UnicodeString& field); - const UnicodeString* getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, const PtnSkeleton** specifiedSkeletonPtr = 0); + const UnicodeString* getBestRaw(DateTimeMatcher& source, int32_t includeMask, DistanceInfo* missingFields, UErrorCode& status, const PtnSkeleton** specifiedSkeletonPtr = 0); UnicodeString adjustFieldTypes(const UnicodeString& pattern, const PtnSkeleton* specifiedSkeleton, int32_t flags, UDateTimePatternMatchOptions options = UDATPG_MATCH_NO_OPTIONS); - UnicodeString getBestAppending(int32_t missingFields, int32_t flags, UDateTimePatternMatchOptions options = UDATPG_MATCH_NO_OPTIONS); - int32_t getTopBitNumber(int32_t foundMask); + UnicodeString getBestAppending(int32_t missingFields, int32_t flags, UErrorCode& status, UDateTimePatternMatchOptions options = UDATPG_MATCH_NO_OPTIONS); + int32_t getTopBitNumber(int32_t foundMask) const; void setAvailableFormat(const UnicodeString &key, UErrorCode& status); UBool isAvailableFormatSet(const UnicodeString &key) const; void copyHashtable(Hashtable *other, UErrorCode &status); diff --git a/deps/icu-small/source/i18n/unicode/fmtable.h b/deps/icu-small/source/i18n/unicode/fmtable.h index 2359b61d46186e..a06c23dc3bd532 100644 --- a/deps/icu-small/source/i18n/unicode/fmtable.h +++ b/deps/icu-small/source/i18n/unicode/fmtable.h @@ -658,7 +658,7 @@ class U_I18N_API Formattable : public UObject { /** * Adopt, and set value from, a DecimalQuantity * Internal Function, do not use. - * @param dl the DecimalQuantity to be adopted + * @param dq the DecimalQuantity to be adopted * @internal */ void adoptDecimalQuantity(number::impl::DecimalQuantity *dq); diff --git a/deps/icu-small/source/i18n/unicode/gender.h b/deps/icu-small/source/i18n/unicode/gender.h index 467b64ec5ebad0..b7c31cb554a696 100644 --- a/deps/icu-small/source/i18n/unicode/gender.h +++ b/deps/icu-small/source/i18n/unicode/gender.h @@ -18,6 +18,11 @@ #ifndef _GENDER #define _GENDER +/** + * \file + * \brief C++ API: GenderInfo computes the gender of a list. + */ + #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING @@ -30,7 +35,7 @@ class GenderInfoTest; U_NAMESPACE_BEGIN -// Forward Declaration +/** \internal Forward Declaration */ void U_CALLCONV GenderInfo_initCache(UErrorCode &status); /** diff --git a/deps/icu-small/source/common/unicode/listformatter.h b/deps/icu-small/source/i18n/unicode/listformatter.h similarity index 79% rename from deps/icu-small/source/common/unicode/listformatter.h rename to deps/icu-small/source/i18n/unicode/listformatter.h index 180fbcb5cde5b7..5e36cf71cc54b7 100644 --- a/deps/icu-small/source/common/unicode/listformatter.h +++ b/deps/icu-small/source/i18n/unicode/listformatter.h @@ -26,6 +26,9 @@ U_NAMESPACE_BEGIN +class FieldPositionIterator; +class FieldPositionHandler; + /** @internal */ class Hashtable; @@ -33,7 +36,10 @@ class Hashtable; struct ListFormatInternal; /* The following can't be #ifndef U_HIDE_INTERNAL_API, needed for other .h file declarations */ -/** @internal */ +/** + * @internal + * \cond + */ struct ListFormatData : public UMemory { UnicodeString twoPattern; UnicodeString startPattern; @@ -43,6 +49,7 @@ struct ListFormatData : public UMemory { ListFormatData(const UnicodeString& two, const UnicodeString& start, const UnicodeString& middle, const UnicodeString& end) : twoPattern(two), startPattern(start), middlePattern(middle), endPattern(end) {} }; +/** \endcond */ /** @@ -61,7 +68,7 @@ struct ListFormatData : public UMemory { * The ListFormatter class is not intended for public subclassing. * @stable ICU 50 */ -class U_COMMON_API ListFormatter : public UObject{ +class U_I18N_API ListFormatter : public UObject{ public: @@ -133,6 +140,27 @@ class U_COMMON_API ListFormatter : public UObject{ UnicodeString& format(const UnicodeString items[], int32_t n_items, UnicodeString& appendTo, UErrorCode& errorCode) const; +#ifndef U_HIDE_DRAFT_API + /** + * Format a list of strings. + * + * @param items An array of strings to be combined and formatted. + * @param n_items Length of the array items. + * @param appendTo The string to which the formatted result will be + * appended. + * @param posIter On return, can be used to iterate over positions of + * fields generated by this format call. Field values are + * defined in UListFormatterField. Can be NULL. + * @param errorCode ICU error code returned here. + * @return Formatted string combining the elements of items, + * appended to appendTo. + * @draft ICU 63 + */ + UnicodeString& format(const UnicodeString items[], int32_t n_items, + UnicodeString & appendTo, FieldPositionIterator* posIter, + UErrorCode& errorCode) const; +#endif /* U_HIDE_DRAFT_API */ + #ifndef U_HIDE_INTERNAL_API /** @internal for MeasureFormat @@ -160,6 +188,10 @@ class U_COMMON_API ListFormatter : public UObject{ struct ListPatternsSink; static ListFormatInternal* loadListFormatInternal(const Locale& locale, const char* style, UErrorCode& errorCode); + UnicodeString& format_( + const UnicodeString items[], int32_t n_items, UnicodeString& appendTo, + int32_t index, int32_t &offset, FieldPositionHandler* handler, UErrorCode& errorCode) const; + ListFormatter(); ListFormatInternal* owned; diff --git a/deps/icu-small/source/i18n/unicode/measfmt.h b/deps/icu-small/source/i18n/unicode/measfmt.h index 14399dd59a700a..bbdd2364bdd997 100644 --- a/deps/icu-small/source/i18n/unicode/measfmt.h +++ b/deps/icu-small/source/i18n/unicode/measfmt.h @@ -104,7 +104,7 @@ class U_I18N_API MeasureFormat : public Format { * Constructor. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 53 */ MeasureFormat( @@ -114,7 +114,7 @@ class U_I18N_API MeasureFormat : public Format { * Constructor. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 53 */ MeasureFormat( @@ -202,7 +202,7 @@ class U_I18N_API MeasureFormat : public Format { * formatted string is 3.5 meters per second. * @param measure The measure object. In above example, 3.5 meters. * @param perUnit The per unit. In above example, it is - * *MeasureUnit::createSecond(status). + * `*%MeasureUnit::createSecond(status)`. * @param appendTo formatted string appended here. * @param pos the field position. * @param status the error. @@ -223,7 +223,7 @@ class U_I18N_API MeasureFormat : public Format { * @param unit The unit for which to get a display name. * @param status the error. * @return The display name in the locale and width specified in - * {@link MeasureFormat#getInstance}, or null if there is no display name available + * the MeasureFormat constructor, or null if there is no display name available * for the specified unit. * * @stable ICU 58 @@ -236,7 +236,7 @@ class U_I18N_API MeasureFormat : public Format { * locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param locale desired locale * @param ec input-output error code * @return a formatter object, or NULL upon error @@ -250,7 +250,7 @@ class U_I18N_API MeasureFormat : public Format { * locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param ec input-output error code * @return a formatter object, or NULL upon error * @stable ICU 3.0 @@ -348,7 +348,7 @@ class U_I18N_API MeasureFormat : public Format { const MeasureFormatCacheData *cache; const SharedNumberFormat *numberFormat; const SharedPluralRules *pluralRules; - UMeasureFormatWidth width; + UMeasureFormatWidth fWidth; // Declared outside of MeasureFormatSharedData because ListFormatter // objects are relatively cheap to copy; therefore, they don't need to be diff --git a/deps/icu-small/source/i18n/unicode/measunit.h b/deps/icu-small/source/i18n/unicode/measunit.h index f552253544f890..676fdeb9c8cd8a 100644 --- a/deps/icu-small/source/i18n/unicode/measunit.h +++ b/deps/icu-small/source/i18n/unicode/measunit.h @@ -368,6 +368,26 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createPartPerMillion(UErrorCode &status); +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of concentr: percent. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 63 + */ + static MeasureUnit *createPercent(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of concentr: permille. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 63 + */ + static MeasureUnit *createPermille(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns unit of consumption: liter-per-100kilometers. * Caller owns returned value and must free it. @@ -464,6 +484,16 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createMegabyte(UErrorCode &status); +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of digital: petabyte. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 63 + */ + static MeasureUnit *createPetabyte(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns unit of digital: terabit. * Caller owns returned value and must free it. @@ -984,6 +1014,16 @@ class U_I18N_API MeasureUnit: public UObject { */ static MeasureUnit *createWatt(UErrorCode &status); +#ifndef U_HIDE_DRAFT_API + /** + * Returns unit of pressure: atmosphere. + * Caller owns returned value and must free it. + * @param status ICU error code. + * @draft ICU 63 + */ + static MeasureUnit *createAtmosphere(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ + /** * Returns unit of pressure: hectopascal. * Caller owns returned value and must free it. diff --git a/deps/icu-small/source/i18n/unicode/msgfmt.h b/deps/icu-small/source/i18n/unicode/msgfmt.h index fef80107747bf8..074d93354000ef 100644 --- a/deps/icu-small/source/i18n/unicode/msgfmt.h +++ b/deps/icu-small/source/i18n/unicode/msgfmt.h @@ -124,7 +124,7 @@ class NumberFormat; * argNumber = '0' | ('1'..'9' ('0'..'9')*) * * argType = "number" | "date" | "time" | "spellout" | "ordinal" | "duration" - * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText + * argStyle = "short" | "medium" | "long" | "full" | "integer" | "currency" | "percent" | argStyleText | "::" argSkeletonText * * *

      @@ -166,7 +166,7 @@ class NumberFormat; * (none) * null * - * number + * number * (none) * NumberFormat.createInstance(getLocale(), status) * @@ -182,6 +182,9 @@ class NumberFormat; * argStyleText * new DecimalFormat(argStyleText, new DecimalFormatSymbols(getLocale(), status), status) * + * argSkeletonText + * NumberFormatter::forSkeleton(argSkeletonText, status).locale(getLocale()).toFormat(status) + * * date * (none) * DateFormat.createDateInstance(kDefault, getLocale(), status) @@ -199,7 +202,7 @@ class NumberFormat; * DateFormat.createDateInstance(kFull, getLocale(), status) * * argStyleText - * new SimpleDateFormat(argStyleText, getLocale(), status) + * new SimpleDateFormat(argStyleText, getLocale(), status) * * time * (none) @@ -218,7 +221,7 @@ class NumberFormat; * DateFormat.createTimeInstance(kFull, getLocale(), status) * * argStyleText - * new SimpleDateFormat(argStyleText, getLocale(), status) + * new SimpleDateFormat(argStyleText, getLocale(), status) * * spellout * argStyleText (optional) diff --git a/deps/icu-small/source/i18n/unicode/numberformatter.h b/deps/icu-small/source/i18n/unicode/numberformatter.h index 3ab08319f73bc7..469949a2878eb4 100644 --- a/deps/icu-small/source/i18n/unicode/numberformatter.h +++ b/deps/icu-small/source/i18n/unicode/numberformatter.h @@ -144,11 +144,31 @@ class MultiplierFormatHandler; class CurrencySymbols; class GeneratorHelpers; class DecNum; +class NumberRangeFormatterImpl; +struct RangeMacroProps; + +/** + * Used for NumberRangeFormatter and implemented in numrange_fluent.cpp. + * Declared here so it can be friended. + * + * @internal + */ +void touchRangeLocales(impl::RangeMacroProps& macros); } // namespace impl -// Reserve extra names in case they are added as classes in the future: +/** + * Extra name reserved in case it is needed in the future. + * + * @draft ICU 63 + */ typedef Notation CompactNotation; + +/** + * Extra name reserved in case it is needed in the future. + * + * @draft ICU 63 + */ typedef Notation SimpleNotation; /** @@ -308,10 +328,15 @@ class U_I18N_API Notation : public UMemory { union NotationUnion { // For NTN_SCIENTIFIC + /** @internal */ struct ScientificSettings { + /** @internal */ int8_t fEngineeringInterval; + /** @internal */ bool fRequireMinInt; + /** @internal */ impl::digits_t fMinExponentDigits; + /** @internal */ UNumberSignDisplay fExponentSignDisplay; } scientific; @@ -407,15 +432,39 @@ class U_I18N_API ScientificNotation : public Notation { friend class impl::NumberPropertyMapper; }; -// Reserve extra names in case they are added as classes in the future: +/** + * Extra name reserved in case it is needed in the future. + * + * @draft ICU 63 + */ typedef Precision SignificantDigitsPrecision; // Typedefs for ICU 60/61 compatibility. // These will be removed in ICU 64. // See http://bugs.icu-project.org/trac/ticket/13746 + +/** + * This will be removed in ICU 64. See ICU-13746. + * @deprecated ICU 63 + */ typedef Precision Rounder; + +/** + * This will be removed in ICU 64. See ICU-13746. + * @deprecated ICU 63 + */ typedef FractionPrecision FractionRounder; + +/** + * This will be removed in ICU 64. See ICU-13746. + * @deprecated ICU 63 + */ typedef IncrementPrecision IncrementRounder; + +/** + * This will be removed in ICU 64. See ICU-13746. + * @deprecated ICU 63 + */ typedef CurrencyPrecision CurrencyRounder; /** @@ -672,16 +721,25 @@ class U_I18N_API Precision : public UMemory { } fType; union PrecisionUnion { + /** @internal */ struct FractionSignificantSettings { // For RND_FRACTION, RND_SIGNIFICANT, and RND_FRACTION_SIGNIFICANT + /** @internal */ impl::digits_t fMinFrac; + /** @internal */ impl::digits_t fMaxFrac; + /** @internal */ impl::digits_t fMinSig; + /** @internal */ impl::digits_t fMaxSig; } fracSig; + /** @internal */ struct IncrementSettings { + /** @internal */ double fIncrement; + /** @internal */ impl::digits_t fMinFrac; + /** @internal */ impl::digits_t fMaxFrac; } increment; // For RND_INCREMENT UCurrencyUsage currencyUsage; // For RND_CURRENCY @@ -1205,7 +1263,7 @@ class U_I18N_API Grouper : public UMemory { public: #ifndef U_HIDE_INTERNAL_API /** @internal */ - static Grouper forStrategy(UGroupingStrategy grouping); + static Grouper forStrategy(UNumberGroupingStrategy grouping); /** * Resolve the values in Properties to a Grouper object. @@ -1216,7 +1274,7 @@ class U_I18N_API Grouper : public UMemory { // Future: static Grouper forProperties(DecimalFormatProperties& properties); /** @internal */ - Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping, UGroupingStrategy strategy) + Grouper(int16_t grouping1, int16_t grouping2, int16_t minGrouping, UNumberGroupingStrategy strategy) : fGrouping1(grouping1), fGrouping2(grouping2), fMinGrouping(minGrouping), @@ -1251,10 +1309,10 @@ class U_I18N_API Grouper : public UMemory { int16_t fMinGrouping; /** - * The UGroupingStrategy that was used to create this Grouper, or UNUM_GROUPING_COUNT if this - * was not created from a UGroupingStrategy. + * The UNumberGroupingStrategy that was used to create this Grouper, or UNUM_GROUPING_COUNT if this + * was not created from a UNumberGroupingStrategy. */ - UGroupingStrategy fStrategy; + UNumberGroupingStrategy fStrategy; Grouper() : fGrouping1(-3) {}; @@ -1423,7 +1481,8 @@ struct U_I18N_API MacroProps : public UMemory { /** * An abstract base class for specifying settings related to number formatting. This class is implemented by - * {@link UnlocalizedNumberFormatter} and {@link LocalizedNumberFormatter}. + * {@link UnlocalizedNumberFormatter} and {@link LocalizedNumberFormatter}. This class is not intended for + * public subclassing. */ template class U_I18N_API NumberFormatterSettings { @@ -1710,7 +1769,7 @@ class U_I18N_API NumberFormatterSettings { * The exact grouping widths will be chosen based on the locale. * *

      - * Pass this method an element from the {@link UGroupingStrategy} enum. For example: + * Pass this method an element from the {@link UNumberGroupingStrategy} enum. For example: * *

            * NumberFormatter::with().grouping(UNUM_GROUPING_MIN2)
      @@ -1724,7 +1783,7 @@ class U_I18N_API NumberFormatterSettings {
            * @return The fluent chain.
            * @draft ICU 61
            */
      -    Derived grouping(UGroupingStrategy strategy) const &;
      +    Derived grouping(UNumberGroupingStrategy strategy) const &;
       
           /**
            * Overload of grouping() for use on an rvalue reference.
      @@ -1733,10 +1792,9 @@ class U_I18N_API NumberFormatterSettings {
            *            The grouping strategy to use.
            * @return The fluent chain.
            * @see #grouping
      -     * @provisional This API might change or be removed in a future release.
            * @draft ICU 62
            */
      -    Derived grouping(UGroupingStrategy strategy) &&;
      +    Derived grouping(UNumberGroupingStrategy strategy) &&;
       
           /**
            * Specifies the minimum and maximum number of digits to render before the decimal mark.
      @@ -1748,7 +1806,7 @@ class U_I18N_API NumberFormatterSettings {
            * 
    * *

    - * Pass this method the return value of {@link IntegerWidth#zeroFillTo(int)}. For example: + * Pass this method the return value of {@link IntegerWidth#zeroFillTo}. For example: * *

          * NumberFormatter::with().integerWidth(IntegerWidth::zeroFillTo(2))
    @@ -2099,15 +2157,18 @@ class U_I18N_API NumberFormatterSettings {
     
         // NOTE: Uses default copy and move constructors.
     
    -  protected:
    +  private:
         impl::MacroProps fMacros;
     
    -  private:
         // Don't construct me directly!  Use (Un)LocalizedNumberFormatter.
         NumberFormatterSettings() = default;
     
         friend class LocalizedNumberFormatter;
         friend class UnlocalizedNumberFormatter;
    +
    +    // Give NumberRangeFormatter access to the MacroProps
    +    friend void impl::touchRangeLocales(impl::RangeMacroProps& macros);
    +    friend class impl::NumberRangeFormatterImpl;
     };
     
     /**
    @@ -2124,13 +2185,6 @@ class U_I18N_API UnlocalizedNumberFormatter
          * Associate the given locale with the number formatter. The locale is used for picking the appropriate symbols,
          * formats, and other data for number display.
          *
    -     * 

    - * To use the Java default locale, call Locale::getDefault(): - * - *

    -     * NumberFormatter::with(). ... .locale(Locale::getDefault())
    -     * 
    - * * @param locale * The locale to use when loading data for number formatting. * @return The fluent chain. @@ -2156,7 +2210,6 @@ class U_I18N_API UnlocalizedNumberFormatter */ UnlocalizedNumberFormatter() = default; - // Make default copy constructor call the NumberFormatterSettings copy constructor. /** * Returns a copy of this UnlocalizedNumberFormatter. * @draft ICU 60 @@ -2271,7 +2324,7 @@ class U_I18N_API LocalizedNumberFormatter */ int32_t getCallCount() const; -#endif +#endif /* U_HIDE_INTERNAL_API */ /** * Creates a representation of this LocalizedNumberFormat as an icu::Format, enabling the use @@ -2295,7 +2348,6 @@ class U_I18N_API LocalizedNumberFormatter */ LocalizedNumberFormatter() = default; - // Make default copy constructor call the NumberFormatterSettings copy constructor. /** * Returns a copy of this LocalizedNumberFormatter. * @draft ICU 60 @@ -2333,11 +2385,12 @@ class U_I18N_API LocalizedNumberFormatter * * @param results * The results object. This method will mutate it to save the results. + * @param status * @internal */ void formatImpl(impl::UFormattedNumberData *results, UErrorCode &status) const; -#endif +#endif /* U_HIDE_INTERNAL_API */ /** * Destruct this LocalizedNumberFormatter, cleaning up any memory it might own. @@ -2359,6 +2412,8 @@ class U_I18N_API LocalizedNumberFormatter LocalizedNumberFormatter(impl::MacroProps &¯os, const Locale &locale); + void clear(); + void lnfMoveHelper(LocalizedNumberFormatter&& src); /** @@ -2430,7 +2485,7 @@ class U_I18N_API FormattedNumber : public UMemory { * @draft ICU 62 * @see Appendable */ - Appendable &appendTo(Appendable &appendable, UErrorCode& status); + Appendable &appendTo(Appendable &appendable, UErrorCode& status) const; #ifndef U_HIDE_DEPRECATED_API /** @@ -2457,9 +2512,9 @@ class U_I18N_API FormattedNumber : public UMemory { #endif /* U_HIDE_DEPRECATED_API */ /** - * Determines the start and end indices of the next occurrence of the given field in the - * output string. This allows you to determine the locations of, for example, the integer part, - * fraction part, or symbols. + * Determines the start (inclusive) and end (exclusive) indices of the next occurrence of the given + * field in the output string. This allows you to determine the locations of, for example, + * the integer part, fraction part, or symbols. * * If a field occurs just once, calling this method will find that occurrence and return it. If a * field occurs multiple times, this method may be called repeatedly with the following pattern: @@ -2478,7 +2533,7 @@ class U_I18N_API FormattedNumber : public UMemory { * Input+output variable. On input, the "field" property determines which field to look * up, and the "beginIndex" and "endIndex" properties determine where to begin the search. * On output, the "beginIndex" is set to the beginning of the first occurrence of the - * field with either begin or end indices after the input indices, "endIndex" is set to + * field with either begin or end indices after the input indices; "endIndex" is set to * the end of that occurrence of the field (exclusive index). If a field position is not * found, the method returns FALSE and the FieldPosition may or may not be changed. * @param status @@ -2537,7 +2592,7 @@ class U_I18N_API FormattedNumber : public UMemory { */ void getAllFieldPositionsImpl(FieldPositionIteratorHandler& fpih, UErrorCode& status) const; -#endif +#endif /* U_HIDE_INTERNAL_API */ /** * Copying not supported; use move constructor instead. diff --git a/deps/icu-small/source/i18n/unicode/numberrangeformatter.h b/deps/icu-small/source/i18n/unicode/numberrangeformatter.h new file mode 100644 index 00000000000000..3e6248d934de61 --- /dev/null +++ b/deps/icu-small/source/i18n/unicode/numberrangeformatter.h @@ -0,0 +1,866 @@ +// © 2018 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +#if !UCONFIG_NO_FORMATTING +#ifndef __NUMBERRANGEFORMATTER_H__ +#define __NUMBERRANGEFORMATTER_H__ + +#include +#include "unicode/appendable.h" +#include "unicode/fieldpos.h" +#include "unicode/fpositer.h" +#include "unicode/numberformatter.h" + +#ifndef U_HIDE_DRAFT_API + +/** + * \file + * \brief C++ API: Library for localized formatting of number, currency, and unit ranges. + * + * The main entrypoint to the formatting of ranges of numbers, including currencies and other units of measurement. + *

    + * Usage example: + *

    + *

    + * NumberRangeFormatter::with()
    + *     .identityFallback(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)
    + *     .numberFormatterFirst(NumberFormatter::with().adoptUnit(MeasureUnit::createMeter()))
    + *     .numberFormatterSecond(NumberFormatter::with().adoptUnit(MeasureUnit::createKilometer()))
    + *     .locale("en-GB")
    + *     .formatRange(750, 1.2, status)
    + *     .toString(status);
    + * // => "750 m - 1.2 km"
    + * 
    + *

    + * Like NumberFormatter, NumberRangeFormatter instances are immutable and thread-safe. This API is based on the + * fluent design pattern popularized by libraries such as Google's Guava. + * + * @author Shane Carr + */ + + +/** + * Defines how to merge fields that are identical across the range sign. + * + * @draft ICU 63 + */ +typedef enum UNumberRangeCollapse { + /** + * Use locale data and heuristics to determine how much of the string to collapse. Could end up collapsing none, + * some, or all repeated pieces in a locale-sensitive way. + * + * The heuristics used for this option are subject to change over time. + * + * @draft ICU 63 + */ + UNUM_RANGE_COLLAPSE_AUTO, + + /** + * Do not collapse any part of the number. Example: "3.2 thousand kilograms – 5.3 thousand kilograms" + * + * @draft ICU 63 + */ + UNUM_RANGE_COLLAPSE_NONE, + + /** + * Collapse the unit part of the number, but not the notation, if present. Example: "3.2 thousand – 5.3 thousand + * kilograms" + * + * @draft ICU 63 + */ + UNUM_RANGE_COLLAPSE_UNIT, + + /** + * Collapse any field that is equal across the range sign. May introduce ambiguity on the magnitude of the + * number. Example: "3.2 – 5.3 thousand kilograms" + * + * @draft ICU 63 + */ + UNUM_RANGE_COLLAPSE_ALL +} UNumberRangeCollapse; + +/** + * Defines the behavior when the two numbers in the range are identical after rounding. To programmatically detect + * when the identity fallback is used, compare the lower and upper BigDecimals via FormattedNumber. + * + * @draft ICU 63 + * @see NumberRangeFormatter + */ +typedef enum UNumberRangeIdentityFallback { + /** + * Show the number as a single value rather than a range. Example: "$5" + * + * @draft ICU 63 + */ + UNUM_IDENTITY_FALLBACK_SINGLE_VALUE, + + /** + * Show the number using a locale-sensitive approximation pattern. If the numbers were the same before rounding, + * show the single value. Example: "~$5" or "$5" + * + * @draft ICU 63 + */ + UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE, + + /** + * Show the number using a locale-sensitive approximation pattern. Use the range pattern always, even if the + * inputs are the same. Example: "~$5" + * + * @draft ICU 63 + */ + UNUM_IDENTITY_FALLBACK_APPROXIMATELY, + + /** + * Show the number as the range of two equal values. Use the range pattern always, even if the inputs are the + * same. Example (with RangeCollapse.NONE): "$5 – $5" + * + * @draft ICU 63 + */ + UNUM_IDENTITY_FALLBACK_RANGE +} UNumberRangeIdentityFallback; + +/** + * Used in the result class FormattedNumberRange to indicate to the user whether the numbers formatted in the range + * were equal or not, and whether or not the identity fallback was applied. + * + * @draft ICU 63 + * @see NumberRangeFormatter + */ +typedef enum UNumberRangeIdentityResult { + /** + * Used to indicate that the two numbers in the range were equal, even before any rounding rules were applied. + * + * @draft ICU 63 + * @see NumberRangeFormatter + */ + UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING, + + /** + * Used to indicate that the two numbers in the range were equal, but only after rounding rules were applied. + * + * @draft ICU 63 + * @see NumberRangeFormatter + */ + UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING, + + /** + * Used to indicate that the two numbers in the range were not equal, even after rounding rules were applied. + * + * @draft ICU 63 + * @see NumberRangeFormatter + */ + UNUM_IDENTITY_RESULT_NOT_EQUAL, + +#ifndef U_HIDE_INTERNAL_API + /** + * The number of entries in this enum. + * @internal + */ + UNUM_IDENTITY_RESULT_COUNT +#endif + +} UNumberRangeIdentityResult; + +U_NAMESPACE_BEGIN + +namespace number { // icu::number + +// Forward declarations: +class UnlocalizedNumberRangeFormatter; +class LocalizedNumberRangeFormatter; +class FormattedNumberRange; + +namespace impl { + +// Forward declarations: +struct RangeMacroProps; +class DecimalQuantity; +struct UFormattedNumberRangeData; +class NumberRangeFormatterImpl; + +} // namespace impl + +/** + * \cond + * Export an explicit template instantiation. See datefmt.h + * (When building DLLs for Windows this is required.) + */ +#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN && !defined(U_IN_DOXYGEN) +template struct U_I18N_API std::atomic; +#endif +/** \endcond */ + +// Other helper classes would go here, but there are none. + +namespace impl { // icu::number::impl + +// Do not enclose entire MacroProps with #ifndef U_HIDE_INTERNAL_API, needed for a protected field +/** @internal */ +struct U_I18N_API RangeMacroProps : public UMemory { + /** @internal */ + UnlocalizedNumberFormatter formatter1; // = NumberFormatter::with(); + + /** @internal */ + UnlocalizedNumberFormatter formatter2; // = NumberFormatter::with(); + + /** @internal */ + bool singleFormatter = true; + + /** @internal */ + UNumberRangeCollapse collapse = UNUM_RANGE_COLLAPSE_AUTO; + + /** @internal */ + UNumberRangeIdentityFallback identityFallback = UNUM_IDENTITY_FALLBACK_APPROXIMATELY; + + /** @internal */ + Locale locale; + + // NOTE: Uses default copy and move constructors. + + /** + * Check all members for errors. + * @internal + */ + bool copyErrorTo(UErrorCode &status) const { + return formatter1.copyErrorTo(status) || formatter2.copyErrorTo(status); + } +}; + +} // namespace impl + +/** + * An abstract base class for specifying settings related to number formatting. This class is implemented by + * {@link UnlocalizedNumberRangeFormatter} and {@link LocalizedNumberRangeFormatter}. This class is not intended for + * public subclassing. + */ +template +class U_I18N_API NumberRangeFormatterSettings { + public: + /** + * Sets the NumberFormatter instance to use for the numbers in the range. The same formatter is applied to both + * sides of the range. + *

    + * The NumberFormatter instances must not have a locale applied yet; the locale specified on the + * NumberRangeFormatter will be used. + * + * @param formatter + * The formatter to use for both numbers in the range. + * @return The fluent chain. + * @draft ICU 63 + */ + Derived numberFormatterBoth(const UnlocalizedNumberFormatter &formatter) const &; + + /** + * Overload of numberFormatterBoth() for use on an rvalue reference. + * + * @param formatter + * The formatter to use for both numbers in the range. + * @return The fluent chain. + * @see #numberFormatterBoth + * @draft ICU 63 + */ + Derived numberFormatterBoth(const UnlocalizedNumberFormatter &formatter) &&; + + /** + * Overload of numberFormatterBoth() for use on an rvalue reference. + * + * @param formatter + * The formatter to use for both numbers in the range. + * @return The fluent chain. + * @see #numberFormatterBoth + * @draft ICU 63 + */ + Derived numberFormatterBoth(UnlocalizedNumberFormatter &&formatter) const &; + + /** + * Overload of numberFormatterBoth() for use on an rvalue reference. + * + * @param formatter + * The formatter to use for both numbers in the range. + * @return The fluent chain. + * @see #numberFormatterBoth + * @draft ICU 63 + */ + Derived numberFormatterBoth(UnlocalizedNumberFormatter &&formatter) &&; + + /** + * Sets the NumberFormatter instance to use for the first number in the range. + *

    + * The NumberFormatter instances must not have a locale applied yet; the locale specified on the + * NumberRangeFormatter will be used. + * + * @param formatterFirst + * The formatter to use for the first number in the range. + * @return The fluent chain. + * @draft ICU 63 + */ + Derived numberFormatterFirst(const UnlocalizedNumberFormatter &formatterFirst) const &; + + /** + * Overload of numberFormatterFirst() for use on an rvalue reference. + * + * @param formatterFirst + * The formatter to use for the first number in the range. + * @return The fluent chain. + * @see #numberFormatterFirst + * @draft ICU 63 + */ + Derived numberFormatterFirst(const UnlocalizedNumberFormatter &formatterFirst) &&; + + /** + * Overload of numberFormatterFirst() for use on an rvalue reference. + * + * @param formatterFirst + * The formatter to use for the first number in the range. + * @return The fluent chain. + * @see #numberFormatterFirst + * @draft ICU 63 + */ + Derived numberFormatterFirst(UnlocalizedNumberFormatter &&formatterFirst) const &; + + /** + * Overload of numberFormatterFirst() for use on an rvalue reference. + * + * @param formatterFirst + * The formatter to use for the first number in the range. + * @return The fluent chain. + * @see #numberFormatterFirst + * @draft ICU 63 + */ + Derived numberFormatterFirst(UnlocalizedNumberFormatter &&formatterFirst) &&; + + /** + * Sets the NumberFormatter instance to use for the second number in the range. + *

    + * The NumberFormatter instances must not have a locale applied yet; the locale specified on the + * NumberRangeFormatter will be used. + * + * @param formatterSecond + * The formatter to use for the second number in the range. + * @return The fluent chain. + * @draft ICU 63 + */ + Derived numberFormatterSecond(const UnlocalizedNumberFormatter &formatterSecond) const &; + + /** + * Overload of numberFormatterSecond() for use on an rvalue reference. + * + * @param formatterSecond + * The formatter to use for the second number in the range. + * @return The fluent chain. + * @see #numberFormatterSecond + * @draft ICU 63 + */ + Derived numberFormatterSecond(const UnlocalizedNumberFormatter &formatterSecond) &&; + + /** + * Overload of numberFormatterSecond() for use on an rvalue reference. + * + * @param formatterSecond + * The formatter to use for the second number in the range. + * @return The fluent chain. + * @see #numberFormatterSecond + * @draft ICU 63 + */ + Derived numberFormatterSecond(UnlocalizedNumberFormatter &&formatterSecond) const &; + + /** + * Overload of numberFormatterSecond() for use on an rvalue reference. + * + * @param formatterSecond + * The formatter to use for the second number in the range. + * @return The fluent chain. + * @see #numberFormatterSecond + * @draft ICU 63 + */ + Derived numberFormatterSecond(UnlocalizedNumberFormatter &&formatterSecond) &&; + + /** + * Sets the aggressiveness of "collapsing" fields across the range separator. Possible values: + *

    + *

      + *
    • ALL: "3-5K miles"
    • + *
    • UNIT: "3K - 5K miles"
    • + *
    • NONE: "3K miles - 5K miles"
    • + *
    • AUTO: usually UNIT or NONE, depending on the locale and formatter settings
    • + *
    + *

    + * The default value is AUTO. + * + * @param collapse + * The collapsing strategy to use for this range. + * @return The fluent chain. + * @draft ICU 63 + */ + Derived collapse(UNumberRangeCollapse collapse) const &; + + /** + * Overload of collapse() for use on an rvalue reference. + * + * @param collapse + * The collapsing strategy to use for this range. + * @return The fluent chain. + * @see #collapse + * @draft ICU 63 + */ + Derived collapse(UNumberRangeCollapse collapse) &&; + + /** + * Sets the behavior when the two sides of the range are the same. This could happen if the same two numbers are + * passed to the formatRange function, or if different numbers are passed to the function but they become the same + * after rounding rules are applied. Possible values: + *

    + *

      + *
    • SINGLE_VALUE: "5 miles"
    • + *
    • APPROXIMATELY_OR_SINGLE_VALUE: "~5 miles" or "5 miles", depending on whether the number was the same before + * rounding was applied
    • + *
    • APPROXIMATELY: "~5 miles"
    • + *
    • RANGE: "5-5 miles" (with collapse=UNIT)
    • + *
    + *

    + * The default value is APPROXIMATELY. + * + * @param identityFallback + * The strategy to use when formatting two numbers that end up being the same. + * @return The fluent chain. + * @draft ICU 63 + */ + Derived identityFallback(UNumberRangeIdentityFallback identityFallback) const &; + + /** + * Overload of identityFallback() for use on an rvalue reference. + * + * @param identityFallback + * The strategy to use when formatting two numbers that end up being the same. + * @return The fluent chain. + * @see #identityFallback + * @draft ICU 63 + */ + Derived identityFallback(UNumberRangeIdentityFallback identityFallback) &&; + + /** + * Sets the UErrorCode if an error occurred in the fluent chain. + * Preserves older error codes in the outErrorCode. + * @return TRUE if U_FAILURE(outErrorCode) + * @draft ICU 63 + */ + UBool copyErrorTo(UErrorCode &outErrorCode) const { + if (U_FAILURE(outErrorCode)) { + // Do not overwrite the older error code + return TRUE; + } + fMacros.copyErrorTo(outErrorCode); + return U_FAILURE(outErrorCode); + }; + + // NOTE: Uses default copy and move constructors. + + private: + impl::RangeMacroProps fMacros; + + // Don't construct me directly! Use (Un)LocalizedNumberFormatter. + NumberRangeFormatterSettings() = default; + + friend class LocalizedNumberRangeFormatter; + friend class UnlocalizedNumberRangeFormatter; +}; + +/** + * A NumberRangeFormatter that does not yet have a locale. In order to format, a locale must be specified. + * + * @see NumberRangeFormatter + * @draft ICU 63 + */ +class U_I18N_API UnlocalizedNumberRangeFormatter + : public NumberRangeFormatterSettings, public UMemory { + + public: + /** + * Associate the given locale with the number range formatter. The locale is used for picking the + * appropriate symbols, formats, and other data for number display. + * + * @param locale + * The locale to use when loading data for number formatting. + * @return The fluent chain. + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter locale(const icu::Locale &locale) const &; + + /** + * Overload of locale() for use on an rvalue reference. + * + * @param locale + * The locale to use when loading data for number formatting. + * @return The fluent chain. + * @see #locale + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter locale(const icu::Locale &locale) &&; + + /** + * Default constructor: puts the formatter into a valid but undefined state. + * + * @draft ICU 63 + */ + UnlocalizedNumberRangeFormatter() = default; + + /** + * Returns a copy of this UnlocalizedNumberRangeFormatter. + * @draft ICU 63 + */ + UnlocalizedNumberRangeFormatter(const UnlocalizedNumberRangeFormatter &other); + + /** + * Move constructor: + * The source UnlocalizedNumberRangeFormatter will be left in a valid but undefined state. + * @draft ICU 63 + */ + UnlocalizedNumberRangeFormatter(UnlocalizedNumberRangeFormatter&& src) U_NOEXCEPT; + + /** + * Copy assignment operator. + * @draft ICU 63 + */ + UnlocalizedNumberRangeFormatter& operator=(const UnlocalizedNumberRangeFormatter& other); + + /** + * Move assignment operator: + * The source UnlocalizedNumberRangeFormatter will be left in a valid but undefined state. + * @draft ICU 63 + */ + UnlocalizedNumberRangeFormatter& operator=(UnlocalizedNumberRangeFormatter&& src) U_NOEXCEPT; + + private: + explicit UnlocalizedNumberRangeFormatter( + const NumberRangeFormatterSettings& other); + + explicit UnlocalizedNumberRangeFormatter( + NumberRangeFormatterSettings&& src) U_NOEXCEPT; + + // To give the fluent setters access to this class's constructor: + friend class NumberRangeFormatterSettings; + + // To give NumberRangeFormatter::with() access to this class's constructor: + friend class NumberRangeFormatter; +}; + +/** + * A NumberRangeFormatter that has a locale associated with it; this means .formatRange() methods are available. + * + * @see NumberFormatter + * @draft ICU 63 + */ +class U_I18N_API LocalizedNumberRangeFormatter + : public NumberRangeFormatterSettings, public UMemory { + public: + /** + * Format the given Formattables to a string using the settings specified in the NumberRangeFormatter fluent setting + * chain. + * + * @param first + * The first number in the range, usually to the left in LTR locales. + * @param second + * The second number in the range, usually to the right in LTR locales. + * @param status + * Set if an error occurs while formatting. + * @return A FormattedNumberRange object; call .toString() to get the string. + * @draft ICU 63 + */ + FormattedNumberRange formatFormattableRange( + const Formattable& first, const Formattable& second, UErrorCode& status) const; + + /** + * Default constructor: puts the formatter into a valid but undefined state. + * + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter() = default; + + /** + * Returns a copy of this LocalizedNumberRangeFormatter. + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter(const LocalizedNumberRangeFormatter &other); + + /** + * Move constructor: + * The source LocalizedNumberRangeFormatter will be left in a valid but undefined state. + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter(LocalizedNumberRangeFormatter&& src) U_NOEXCEPT; + + /** + * Copy assignment operator. + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter& operator=(const LocalizedNumberRangeFormatter& other); + + /** + * Move assignment operator: + * The source LocalizedNumberRangeFormatter will be left in a valid but undefined state. + * @draft ICU 63 + */ + LocalizedNumberRangeFormatter& operator=(LocalizedNumberRangeFormatter&& src) U_NOEXCEPT; + +#ifndef U_HIDE_INTERNAL_API + + /** + * @param results + * The results object. This method will mutate it to save the results. + * @param equalBeforeRounding + * Whether the number was equal before copying it into a DecimalQuantity. + * Used for determining the identity fallback behavior. + * @param status + * Set if an error occurs while formatting. + * @internal + */ + void formatImpl(impl::UFormattedNumberRangeData& results, bool equalBeforeRounding, + UErrorCode& status) const; + +#endif + + /** + * Destruct this LocalizedNumberRangeFormatter, cleaning up any memory it might own. + * @draft ICU 63 + */ + ~LocalizedNumberRangeFormatter(); + + private: + std::atomic fAtomicFormatter = {}; + + const impl::NumberRangeFormatterImpl* getFormatter(UErrorCode& stauts) const; + + explicit LocalizedNumberRangeFormatter( + const NumberRangeFormatterSettings& other); + + explicit LocalizedNumberRangeFormatter( + NumberRangeFormatterSettings&& src) U_NOEXCEPT; + + LocalizedNumberRangeFormatter(const impl::RangeMacroProps ¯os, const Locale &locale); + + LocalizedNumberRangeFormatter(impl::RangeMacroProps &¯os, const Locale &locale); + + void clear(); + + // To give the fluent setters access to this class's constructor: + friend class NumberRangeFormatterSettings; + friend class NumberRangeFormatterSettings; + + // To give UnlocalizedNumberRangeFormatter::locale() access to this class's constructor: + friend class UnlocalizedNumberRangeFormatter; +}; + +/** + * The result of a number range formatting operation. This class allows the result to be exported in several data types, + * including a UnicodeString and a FieldPositionIterator. + * + * @draft ICU 63 + */ +class U_I18N_API FormattedNumberRange : public UMemory { + public: + /** + * Returns a UnicodeString representation of the formatted number range. + * + * @param status + * Set if an error occurs while formatting the number to the UnicodeString. + * @return a UnicodeString containing the localized number range. + * @draft ICU 63 + */ + UnicodeString toString(UErrorCode& status) const; + + /** + * Appends the formatted number range to an Appendable. + * + * @param appendable + * The Appendable to which to append the formatted number range string. + * @param status + * Set if an error occurs while formatting the number range to the Appendable. + * @return The same Appendable, for chaining. + * @draft ICU 63 + * @see Appendable + */ + Appendable &appendTo(Appendable &appendable, UErrorCode& status) const; + + /** + * Determines the start (inclusive) and end (exclusive) indices of the next occurrence of the given + * field in the output string. This allows you to determine the locations of, for example, + * the integer part, fraction part, or symbols. + * + * If both sides of the range have the same field, the field will occur twice, once before the + * range separator and once after the range separator, if applicable. + * + * If a field occurs just once, calling this method will find that occurrence and return it. If a + * field occurs multiple times, this method may be called repeatedly with the following pattern: + * + *

    +     * FieldPosition fpos(UNUM_INTEGER_FIELD);
    +     * while (formattedNumberRange.nextFieldPosition(fpos, status)) {
    +     *   // do something with fpos.
    +     * }
    +     * 
    + * + * This method is useful if you know which field to query. If you want all available field position + * information, use #getAllFieldPositions(). + * + * @param fieldPosition + * Input+output variable. See {@link FormattedNumber#nextFieldPosition}. + * @param status + * Set if an error occurs while populating the FieldPosition. + * @return TRUE if a new occurrence of the field was found; FALSE otherwise. + * @draft ICU 63 + * @see UNumberFormatFields + */ + UBool nextFieldPosition(FieldPosition& fieldPosition, UErrorCode& status) const; + + /** + * Export the formatted number range to a FieldPositionIterator. This allows you to determine which characters in + * the output string correspond to which fields, such as the integer part, fraction part, and sign. + * + * If information on only one field is needed, use #nextFieldPosition() instead. + * + * @param iterator + * The FieldPositionIterator to populate with all of the fields present in the formatted number. + * @param status + * Set if an error occurs while populating the FieldPositionIterator. + * @draft ICU 63 + * @see UNumberFormatFields + */ + void getAllFieldPositions(FieldPositionIterator &iterator, UErrorCode &status) const; + + /** + * Export the first formatted number as a decimal number. This endpoint + * is useful for obtaining the exact number being printed after scaling + * and rounding have been applied by the number range formatting pipeline. + * + * The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @return A decimal representation of the first formatted number. + * @draft ICU 63 + * @see NumberRangeFormatter + * @see #getSecondDecimal + */ + UnicodeString getFirstDecimal(UErrorCode& status) const; + + /** + * Export the second formatted number as a decimal number. This endpoint + * is useful for obtaining the exact number being printed after scaling + * and rounding have been applied by the number range formatting pipeline. + * + * The syntax of the unformatted number is a "numeric string" + * as defined in the Decimal Arithmetic Specification, available at + * http://speleotrove.com/decimal + * + * @return A decimal representation of the second formatted number. + * @draft ICU 63 + * @see NumberRangeFormatter + * @see #getFirstDecimal + */ + UnicodeString getSecondDecimal(UErrorCode& status) const; + + /** + * Returns whether the pair of numbers was successfully formatted as a range or whether an identity fallback was + * used. For example, if the first and second number were the same either before or after rounding occurred, an + * identity fallback was used. + * + * @return An indication the resulting identity situation in the formatted number range. + * @draft ICU 63 + * @see UNumberRangeIdentityFallback + */ + UNumberRangeIdentityResult getIdentityResult(UErrorCode& status) const; + + /** + * Copying not supported; use move constructor instead. + */ + FormattedNumberRange(const FormattedNumberRange&) = delete; + + /** + * Copying not supported; use move assignment instead. + */ + FormattedNumberRange& operator=(const FormattedNumberRange&) = delete; + + /** + * Move constructor: + * Leaves the source FormattedNumberRange in an undefined state. + * @draft ICU 63 + */ + FormattedNumberRange(FormattedNumberRange&& src) U_NOEXCEPT; + + /** + * Move assignment: + * Leaves the source FormattedNumberRange in an undefined state. + * @draft ICU 63 + */ + FormattedNumberRange& operator=(FormattedNumberRange&& src) U_NOEXCEPT; + + /** + * Destruct an instance of FormattedNumberRange, cleaning up any memory it might own. + * @draft ICU 63 + */ + ~FormattedNumberRange(); + + private: + // Can't use LocalPointer because UFormattedNumberRangeData is forward-declared + const impl::UFormattedNumberRangeData *fResults; + + // Error code for the terminal methods + UErrorCode fErrorCode; + + /** + * Internal constructor from data type. Adopts the data pointer. + * @internal + */ + explicit FormattedNumberRange(impl::UFormattedNumberRangeData *results) + : fResults(results), fErrorCode(U_ZERO_ERROR) {}; + + explicit FormattedNumberRange(UErrorCode errorCode) + : fResults(nullptr), fErrorCode(errorCode) {}; + + void getAllFieldPositionsImpl(FieldPositionIteratorHandler& fpih, UErrorCode& status) const; + + // To give LocalizedNumberRangeFormatter format methods access to this class's constructor: + friend class LocalizedNumberRangeFormatter; +}; + +/** + * See the main description in numberrangeformatter.h for documentation and examples. + * + * @draft ICU 63 + */ +class U_I18N_API NumberRangeFormatter final { + public: + /** + * Call this method at the beginning of a NumberRangeFormatter fluent chain in which the locale is not currently + * known at the call site. + * + * @return An {@link UnlocalizedNumberRangeFormatter}, to be used for chaining. + * @draft ICU 63 + */ + static UnlocalizedNumberRangeFormatter with(); + + /** + * Call this method at the beginning of a NumberRangeFormatter fluent chain in which the locale is known at the call + * site. + * + * @param locale + * The locale from which to load formats and symbols for number range formatting. + * @return A {@link LocalizedNumberRangeFormatter}, to be used for chaining. + * @draft ICU 63 + */ + static LocalizedNumberRangeFormatter withLocale(const Locale &locale); + + /** + * Use factory methods instead of the constructor to create a NumberFormatter. + */ + NumberRangeFormatter() = delete; +}; + +} // namespace number +U_NAMESPACE_END + +#endif // U_HIDE_DRAFT_API + +#endif // __NUMBERRANGEFORMATTER_H__ + +#endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/i18n/unicode/numfmt.h b/deps/icu-small/source/i18n/unicode/numfmt.h index 572e6afc71b807..871fbd93e15e2e 100644 --- a/deps/icu-small/source/i18n/unicode/numfmt.h +++ b/deps/icu-small/source/i18n/unicode/numfmt.h @@ -56,7 +56,6 @@ class StringEnumeration; *

    IMPORTANT: New users are strongly encouraged to see if * numberformatter.h fits their use case. Although not deprecated, this header * is provided for backwards compatibility only. - *


    * * Abstract base class for all number formats. Provides interface for * formatting and parsing a number. Also provides methods for @@ -710,7 +709,7 @@ class U_I18N_API NumberFormat : public Format { * The default formatting style is locale dependent. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(UErrorCode&); @@ -721,7 +720,7 @@ class U_I18N_API NumberFormat : public Format { * @param inLocale the given locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createInstance(const Locale& inLocale, @@ -731,7 +730,7 @@ class U_I18N_API NumberFormat : public Format { * Create a specific style NumberFormat for the specified locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param desiredLocale the given locale. * @param style the given style. * @param errorCode Output param filled with success/failure status. @@ -770,7 +769,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a currency format for the current default locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createCurrencyInstance(UErrorCode&); @@ -779,7 +778,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a currency format for the specified locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -790,7 +789,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a percentage format for the current default locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createPercentInstance(UErrorCode&); @@ -799,7 +798,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a percentage format for the specified locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -810,7 +809,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a scientific format for the current default locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @stable ICU 2.0 */ static NumberFormat* U_EXPORT2 createScientificInstance(UErrorCode&); @@ -819,7 +818,7 @@ class U_I18N_API NumberFormat : public Format { * Returns a scientific format for the specified locale. *

    * NOTE: New users are strongly encouraged to use - * {@link NumberFormatter} instead of NumberFormat. + * {@link icu::number::NumberFormatter} instead of NumberFormat. * @param inLocale the given locale. * @stable ICU 2.0 */ @@ -1028,14 +1027,14 @@ class U_I18N_API NumberFormat : public Format { * Get the rounding mode. This will always return NumberFormat::ERoundingMode::kRoundUnnecessary * if the subclass does not support rounding. * @return A rounding mode - * @draft ICU 60 + * @stable ICU 60 */ virtual ERoundingMode getRoundingMode(void) const; /** * Set the rounding mode. If a subclass does not support rounding, this will do nothing. * @param roundingMode A rounding mode - * @draft ICU 60 + * @stable ICU 60 */ virtual void setRoundingMode(ERoundingMode roundingMode); diff --git a/deps/icu-small/source/i18n/unicode/plurfmt.h b/deps/icu-small/source/i18n/unicode/plurfmt.h index 9a83e52550c540..6b757c88419316 100644 --- a/deps/icu-small/source/i18n/unicode/plurfmt.h +++ b/deps/icu-small/source/i18n/unicode/plurfmt.h @@ -520,15 +520,7 @@ class U_I18N_API PluralFormat : public Format { */ virtual UClassID getDynamicClassID() const; -#if (defined(__xlC__) && (__xlC__ < 0x0C00)) || (U_PLATFORM == U_PF_OS390) || (U_PLATFORM ==U_PF_OS400) -// Work around a compiler bug on xlC 11.1 on AIX 7.1 that would -// prevent PluralSelectorAdapter from implementing private PluralSelector. -// xlC error message: -// 1540-0300 (S) The "private" member "class icu_49::PluralFormat::PluralSelector" cannot be accessed. -public: -#else private: -#endif /** * @internal */ @@ -564,10 +556,6 @@ class U_I18N_API PluralFormat : public Format { PluralRules* pluralRules; }; -#if defined(__xlC__) -// End of xlC bug workaround, keep remaining definitions private. -private: -#endif Locale locale; MessagePattern msgPattern; NumberFormat* numberFormat; diff --git a/deps/icu-small/source/i18n/unicode/plurrule.h b/deps/icu-small/source/i18n/unicode/plurrule.h index 03dea3f1b92988..daeed52bee632e 100644 --- a/deps/icu-small/source/i18n/unicode/plurrule.h +++ b/deps/icu-small/source/i18n/unicode/plurrule.h @@ -118,7 +118,6 @@ class SharedPluralRules; * Examples are in the following table: *

    * - * * * * @@ -155,7 +154,6 @@ class SharedPluralRules; * * * - * *
    ni232
    *

    * The difference between 'in' and 'within' is that 'in' only includes integers in the specified range, while 'within' @@ -499,6 +497,12 @@ class U_I18N_API PluralRules : public UObject { UnicodeString getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& status); RuleChain *rulesForKeyword(const UnicodeString &keyword) const; + /** + * An internal status variable used to indicate that the object is in an 'invalid' state. + * Used by copy constructor, the assignment operator and the clone method. + */ + UErrorCode mInternalStatus; + friend class PluralRuleParser; }; diff --git a/deps/icu-small/source/i18n/unicode/rbnf.h b/deps/icu-small/source/i18n/unicode/rbnf.h index 2d284909f8a1d4..f7cd85a322d1f5 100644 --- a/deps/icu-small/source/i18n/unicode/rbnf.h +++ b/deps/icu-small/source/i18n/unicode/rbnf.h @@ -313,7 +313,6 @@ enum URBNFRuleSetTag { * The rule for an IEEE 754 NaN (not a number). * * - * * nothing * If the rule's rule descriptor is left out, the base value is one plus the * preceding rule's base value (or zero if this is the first rule in the list) in a normal @@ -1013,14 +1012,14 @@ class U_I18N_API RuleBasedNumberFormat : public NumberFormat { /** * Get the rounding mode. * @return A rounding mode - * @draft ICU 60 + * @stable ICU 60 */ virtual ERoundingMode getRoundingMode(void) const; /** * Set the rounding mode. * @param roundingMode A rounding mode - * @draft ICU 60 + * @stable ICU 60 */ virtual void setRoundingMode(ERoundingMode roundingMode); @@ -1095,7 +1094,7 @@ class U_I18N_API RuleBasedNumberFormat : public NumberFormat { void format(double number, NFRuleSet& rs, UnicodeString& toAppendTo, UErrorCode& status) const; private: - NFRuleSet **ruleSets; + NFRuleSet **fRuleSets; UnicodeString* ruleSetDescriptions; int32_t numRuleSets; NFRuleSet *defaultRuleSet; @@ -1104,7 +1103,7 @@ class U_I18N_API RuleBasedNumberFormat : public NumberFormat { DecimalFormatSymbols* decimalFormatSymbols; NFRule *defaultInfinityRule; NFRule *defaultNaNRule; - ERoundingMode roundingMode; + ERoundingMode fRoundingMode; UBool lenient; UnicodeString* lenientParseRules; LocalizationInfo* localizations; diff --git a/deps/icu-small/source/i18n/unicode/region.h b/deps/icu-small/source/i18n/unicode/region.h index 667c4051f0eb74..ccd63901a5d1b1 100644 --- a/deps/icu-small/source/i18n/unicode/region.h +++ b/deps/icu-small/source/i18n/unicode/region.h @@ -192,7 +192,7 @@ class U_I18N_API Region : public UObject { char id[4]; UnicodeString idStr; int32_t code; - URegionType type; + URegionType fType; Region *containingRegion; UVector *containedRegions; UVector *preferredValues; diff --git a/deps/icu-small/source/i18n/unicode/reldatefmt.h b/deps/icu-small/source/i18n/unicode/reldatefmt.h index abd43522c3a965..dd8bc53d55f500 100644 --- a/deps/icu-small/source/i18n/unicode/reldatefmt.h +++ b/deps/icu-small/source/i18n/unicode/reldatefmt.h @@ -165,12 +165,20 @@ typedef enum UDateAbsoluteUnit { */ UDAT_ABSOLUTE_NOW, +#ifndef U_HIDE_DRAFT_API + /** + * Quarter + * @draft ICU 63 + */ + UDAT_ABSOLUTE_QUARTER, +#endif // U_HIDE_DRAFT_API + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UDateAbsoluteUnit value. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UDAT_ABSOLUTE_UNIT_COUNT + UDAT_ABSOLUTE_UNIT_COUNT = UDAT_ABSOLUTE_NOW + 2 #endif // U_HIDE_DEPRECATED_API } UDateAbsoluteUnit; @@ -328,7 +336,7 @@ class U_I18N_API RelativeDateTimeFormatter : public UObject { * @param nfToAdopt Constructed object takes ownership of this pointer. * It is an error for caller to delete this pointer or change its * contents after calling this constructor. - * @status Any error is returned here. + * @param status Any error is returned here. * @stable ICU 53 */ RelativeDateTimeFormatter( @@ -346,7 +354,7 @@ class U_I18N_API RelativeDateTimeFormatter : public UObject { * @param style the format style. The UDAT_RELATIVE bit field has no effect. * @param capitalizationContext A value from UDisplayContext that pertains to * capitalization. - * @status Any error is returned here. + * @param status Any error is returned here. * @stable ICU 54 */ RelativeDateTimeFormatter( diff --git a/deps/icu-small/source/i18n/unicode/smpdtfmt.h b/deps/icu-small/source/i18n/unicode/smpdtfmt.h index 305412b8d1535f..929c1b4675b407 100644 --- a/deps/icu-small/source/i18n/unicode/smpdtfmt.h +++ b/deps/icu-small/source/i18n/unicode/smpdtfmt.h @@ -1147,7 +1147,7 @@ class U_I18N_API SimpleDateFormat: public DateFormat { * Overrides base class method and * This method clears per field NumberFormat instances * previously set by {@see adoptNumberFormat(const UnicodeString&, NumberFormat*, UErrorCode)} - * @param adoptNF the NumbeferFormat used + * @param formatToAdopt the NumbeferFormat used * @stable ICU 54 */ void adoptNumberFormat(NumberFormat *formatToAdopt); @@ -1162,7 +1162,7 @@ class U_I18N_API SimpleDateFormat: public DateFormat { * Per field NumberFormat can also be cleared in {@see DateFormat::setNumberFormat(const NumberFormat& newNumberFormat)} * * @param fields the fields to override(like y) - * @param adoptNF the NumbeferFormat used + * @param formatToAdopt the NumbeferFormat used * @param status Receives a status code, which will be U_ZERO_ERROR * if the operation succeeds. * @stable ICU 54 diff --git a/deps/icu-small/source/i18n/unicode/translit.h b/deps/icu-small/source/i18n/unicode/translit.h index dc31d97bc63643..6b4888145f1be5 100644 --- a/deps/icu-small/source/i18n/unicode/translit.h +++ b/deps/icu-small/source/i18n/unicode/translit.h @@ -31,7 +31,6 @@ U_NAMESPACE_BEGIN class UnicodeFilter; class UnicodeSet; -class CompoundTransliterator; class TransliteratorParser; class NormalizationTransliterator; class TransliteratorIDParser; @@ -77,8 +76,7 @@ class TransliteratorIDParser; * transliteration. For example, given a string input * and a transliterator t, the call * - * \htmlonly

    \endhtmlonlyString result = t.transliterate(input); - * \htmlonly
    \endhtmlonly + * String result = t.transliterate(input); * * will transliterate it and return the result. Other methods allow * the client to specify a substring to be transliterated and to use @@ -98,22 +96,20 @@ class TransliteratorIDParser; * contents of the buffer may show text being modified as each new * character arrives. * - *

    Consider the simple RuleBasedTransliterator: - * - * \htmlonly

    \endhtmlonly - * th>{theta}
    - * t>{tau} - *
    \htmlonly
    \endhtmlonly + *

    Consider the simple rule-based Transliterator: + *

    + *     th>{theta}
    + *     t>{tau}
    + * 
    * * When the user types 't', nothing will happen, since the * transliterator is waiting to see if the next character is 'h'. To * remedy this, we introduce the notion of a cursor, marked by a '|' * in the output string: - * - * \htmlonly
    \endhtmlonly - * t>|{tau}
    - * {tau}h>{theta} - *
    \htmlonly
    \endhtmlonly + *
    + *     t>|{tau}
    + *     {tau}h>{theta}
    + * 
    * * Now when the user types 't', tau appears, and if the next character * is 'h', the tau changes to a theta. This is accomplished by @@ -135,7 +131,7 @@ class TransliteratorIDParser; * which the transliterator last stopped, either because it reached * the end, or because it required more characters to disambiguate * between possible inputs. The CURSOR can also be - * explicitly set by rules in a RuleBasedTransliterator. + * explicitly set by rules in a rule-based Transliterator. * Any characters before the CURSOR index are frozen; * future keyboard transliteration calls within this input sequence * will not change them. New text is inserted at the @@ -237,6 +233,255 @@ class TransliteratorIDParser; * if the performance of these methods can be improved over the * performance obtained by the default implementations in this class. * + *

    Rule syntax + * + *

    A set of rules determines how to perform translations. + * Rules within a rule set are separated by semicolons (';'). + * To include a literal semicolon, prefix it with a backslash ('\'). + * Unicode Pattern_White_Space is ignored. + * If the first non-blank character on a line is '#', + * the entire line is ignored as a comment. + * + *

    Each set of rules consists of two groups, one forward, and one + * reverse. This is a convention that is not enforced; rules for one + * direction may be omitted, with the result that translations in + * that direction will not modify the source text. In addition, + * bidirectional forward-reverse rules may be specified for + * symmetrical transformations. + * + *

    Note: Another description of the Transliterator rule syntax is available in + * section + * Transform Rules Syntax of UTS #35: Unicode LDML. + * The rules are shown there using arrow symbols ← and → and ↔. + * ICU supports both those and the equivalent ASCII symbols < and > and <>. + * + *

    Rule statements take one of the following forms: + * + *

    + *
    $alefmadda=\\u0622;
    + *
    Variable definition. The name on the + * left is assigned the text on the right. In this example, + * after this statement, instances of the left hand name, + * "$alefmadda", will be replaced by + * the Unicode character U+0622. Variable names must begin + * with a letter and consist only of letters, digits, and + * underscores. Case is significant. Duplicate names cause + * an exception to be thrown, that is, variables cannot be + * redefined. The right hand side may contain well-formed + * text of any length, including no text at all ("$empty=;"). + * The right hand side may contain embedded UnicodeSet + * patterns, for example, "$softvowel=[eiyEIY]".
    + *
    ai>$alefmadda;
    + *
    Forward translation rule. This rule + * states that the string on the left will be changed to the + * string on the right when performing forward + * transliteration.
    + *
    ai<$alefmadda;
    + *
    Reverse translation rule. This rule + * states that the string on the right will be changed to + * the string on the left when performing reverse + * transliteration.
    + *
    + * + *
    + *
    ai<>$alefmadda;
    + *
    Bidirectional translation rule. This + * rule states that the string on the right will be changed + * to the string on the left when performing forward + * transliteration, and vice versa when performing reverse + * transliteration.
    + *
    + * + *

    Translation rules consist of a match pattern and an output + * string. The match pattern consists of literal characters, + * optionally preceded by context, and optionally followed by + * context. Context characters, like literal pattern characters, + * must be matched in the text being transliterated. However, unlike + * literal pattern characters, they are not replaced by the output + * text. For example, the pattern "abc{def}" + * indicates the characters "def" must be + * preceded by "abc" for a successful match. + * If there is a successful match, "def" will + * be replaced, but not "abc". The final '}' + * is optional, so "abc{def" is equivalent to + * "abc{def}". Another example is "{123}456" + * (or "123}456") in which the literal + * pattern "123" must be followed by "456". + * + *

    The output string of a forward or reverse rule consists of + * characters to replace the literal pattern characters. If the + * output string contains the character '|', this is + * taken to indicate the location of the cursor after + * replacement. The cursor is the point in the text at which the + * next replacement, if any, will be applied. The cursor is usually + * placed within the replacement text; however, it can actually be + * placed into the precending or following context by using the + * special character '@'. Examples: + * + *

    + *     a {foo} z > | @ bar; # foo -> bar, move cursor before a
    + *     {foo} xyz > bar @@|; # foo -> bar, cursor between y and z
    + * 
    + * + *

    UnicodeSet + * + *

    UnicodeSet patterns may appear anywhere that + * makes sense. They may appear in variable definitions. + * Contrariwise, UnicodeSet patterns may themselves + * contain variable references, such as "$a=[a-z];$not_a=[^$a]", + * or "$range=a-z;$ll=[$range]". + * + *

    UnicodeSet patterns may also be embedded directly + * into rule strings. Thus, the following two rules are equivalent: + * + *

    + *     $vowel=[aeiou]; $vowel>'*'; # One way to do this
    + *     [aeiou]>'*'; # Another way
    + * 
    + * + *

    See {@link UnicodeSet} for more documentation and examples. + * + *

    Segments + * + *

    Segments of the input string can be matched and copied to the + * output string. This makes certain sets of rules simpler and more + * general, and makes reordering possible. For example: + * + *

    + *     ([a-z]) > $1 $1; # double lowercase letters
    + *     ([:Lu:]) ([:Ll:]) > $2 $1; # reverse order of Lu-Ll pairs
    + * 
    + * + *

    The segment of the input string to be copied is delimited by + * "(" and ")". Up to + * nine segments may be defined. Segments may not overlap. In the + * output string, "$1" through "$9" + * represent the input string segments, in left-to-right order of + * definition. + * + *

    Anchors + * + *

    Patterns can be anchored to the beginning or the end of the text. This is done with the + * special characters '^' and '$'. For example: + * + *

    + *   ^ a   > 'BEG_A';   # match 'a' at start of text
    + *     a   > 'A'; # match other instances of 'a'
    + *     z $ > 'END_Z';   # match 'z' at end of text
    + *     z   > 'Z';       # match other instances of 'z'
    + * 
    + * + *

    It is also possible to match the beginning or the end of the text using a UnicodeSet. + * This is done by including a virtual anchor character '$' at the end of the + * set pattern. Although this is usually the match chafacter for the end anchor, the set will + * match either the beginning or the end of the text, depending on its placement. For + * example: + * + *

    + *   $x = [a-z$];   # match 'a' through 'z' OR anchor
    + *   $x 1    > 2;   # match '1' after a-z or at the start
    + *      3 $x > 4;   # match '3' before a-z or at the end
    + * 
    + * + *

    Example + * + *

    The following example rules illustrate many of the features of + * the rule language. + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    Rule 1.abc{def}>x|y
    Rule 2.xyz>r
    Rule 3.yz>q
    + * + *

    Applying these rules to the string "adefabcdefz" + * yields the following results: + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + * + *
    |adefabcdefzInitial state, no rules match. Advance + * cursor.
    a|defabcdefzStill no match. Rule 1 does not match + * because the preceding context is not present.
    ad|efabcdefzStill no match. Keep advancing until + * there is a match...
    ade|fabcdefz...
    adef|abcdefz...
    adefa|bcdefz...
    adefab|cdefz...
    adefabc|defzRule 1 matches; replace "def" + * with "xy" and back up the cursor + * to before the 'y'.
    adefabcx|yzAlthough "xyz" is + * present, rule 2 does not match because the cursor is + * before the 'y', not before the 'x'. + * Rule 3 does match. Replace "yz" + * with "q".
    adefabcxq|The cursor is at the end; + * transliteration is complete.
    + * + *

    The order of rules is significant. If multiple rules may match + * at some point, the first matching rule is applied. + * + *

    Forward and reverse rules may have an empty output string. + * Otherwise, an empty left or right hand side of any statement is a + * syntax error. + * + *

    Single quotes are used to quote any character other than a + * digit or letter. To specify a single quote itself, inside or + * outside of quotes, use two single quotes in a row. For example, + * the rule "'>'>o''clock" changes the + * string ">" to the string "o'clock". + * + *

    Notes + * + *

    While a Transliterator is being built from rules, it checks that + * the rules are added in proper order. For example, if the rule + * "a>x" is followed by the rule "ab>y", + * then the second rule will throw an exception. The reason is that + * the second rule can never be triggered, since the first rule + * always matches anything it matches. In other words, the first + * rule masks the second rule. + * * @author Alan Liu * @stable ICU 2.0 */ @@ -499,9 +744,9 @@ class U_I18N_API Transliterator : public UObject { * for details. * @param text the buffer holding transliterated and * untransliterated text - * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString*, UErrorCode&) const }. + * @param index an array of three integers. * @param status Output param to filled in with a success or an error. - * @see #transliterate(Replaceable, int[], String) + * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode &) const * @stable ICU 2.0 */ virtual void transliterate(Replaceable& text, UTransPosition& index, @@ -632,7 +877,7 @@ class U_I18N_API Transliterator : public UObject { /** * Transliterate a substring of text, as specified by index, taking filters * into account. This method is for subclasses that need to delegate to - * another transliterator, such as CompoundTransliterator. + * another transliterator. * @param text the text to be transliterated * @param index the position indices * @param incremental if TRUE, then assume more characters may be inserted @@ -846,17 +1091,19 @@ class U_I18N_API Transliterator : public UObject { /** * Returns a Transliterator object constructed from - * the given rule string. This will be a RuleBasedTransliterator, + * the given rule string. This will be a rule-based Transliterator, * if the rule string contains only rules, or a - * CompoundTransliterator, if it contains ID blocks, or a - * NullTransliterator, if it contains ID blocks which parse as + * compound Transliterator, if it contains ID blocks, or a + * null Transliterator, if it contains ID blocks which parse as * empty for the given direction. + * * @param ID the id for the transliterator. * @param rules rules, separated by ';' * @param dir either FORWARD or REVERSE. - * @param parseError Struct to recieve information on position + * @param parseError Struct to receive information on position * of error if an error is encountered * @param status Output param set to success/failure code. + * @return a newly created Transliterator * @stable ICU 2.0 */ static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID, diff --git a/deps/icu-small/source/i18n/unicode/tzfmt.h b/deps/icu-small/source/i18n/unicode/tzfmt.h index 633cd8dc6987ce..d2aa768b8c8b34 100644 --- a/deps/icu-small/source/i18n/unicode/tzfmt.h +++ b/deps/icu-small/source/i18n/unicode/tzfmt.h @@ -237,10 +237,10 @@ typedef enum UTimeZoneFormatParseOption { */ UTZFMT_PARSE_OPTION_ALL_STYLES = 0x01, /** - * When parsing a time zone display name in UTZFMT_STYLE_SPECIFIC_SHORT, + * When parsing a time zone display name in \link UTZFMT_STYLE_SPECIFIC_SHORT \endlink, * look for the IANA tz database compatible zone abbreviations in addition - * to the localized names coming from the {@link TimeZoneNames} currently - * used by the {@link TimeZoneFormat}. + * to the localized names coming from the icu::TimeZoneNames currently + * used by the icu::TimeZoneFormat. * @stable ICU 54 */ UTZFMT_PARSE_OPTION_TZ_DATABASE_ABBREVIATIONS = 0x02 diff --git a/deps/icu-small/source/i18n/unicode/ucal.h b/deps/icu-small/source/i18n/unicode/ucal.h index 10d8bc52745d2b..889a1ec51db52c 100644 --- a/deps/icu-small/source/i18n/unicode/ucal.h +++ b/deps/icu-small/source/i18n/unicode/ucal.h @@ -139,6 +139,19 @@ * For example, subtracting 5 days from the date September 12, 1996 * results in September 7, 1996. * + *

    + * The Japanese calendar uses a combination of era name and year number. + * When an emperor of Japan abdicates and a new emperor ascends the throne, + * a new era is declared and year number is reset to 1. Even if the date of + * abdication is scheduled ahead of time, the new era name might not be + * announced until just before the date. In such case, ICU4C may include + * a start date of future era without actual era name, but not enabled + * by default. ICU4C users who want to test the behavior of the future era + * can enable the tentative era by: + *

      + *
    • Environment variable ICU_ENABLE_TENTATIVE_ERA=true.
    • + *
    + * * @stable ICU 2.0 */ diff --git a/deps/icu-small/source/i18n/unicode/ucol.h b/deps/icu-small/source/i18n/unicode/ucol.h index b5bacbfcb4734e..f084ac61e614f3 100644 --- a/deps/icu-small/source/i18n/unicode/ucol.h +++ b/deps/icu-small/source/i18n/unicode/ucol.h @@ -1149,7 +1149,7 @@ ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); * The recommended way to achieve "merged" sorting is by * concatenating strings with U+FFFE between them. * The concatenation has the same sort order as the merged sort keys, - * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2). + * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2). * Using strings with U+FFFE may yield shorter sort keys. * * For details about Sort Key Features see @@ -1294,6 +1294,7 @@ U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCod * the top of one of the supported reordering groups, * and it must not be beyond the last of those groups. * See ucol_setMaxVariable(). + * @param coll collator to be set * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop * @param status error code * @see ucol_getVariableTop diff --git a/deps/icu-small/source/i18n/unicode/ugender.h b/deps/icu-small/source/i18n/unicode/ugender.h index d015a2300cf6d6..903f3dd5deebbe 100644 --- a/deps/icu-small/source/i18n/unicode/ugender.h +++ b/deps/icu-small/source/i18n/unicode/ugender.h @@ -49,11 +49,11 @@ enum UGender { */ typedef enum UGender UGender; +struct UGenderInfo; /** * Opaque UGenderInfo object for use in C programs. * @stable ICU 50 */ -struct UGenderInfo; typedef struct UGenderInfo UGenderInfo; /** @@ -77,7 +77,7 @@ ugender_getInstance(const char *locale, UErrorCode *status); * @stable ICU 50 */ U_STABLE UGender U_EXPORT2 -ugender_getListGender(const UGenderInfo* genderinfo, const UGender *genders, int32_t size, UErrorCode *status); +ugender_getListGender(const UGenderInfo* genderInfo, const UGender *genders, int32_t size, UErrorCode *status); #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/deps/icu-small/source/common/unicode/ulistformatter.h b/deps/icu-small/source/i18n/unicode/ulistformatter.h similarity index 90% rename from deps/icu-small/source/common/unicode/ulistformatter.h rename to deps/icu-small/source/i18n/unicode/ulistformatter.h index e98a9f045221d9..242d7d21644762 100644 --- a/deps/icu-small/source/common/unicode/ulistformatter.h +++ b/deps/icu-small/source/i18n/unicode/ulistformatter.h @@ -33,6 +33,26 @@ struct UListFormatter; typedef struct UListFormatter UListFormatter; /**< C typedef for struct UListFormatter. @stable ICU 55 */ +#ifndef U_HIDE_DRAFT_API +/** + * FieldPosition and UFieldPosition selectors for format fields + * defined by ListFormatter. + * @draft ICU 63 + */ +typedef enum UListFormatterField { + /** + * The literal text in the result which came from the resources. + * @draft ICU 63 + */ + ULISTFMT_LITERAL_FIELD, + /** + * The element text in the result which came from the input strings. + * @draft ICU 63 + */ + ULISTFMT_ELEMENT_FIELD +} UListFormatterField; +#endif // U_HIDE_DRAFT_API + /** * Open a new UListFormatter object using the rules for a given locale. * @param locale diff --git a/deps/icu-small/source/i18n/unicode/unumberformatter.h b/deps/icu-small/source/i18n/unicode/unumberformatter.h index b37f80c503a76d..d05b15cdeccafb 100644 --- a/deps/icu-small/source/i18n/unicode/unumberformatter.h +++ b/deps/icu-small/source/i18n/unicode/unumberformatter.h @@ -91,7 +91,7 @@ * * *

    - * This enum is similar to {@link com.ibm.icu.text.MeasureFormat.FormatWidth}. + * This enum is similar to {@link UMeasureFormatWidth}. * * @draft ICU 60 */ @@ -190,10 +190,9 @@ typedef enum UNumberUnitWidth { * Note: This enum specifies the strategy for grouping sizes. To set which character to use as the * grouping separator, use the "symbols" setter. * - * @draft ICU 61 -- TODO: This should be renamed to UNumberGroupingStrategy before promoting to stable, - * for consistency with the other enums. + * @draft ICU 63 */ -typedef enum UGroupingStrategy { +typedef enum UNumberGroupingStrategy { /** * Do not display grouping separators in any locale. * @@ -254,16 +253,28 @@ typedef enum UGroupingStrategy { * * @draft ICU 61 */ - UNUM_GROUPING_THOUSANDS, + UNUM_GROUPING_THOUSANDS +#ifndef U_HIDE_INTERNAL_API + , /** - * One more than the highest UGroupingStrategy value. + * One more than the highest UNumberGroupingStrategy value. * * @internal ICU 62: The numeric value may change over time; see ICU ticket #12420. */ UNUM_GROUPING_COUNT +#endif /* U_HIDE_INTERNAL_API */ + +} UNumberGroupingStrategy; + +#ifndef U_HIDE_DEPRECATED_API +/** + * Old name for compatibility: will be removed in ICU 64. + * @deprecated ICU 63 + */ +typedef UNumberGroupingStrategy UGroupingStrategy; +#endif /* U_HIDE_DEPRECATED_API */ -} UGroupingStrategy; #endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_DRAFT_API @@ -398,6 +409,8 @@ typedef enum UNumberDecimalSeparatorDisplay { #endif /* U_HIDE_DRAFT_API */ #ifndef U_HIDE_DRAFT_API + +struct UNumberFormatter; /** * C-compatible version of icu::number::LocalizedNumberFormatter. * @@ -405,10 +418,9 @@ typedef enum UNumberDecimalSeparatorDisplay { * * @draft ICU 62 */ -struct UNumberFormatter; typedef struct UNumberFormatter UNumberFormatter; - +struct UFormattedNumber; /** * C-compatible version of icu::number::FormattedNumber. * @@ -416,7 +428,6 @@ typedef struct UNumberFormatter UNumberFormatter; * * @draft ICU 62 */ -struct UFormattedNumber; typedef struct UFormattedNumber UFormattedNumber; @@ -559,7 +570,8 @@ unumf_resultToString(const UFormattedNumber* uresult, UChar* buffer, int32_t buf * * NOTE: All fields of the UFieldPosition must be initialized before calling this method. * - * @param fieldPosition + * @param uresult The object containing the formatted number. + * @param ufpos * Input+output variable. On input, the "field" property determines which field to look up, * and the "endIndex" property determines where to begin the search. On output, the * "beginIndex" field is set to the beginning of the first occurrence of the field after the @@ -580,7 +592,7 @@ unumf_resultNextFieldPosition(const UFormattedNumber* uresult, UFieldPosition* u * If you need information on only one field, use unumf_resultNextFieldPosition(). * * @param uresult The object containing the formatted number. - * @param fpositer + * @param ufpositer * A pointer to a UFieldPositionIterator created by {@link #ufieldpositer_open}. Iteration * information already present in the UFieldPositionIterator is deleted, and the iterator is reset * to apply to the fields in the formatted string created by this function call. The field values diff --git a/deps/icu-small/source/i18n/unicode/usearch.h b/deps/icu-small/source/i18n/unicode/usearch.h index 600f9142b45021..6b495ef00140f1 100644 --- a/deps/icu-small/source/i18n/unicode/usearch.h +++ b/deps/icu-small/source/i18n/unicode/usearch.h @@ -257,10 +257,9 @@ typedef enum { * match an e with the same diacritic or a plain e in the searched text. * * This option is similar to "asymmetric search" as described in - * - * UTS #10 Unicode Collation Algorithm * UTS 39 defines two strings to be confusable if they map to the same skeleton string. A skeleton can * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so * the following snippet is equivalent to the example above: @@ -128,7 +127,6 @@ * free(skel2); * \endcode * - *

    * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below: * @@ -172,14 +170,12 @@ * uspoof_close(sc); * \endcode * - *

    * Note: Since the Unicode confusables mapping table is frequently updated, confusable skeletons are not * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons. * *

    Spoof Detection

    * - *

    * The following snippet shows a minimal example of using USpoofChecker to perform spoof detection on a * string: * @@ -204,16 +200,13 @@ * uset_close(allowed); * \endcode * - *

    * As in the case for confusability checking, it is good practice to create one USpoofChecker instance at * startup, and call the cheaper {@link uspoof_check} online. We specify the set of * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39. * - *

    * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings, * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers. * - *

    * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks * is available in the returned bitmask. For complete information, use the {@link uspoof_check2} class of functions * with a {@link USpoofCheckResult} parameter: @@ -274,7 +267,6 @@ * // Explicit cleanup not necessary. * \endcode * - *

    * The return value is a bitmask of the checks that failed. In this case, there was one check that failed: * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are: * @@ -307,7 +299,6 @@ * uspoof_close(sc); * \endcode * - *

    * Here is an example in C++ showing how to compute the restriction level of a string: * * \code{.cpp} @@ -334,11 +325,9 @@ * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status)); * \endcode * - *

    * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE. Since * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check. * - *

    * Note: The Restriction Level is the most powerful of the checks. The full logic is documented in * UTS 39, but the basic idea is that strings * are restricted to contain characters from only a single script, except that most scripts are allowed to have @@ -352,15 +341,12 @@ * *

    Additional Information

    * - *

    * A USpoofChecker instance may be used repeatedly to perform checks on any number of identifiers. * - *

    * Thread Safety: The test functions for checking a single identifier, or for testing whether * two identifiers are possible confusable, are thread safe. They may called concurrently, from multiple threads, * using the same USpoofChecker instance. * - *

    * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are * thread safe. Those that take a non-const USpoofChecker are not thread safe.. * @@ -1219,14 +1205,21 @@ U_NAMESPACE_BEGIN /** * \class LocalUSpoofCheckResultPointer - * "Smart pointer" class, closes a USpoofCheckResult via {@link uspoof_closeCheckResult}. + * "Smart pointer" class, closes a USpoofCheckResult via `uspoof_closeCheckResult()`. * For most methods see the LocalPointerBase base class. * * @see LocalPointerBase * @see LocalPointer * @stable ICU 58 */ + +/** + * \cond + * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER. + * For now, suppress with a Doxygen cond + */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult); +/** \endcond */ U_NAMESPACE_END diff --git a/deps/icu-small/source/i18n/uregex.cpp b/deps/icu-small/source/i18n/uregex.cpp index 370384363483e7..f504aec91bd7c3 100644 --- a/deps/icu-small/source/i18n/uregex.cpp +++ b/deps/icu-small/source/i18n/uregex.cpp @@ -1654,8 +1654,8 @@ int32_t RegexCImpl::appendTail(RegularExpression *regexp, } else if (UTEXT_USES_U16(m->fInputText)) { srcIdx = (int32_t)nativeIdx; } else { - UErrorCode status = U_ZERO_ERROR; - srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status); + UErrorCode newStatus = U_ZERO_ERROR; + srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &newStatus); } for (;;) { diff --git a/deps/icu-small/source/i18n/usearch.cpp b/deps/icu-small/source/i18n/usearch.cpp index e1e6c28e2bc985..0e4cca77a13ae0 100644 --- a/deps/icu-small/source/i18n/usearch.cpp +++ b/deps/icu-small/source/i18n/usearch.cpp @@ -498,7 +498,7 @@ inline void setShiftTable(int16_t shift[], int16_t backshift[], for (count = 0; count < cesize; count ++) { // number of ces from right of array to the count int temp = defaultforward - count - 1; - shift[hashFromCE32(cetable[count])] = temp > 1 ? temp : 1; + shift[hashFromCE32(cetable[count])] = temp > 1 ? static_cast(temp) : 1; } shift[hashFromCE32(cetable[cesize])] = 1; // for ignorables we just shift by one. see test examples. diff --git a/deps/icu-small/source/i18n/uspoof_impl.h b/deps/icu-small/source/i18n/uspoof_impl.h index 470a31f2c979bc..7d6d0f76e34275 100644 --- a/deps/icu-small/source/i18n/uspoof_impl.h +++ b/deps/icu-small/source/i18n/uspoof_impl.h @@ -20,7 +20,7 @@ #include "unicode/uspoof.h" #include "unicode/uscript.h" #include "unicode/udata.h" - +#include "udataswp.h" #include "utrie2.h" #if !UCONFIG_NO_NORMALIZATION diff --git a/deps/icu-small/source/i18n/vtzone.cpp b/deps/icu-small/source/i18n/vtzone.cpp index 0c76c9b6c98bf2..e39eada51b7f61 100644 --- a/deps/icu-small/source/i18n/vtzone.cpp +++ b/deps/icu-small/source/i18n/vtzone.cpp @@ -135,7 +135,7 @@ static UnicodeString& appendAsciiDigits(int32_t number, uint8_t length, UnicodeS digits[i++] = number % 10; number /= 10; } while (number != 0); - length = i; + length = static_cast(i); } else { // fixed digits for (i = 0; i < length; i++) { diff --git a/deps/icu-small/source/i18n/zonemeta.cpp b/deps/icu-small/source/i18n/zonemeta.cpp index 02562048a525da..b7139a807b3fb7 100644 --- a/deps/icu-small/source/i18n/zonemeta.cpp +++ b/deps/icu-small/source/i18n/zonemeta.cpp @@ -319,10 +319,10 @@ ZoneMeta::getCanonicalCLDRID(const UnicodeString &tzid, UErrorCode& status) { id[len] = (char) 0; // Make sure it is null terminated. // replace '/' with ':' - char *p = id; - while (*p++) { - if (*p == '/') { - *p = ':'; + char *q = id; + while (*q++) { + if (*q == '/') { + *q = ':'; } } @@ -850,13 +850,13 @@ ZoneMeta::createCustomTimeZone(int32_t offset) { negative = TRUE; tmp = -offset; } - int32_t hour, min, sec; + uint8_t hour, min, sec; tmp /= 1000; - sec = tmp % 60; + sec = static_cast(tmp % 60); tmp /= 60; - min = tmp % 60; - hour = tmp / 60; + min = static_cast(tmp % 60); + hour = static_cast(tmp / 60); UnicodeString zid; formatCustomID(hour, min, sec, negative, zid); diff --git a/deps/icu-small/source/tools/genrb/parse.cpp b/deps/icu-small/source/tools/genrb/parse.cpp index ddfb082afe63f4..1f6246d3cfbf46 100644 --- a/deps/icu-small/source/tools/genrb/parse.cpp +++ b/deps/icu-small/source/tools/genrb/parse.cpp @@ -1018,6 +1018,11 @@ addCollation(ParseState* state, TableResource *result, const char *collationTyp icu::CollationInfo::printReorderRanges( *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength); } +#if 0 // debugging output + } else { + printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType); + icu::CollationInfo::printSizes(totalSize, indexes); +#endif } struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, NULL, NULL, status); result->add(collationBin, line, *status); diff --git a/deps/icu-small/source/tools/genrb/reslist.cpp b/deps/icu-small/source/tools/genrb/reslist.cpp index 2e04bbce21ef74..0493347ebe5194 100644 --- a/deps/icu-small/source/tools/genrb/reslist.cpp +++ b/deps/icu-small/source/tools/genrb/reslist.cpp @@ -1395,7 +1395,7 @@ SRBRoot::compactKeys(UErrorCode &errorCode) { int32_t offset; suffix = keys + map[j].oldpos; for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {} - offset = (int32_t)(keyLimit - key) - (suffixLimit - suffix); + offset = static_cast((keyLimit - key) - (suffixLimit - suffix)); if (offset < 0) { break; /* suffix cannot be longer than the original */ } diff --git a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp index d4dc271732d80f..d7e5721c2d0fab 100644 --- a/deps/icu-small/source/tools/pkgdata/pkgdata.cpp +++ b/deps/icu-small/source/tools/pkgdata/pkgdata.cpp @@ -511,7 +511,7 @@ main(int argc, char* argv[]) { static int runCommand(const char* command, UBool specialHandling) { char *cmd = NULL; char cmdBuffer[SMALL_BUFFER_MAX_SIZE]; - int32_t len = strlen(command); + int32_t len = static_cast(strlen(command)); if (len == 0) { return 0; @@ -904,7 +904,8 @@ static void createFileNames(UPKGOptions *o, const char mode, const char *version if (IN_DLL_MODE(mode)) { sprintf(libFileNames[LIB_FILE], "%s", libName); } else { - sprintf(libFileNames[LIB_FILE], "%s%s", + sprintf(libFileNames[LIB_FILE], "%s%s%s", + (strstr(libName, "icudt") ? "lib" : ""), pkgDataFlags[LIBPREFIX], libName); } @@ -1020,7 +1021,7 @@ static int32_t pkg_createSymLinks(const char *targetDir, UBool specialHandling) char name2[SMALL_BUFFER_MAX_SIZE]; /* file name to symlink */ const char* FILE_EXTENSION_SEP = uprv_strlen(pkgDataFlags[SO_EXT]) == 0 ? "" : "."; -#if !defined(USING_CYGWIN) && U_PLATFORM != U_PF_MINGW +#if U_PLATFORM != U_PF_CYGWIN /* No symbolic link to make. */ if (uprv_strlen(libFileNames[LIB_FILE_VERSION]) == 0 || uprv_strlen(libFileNames[LIB_FILE_VERSION_MAJOR]) == 0 || uprv_strcmp(libFileNames[LIB_FILE_VERSION], libFileNames[LIB_FILE_VERSION_MAJOR]) == 0) { @@ -1225,7 +1226,7 @@ static int32_t pkg_installFileMode(const char *installDir, const char *srcDir, c if (f != NULL) { for(;;) { if (T_FileStream_readLine(f, buffer, SMALL_BUFFER_MAX_SIZE) != NULL) { - bufferLength = uprv_strlen(buffer); + bufferLength = static_cast(uprv_strlen(buffer)); /* Remove new line character. */ if (bufferLength > 0) { buffer[bufferLength-1] = 0; diff --git a/deps/icu-small/source/tools/toolutil/filestrm.cpp b/deps/icu-small/source/tools/toolutil/filestrm.cpp index cfffa1b75d7724..a170c7b0f29e2e 100644 --- a/deps/icu-small/source/tools/toolutil/filestrm.cpp +++ b/deps/icu-small/source/tools/toolutil/filestrm.cpp @@ -104,14 +104,14 @@ T_FileStream_tmpfile() U_CAPI int32_t U_EXPORT2 T_FileStream_read(FileStream* fileStream, void* addr, int32_t len) { - return fread(addr, 1, len, (FILE*)fileStream); + return static_cast(fread(addr, 1, len, (FILE*)fileStream)); } U_CAPI int32_t U_EXPORT2 T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len) { - return fwrite(addr, 1, len, (FILE*)fileStream); + return static_cast(fwrite(addr, 1, len, (FILE*)fileStream)); } U_CAPI void U_EXPORT2 diff --git a/deps/icu-small/source/tools/toolutil/filetools.cpp b/deps/icu-small/source/tools/toolutil/filetools.cpp index 176a791b0df853..6e88c94b5200b5 100644 --- a/deps/icu-small/source/tools/toolutil/filetools.cpp +++ b/deps/icu-small/source/tools/toolutil/filetools.cpp @@ -134,7 +134,7 @@ static int32_t whichFileModTimeIsLater(const char *file1, const char *file2) { /* Swap the file separater character given with the new one in the file path. */ U_CAPI void U_EXPORT2 swapFileSepChar(char *filePath, const char oldFileSepChar, const char newFileSepChar) { - for (int32_t i = 0, length = uprv_strlen(filePath); i < length; i++) { + for (int32_t i = 0, length = static_cast(uprv_strlen(filePath)); i < length; i++) { filePath[i] = (filePath[i] == oldFileSepChar ) ? newFileSepChar : filePath[i]; } } diff --git a/deps/icu-small/source/tools/toolutil/package.cpp b/deps/icu-small/source/tools/toolutil/package.cpp index d96c6dd36ddb41..f4e428a37e7153 100644 --- a/deps/icu-small/source/tools/toolutil/package.cpp +++ b/deps/icu-small/source/tools/toolutil/package.cpp @@ -610,7 +610,7 @@ Package::readPackage(const char *filename) { memcpy(prefix, s, ++prefixLength); // include the / } else { // Use the package basename as prefix. - int32_t inPkgNameLength=strlen(inPkgName); + int32_t inPkgNameLength= static_cast(strlen(inPkgName)); memcpy(prefix, inPkgName, inPkgNameLength); prefixLength=inPkgNameLength; @@ -1043,7 +1043,7 @@ Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOw memset(items+idx, 0, sizeof(Item)); // copy the item's name - items[idx].name=allocString(TRUE, strlen(name)); + items[idx].name=allocString(TRUE, static_cast(strlen(name))); strcpy(items[idx].name, name); pathToTree(items[idx].name); } else { diff --git a/deps/icu-small/source/tools/toolutil/swapimpl.cpp b/deps/icu-small/source/tools/toolutil/swapimpl.cpp index f3f333a005eb43..e8850cb9864168 100644 --- a/deps/icu-small/source/tools/toolutil/swapimpl.cpp +++ b/deps/icu-small/source/tools/toolutil/swapimpl.cpp @@ -243,7 +243,7 @@ uprops_swap(const UDataSwapper *ds, * swap the main properties UTrie * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16)) */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+UPROPS_INDEX_COUNT, 4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT), outData32+UPROPS_INDEX_COUNT, @@ -274,7 +274,7 @@ uprops_swap(const UDataSwapper *ds, * swap the additional UTrie * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties */ - utrie2_swapAnyVersion(ds, + utrie_swapAnyVersion(ds, inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], 4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]), outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX], @@ -336,7 +336,7 @@ ucase_swap(const UDataSwapper *ds, ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[2]==UTRIE_SHIFT && pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || - 2<=pInfo->formatVersion[0] || pInfo->formatVersion[0]<=4) + (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4)) )) { udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], @@ -391,7 +391,7 @@ ucase_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UCASE_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint16_t exceptions[] and unfold[] */ @@ -493,7 +493,7 @@ ubidi_swap(const UDataSwapper *ds, /* swap the UTrie */ count=indexes[UBIDI_IX_TRIE_SIZE]; - utrie2_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); + utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode); offset+=count; /* swap the uint32_t mirrors[] */ diff --git a/deps/icu-small/source/tools/toolutil/udbgutil.cpp b/deps/icu-small/source/tools/toolutil/udbgutil.cpp index dcc80ebe069519..285f68a0ec66a1 100644 --- a/deps/icu-small/source/tools/toolutil/udbgutil.cpp +++ b/deps/icu-small/source/tools/toolutil/udbgutil.cpp @@ -400,7 +400,7 @@ U_CAPI int32_t paramStatic(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status) { if(param->paramStr==NULL) return paramEmpty(param,target,targetCapacity,status); if(U_FAILURE(*status))return 0; - int32_t len = uprv_strlen(param->paramStr); + int32_t len = static_cast(uprv_strlen(param->paramStr)); if(target!=NULL) { uprv_strncpy(target,param->paramStr,uprv_min(len,targetCapacity)); } @@ -412,14 +412,14 @@ static const char *nullString = "(null)"; static int32_t stringToStringBuffer(char *target, int32_t targetCapacity, const char *str, UErrorCode *status) { if(str==NULL) str=nullString; - int32_t len = uprv_strlen(str); + int32_t len = static_cast(uprv_strlen(str)); if (U_SUCCESS(*status)) { if(target!=NULL) { uprv_strncpy(target,str,uprv_min(len,targetCapacity)); } } else { const char *s = u_errorName(*status); - len = uprv_strlen(s); + len = static_cast(uprv_strlen(s)); if(target!=NULL) { uprv_strncpy(target,s,uprv_min(len,targetCapacity)); } diff --git a/deps/icu-small/source/tools/toolutil/unewdata.cpp b/deps/icu-small/source/tools/toolutil/unewdata.cpp index 5c28e992c9a98b..22d8540881f454 100644 --- a/deps/icu-small/source/tools/toolutil/unewdata.cpp +++ b/deps/icu-small/source/tools/toolutil/unewdata.cpp @@ -61,17 +61,17 @@ udata_create(const char *dir, const char *type, const char *name, length = 0; /* Start with nothing */ if(dir != NULL && *dir !=0) /* Add directory length if one was given */ { - length += strlen(dir); + length += static_cast(strlen(dir)); /* Add 1 if dir doesn't end with path sep */ if (dir[strlen(dir) - 1]!= U_FILE_SEP_CHAR) { length++; } } - length += strlen(name); /* Add the filename length */ + length += static_cast(strlen(name)); /* Add the filename length */ if(type != NULL && *type !=0) { /* Add directory length if given */ - length += strlen(type); + length += static_cast(strlen(type)); } diff --git a/deps/icu-small/source/tools/toolutil/writesrc.cpp b/deps/icu-small/source/tools/toolutil/writesrc.cpp index c05a07acd3aa34..1a1dd3964d34a2 100644 --- a/deps/icu-small/source/tools/toolutil/writesrc.cpp +++ b/deps/icu-small/source/tools/toolutil/writesrc.cpp @@ -22,13 +22,14 @@ #include #include "unicode/utypes.h" #include "unicode/putil.h" +#include "unicode/ucptrie.h" #include "utrie2.h" #include "cstring.h" #include "writesrc.h" static FILE * usrc_createWithHeader(const char *path, const char *filename, - const char *generator, const char *header) { + const char *header, const char *generator) { char buffer[1024]; const char *p; char *q; @@ -71,20 +72,34 @@ usrc_createWithHeader(const char *path, const char *filename, } U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename, const char *generator) { - // TODO: Add parameter for the first year this file was generated, not before 2016. - static const char *header= - "// © 2016 and later: Unicode, Inc. and others.\n" - "// License & terms of use: http://www.unicode.org/copyright.html\n" - "//\n" - "// Copyright (C) 1999-2016, International Business Machines\n" - "// Corporation and others. All Rights Reserved.\n" - "//\n" - "// file name: %s\n" - "//\n" - "// machine-generated by: %s\n" - "\n\n"; - return usrc_createWithHeader(path, filename, generator, header); +usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) { + const char *header; + char buffer[200]; + if(copyrightYear<=2016) { + header= + "// © 2016 and later: Unicode, Inc. and others.\n" + "// License & terms of use: http://www.unicode.org/copyright.html\n" + "//\n" + "// Copyright (C) 1999-2016, International Business Machines\n" + "// Corporation and others. All Rights Reserved.\n" + "//\n" + "// file name: %s\n" + "//\n" + "// machine-generated by: %s\n" + "\n\n"; + } else { + sprintf(buffer, + "// © %d and later: Unicode, Inc. and others.\n" + "// License & terms of use: http://www.unicode.org/copyright.html\n" + "//\n" + "// file name: %%s\n" + "//\n" + "// machine-generated by: %%s\n" + "\n\n", + (int)copyrightYear); + header=buffer; + } + return usrc_createWithHeader(path, filename, header, generator); } U_CAPI FILE * U_EXPORT2 @@ -100,7 +115,7 @@ usrc_createTextData(const char *path, const char *filename, const char *generato "#\n" "# machine-generated by: %s\n" "\n\n"; - return usrc_createWithHeader(path, filename, generator, header); + return usrc_createWithHeader(path, filename, header, generator); } U_CAPI void U_EXPORT2 @@ -228,6 +243,68 @@ usrc_writeUTrie2Struct(FILE *f, } } +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieArrays(FILE *f, + const char *indexPrefix, const char *dataPrefix, + const UCPTrie *pTrie, + const char *postfix) { + usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix); + int32_t width= + pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; + usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, postfix); +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieStruct(FILE *f, + const char *prefix, + const UCPTrie *pTrie, + const char *indexName, const char *dataName, + const char *postfix) { + if(prefix!=NULL) { + fputs(prefix, f); + } + fprintf( + f, + " %s,\n" // index + " { %s },\n", // data (union) + indexName, + dataName); + fprintf( + f, + " %ld, %ld,\n" // indexLength, dataLength + " 0x%lx, 0x%x,\n" // highStart, shifted12HighStart + " %d, %d,\n" // type, valueWidth + " 0, 0,\n" // reserved32, reserved16 + " 0x%x, 0x%lx,\n" // index3NullOffset, dataNullOffset + " 0x%lx,\n", // nullValue + (long)pTrie->indexLength, (long)pTrie->dataLength, + (long)pTrie->highStart, pTrie->shifted12HighStart, + pTrie->type, pTrie->valueWidth, + pTrie->index3NullOffset, (long)pTrie->dataNullOffset, + (long)pTrie->nullValue); + if(postfix!=NULL) { + fputs(postfix, f); + } +} + +U_CAPI void U_EXPORT2 +usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie) { + int32_t width= + pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 : + pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0; + char line[100], line2[100], line3[100]; + sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name); + sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name); + usrc_writeUCPTrieArrays(f, line, line2, pTrie, "\n};\n\n"); + sprintf(line, "static const UCPTrie %s_trie={\n", name); + sprintf(line2, "%s_trieIndex", name); + sprintf(line3, "%s_trieData", name); + usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, "};\n\n"); +} + U_CAPI void U_EXPORT2 usrc_writeArrayOfMostlyInvChars(FILE *f, const char *prefix, diff --git a/deps/icu-small/source/tools/toolutil/writesrc.h b/deps/icu-small/source/tools/toolutil/writesrc.h index fdcf1f9a6b40f3..35ba256793c03a 100644 --- a/deps/icu-small/source/tools/toolutil/writesrc.h +++ b/deps/icu-small/source/tools/toolutil/writesrc.h @@ -23,6 +23,7 @@ #include #include "unicode/utypes.h" +#include "unicode/ucptrie.h" #include "utrie2.h" /** @@ -30,7 +31,7 @@ * Writes a C/Java-style comment with the generator name. */ U_CAPI FILE * U_EXPORT2 -usrc_create(const char *path, const char *filename, const char *generator); +usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator); /** * Creates a source text file and writes a header comment with the ICU copyright. @@ -75,6 +76,33 @@ usrc_writeUTrie2Struct(FILE *f, const char *indexName, const char *dataName, const char *postfix); +/** + * Calls usrc_writeArray() for the index and data arrays of a UCPTrie. + */ +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieArrays(FILE *f, + const char *indexPrefix, const char *dataPrefix, + const UCPTrie *pTrie, + const char *postfix); + +/** + * Writes the UCPTrie struct values. + * The {} and declaration etc. need to be included in prefix/postfix or + * printed before and after the array contents. + */ +U_CAPI void U_EXPORT2 +usrc_writeUCPTrieStruct(FILE *f, + const char *prefix, + const UCPTrie *pTrie, + const char *indexName, const char *dataName, + const char *postfix); + +/** + * Writes the UCPTrie arrays and struct values. + */ +U_CAPI void U_EXPORT2 +usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie); + /** * Writes the contents of an array of mostly invariant characters. * Characters 0..0x1f are printed as numbers, diff --git a/tools/icu/current_ver.dep b/tools/icu/current_ver.dep index 5e439928ebb540..ce0d89485ab312 100644 --- a/tools/icu/current_ver.dep +++ b/tools/icu/current_ver.dep @@ -1,6 +1,14 @@ [ { - "url": "https://sourceforge.net/projects/icu/files/ICU4C/62.1/icu4c-62_1-src.zip", - "md5": "408854f7b9b58311b68fab4b4dfc80be" + "url": "https://github.com/unicode-org/icu/releases/download/release-63-1/icu4c-63_1-src.zip", + "md5": "d25bc38089db64668fd86f34b4aa9b93" + }, + { + "url": "https://sourceforge.net/projects/icu/files/ICU4C/63.1/icu4c-63_1-src.zip", + "md5": "d25bc38089db64668fd86f34b4aa9b93" + }, + { + "url": "https://download.icu-project.org/files/icu4c/63.1/icu4c-63_1-src.zip", + "md5": "d25bc38089db64668fd86f34b4aa9b93" } ]