Skip to content

Commit

Permalink
UnicodeSet perf fix #2
Browse files Browse the repository at this point in the history
Cherry-pick another upstream patch to improve the performance
of UnicodeSet.

unicode-org/icu#278

Bug: 899983,901532
Test: Android webview start-up perf and Windows perf graph
Change-Id: I2c2a22a398883178734a2c2a6c5975d9551b039e
TBR=ftang@chromium.org
Reviewed-on: https://chromium-review.googlesource.com/c/1335799
Reviewed-by: Jungshik Shin <jshin@chromium.org>
  • Loading branch information
jungshik committed Nov 14, 2018
1 parent ecae5c0 commit d13a96f
Show file tree
Hide file tree
Showing 12 changed files with 2,034 additions and 430 deletions.
1 change: 1 addition & 0 deletions README.chromium
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,4 @@ D. Local Modifications
https://unicode-org.atlassian.net/browse/ICU-20250
- Fix:
https://github.com/unicode-org/icu/pull/265
https://github.com/unicode-org/icu/pull/278
1,607 changes: 1,607 additions & 0 deletions patches/uniset_perf2.patch

Large diffs are not rendered by default.

99 changes: 72 additions & 27 deletions source/common/characterproperties.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,23 @@
#include "umutex.h"
#include "uprops.h"

using icu::LocalPointer;
using icu::Normalizer2Factory;
using icu::Normalizer2Impl;
using icu::UInitOnce;
using icu::UnicodeSet;

namespace {

UBool U_CALLCONV characterproperties_cleanup();

constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;

// One lazily built cache entry of the gInclusions table.
struct Inclusion {
// Cached set; stays nullptr until an initializer publishes a fully built set
// into it (the initializers hand over ownership via LocalPointer::orphan()).
UnicodeSet *fSet;
// Guard passed to umtx_initOnce() together with the initializer for this entry.
UInitOnce fInitOnce;
};
Inclusion gInclusions[UPROPS_SRC_COUNT]; // cached getInclusions()
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()

UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};

Expand Down Expand Up @@ -80,43 +85,29 @@ UBool U_CALLCONV characterproperties_cleanup() {
return TRUE;
}

} // namespace

U_NAMESPACE_BEGIN

/*
Reduce excessive reallocation, and make it easier to detect initialization problems.
Usually you don't see smaller sets than this for Unicode 5.0.
*/
constexpr int32_t DEFAULT_INCLUSION_CAPACITY = 3072;

void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCode &errorCode) {
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
// This function is a friend of class UnicodeSet.

U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
if (src == UPROPS_SRC_NONE) {
errorCode = U_INTERNAL_PROGRAM_ERROR;
return;
}
UnicodeSet * &incl = gInclusions[src].fSet;
U_ASSERT(incl == nullptr);
U_ASSERT(gInclusions[src].fSet == nullptr);

incl = new UnicodeSet();
if (incl == nullptr) {
LocalPointer<UnicodeSet> incl(new UnicodeSet());
if (incl.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
USetAdder sa = {
(USet *)incl,
(USet *)incl.getAlias(),
_set_add,
_set_addRange,
_set_addString,
nullptr, // don't need remove()
nullptr // don't need removeRange()
};

incl->ensureCapacity(DEFAULT_INCLUSION_CAPACITY, errorCode);
switch(src) {
case UPROPS_SRC_CHAR:
uchar_addPropertyStarts(&sa, &errorCode);
Expand Down Expand Up @@ -183,12 +174,15 @@ void U_CALLCONV CharacterProperties::initInclusion(UPropertySource src, UErrorCo
}

if (U_FAILURE(errorCode)) {
delete incl;
incl = nullptr;
return;
}
// Compact for caching
if (incl->isBogus()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Compact for caching.
incl->compact();
gInclusions[src].fSet = incl.orphan();
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

Expand All @@ -199,15 +193,66 @@ const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorC
return nullptr;
}
Inclusion &i = gInclusions[src];
umtx_initOnce(i.fInitOnce, &CharacterProperties::initInclusion, src, errorCode);
umtx_initOnce(i.fInitOnce, &initInclusion, src, errorCode);
return i.fSet;
}

// Builds and caches the inclusion set for one integer-valued property.
//
// Starting from the inclusion set of the property's data source, it walks every
// code point of every range of that set and records each code point at which
// u_getIntPropertyValue() differs from the previously seen value. The result is
// stored in the gInclusions slot reserved for this property.
//
// prop:      must satisfy UCHAR_INT_START <= prop < UCHAR_INT_LIMIT (asserted).
// errorCode: set to U_MEMORY_ALLOCATION_ERROR if the new set cannot be allocated
//            or ends up bogus; a failure reported by getInclusionsForSource() is
//            left as is. On any failure nothing is stored in gInclusions.
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
// This function is invoked only via umtx_initOnce().
U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
// Per-property entries are indexed after the UPROPS_SRC_COUNT per-source entries.
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
U_ASSERT(gInclusions[inclIndex].fSet == nullptr);
UPropertySource src = uprops_getSource(prop);
const UnicodeSet *incl = getInclusionsForSource(src, errorCode);
if (U_FAILURE(errorCode)) {
return;
}

// Built in a LocalPointer and only handed to the cache at the very end;
// presumably the LocalPointer frees the set on the early error returns below.
// The set is constructed from the range (0, 0).
LocalPointer<UnicodeSet> intPropIncl(new UnicodeSet(0, 0));
if (intPropIncl.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
int32_t numRanges = incl->getRangeCount();
// prevValue starts at 0 and is carried across range boundaries, so a code point
// is added only when its value differs from the last code point examined.
int32_t prevValue = 0;
for (int32_t i = 0; i < numRanges; ++i) {
UChar32 rangeEnd = incl->getRangeEnd(i);
for (UChar32 c = incl->getRangeStart(i); c <= rangeEnd; ++c) {
// TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
int32_t value = u_getIntPropertyValue(c, prop);
if (value != prevValue) {
intPropIncl->add(c);
prevValue = value;
}
}
}

// A bogus set is reported as an allocation failure
// (presumably one of the add() calls above ran out of memory).
if (intPropIncl->isBogus()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return;
}
// Compact for caching.
intPropIncl->compact();
// Publish the finished set; the cache entry now holds the raw pointer.
gInclusions[inclIndex].fSet = intPropIncl.orphan();
ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}

} // namespace

U_NAMESPACE_BEGIN

const UnicodeSet *CharacterProperties::getInclusionsForProperty(
UProperty prop, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
UPropertySource src = uprops_getSource(prop);
return getInclusionsForSource(src, errorCode);
if (UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT) {
int32_t inclIndex = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
Inclusion &i = gInclusions[inclIndex];
umtx_initOnce(i.fInitOnce, &initIntPropInclusion, prop, errorCode);
return i.fSet;
} else {
UPropertySource src = uprops_getSource(prop);
return getInclusionsForSource(src, errorCode);
}
}

U_NAMESPACE_END
Expand All @@ -216,7 +261,7 @@ namespace {

UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
if (U_FAILURE(errorCode)) { return nullptr; }
icu::LocalPointer<UnicodeSet> set(new UnicodeSet());
LocalPointer<UnicodeSet> set(new UnicodeSet());
if (set.isNull()) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
Expand Down
33 changes: 22 additions & 11 deletions source/common/ucptrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ UChar32 getRange(const void *t, UChar32 start,
int32_t prevI3Block = -1;
int32_t prevBlock = -1;
UChar32 c = start;
uint32_t value;
uint32_t trieValue, value;
bool haveValue = false;
do {
int32_t i3Block;
Expand Down Expand Up @@ -319,6 +319,7 @@ UChar32 getRange(const void *t, UChar32 start,
return c - 1;
}
} else {
trieValue = trie->nullValue;
value = nullValue;
if (pValue != nullptr) { *pValue = nullValue; }
haveValue = true;
Expand Down Expand Up @@ -357,30 +358,40 @@ UChar32 getRange(const void *t, UChar32 start,
return c - 1;
}
} else {
trieValue = trie->nullValue;
value = nullValue;
if (pValue != nullptr) { *pValue = nullValue; }
haveValue = true;
}
c = (c + dataBlockLength) & ~dataMask;
} else {
int32_t di = block + (c & dataMask);
uint32_t value2 = getValue(trie->data, valueWidth, di);
value2 = maybeFilterValue(value2, trie->nullValue, nullValue,
filter, context);
uint32_t trieValue2 = getValue(trie->data, valueWidth, di);
if (haveValue) {
if (value2 != value) {
return c - 1;
if (trieValue2 != trieValue) {
if (filter == nullptr ||
maybeFilterValue(trieValue2, trie->nullValue, nullValue,
filter, context) != value) {
return c - 1;
}
trieValue = trieValue2; // may or may not help
}
} else {
value = value2;
trieValue = trieValue2;
value = maybeFilterValue(trieValue2, trie->nullValue, nullValue,
filter, context);
if (pValue != nullptr) { *pValue = value; }
haveValue = true;
}
while ((++c & dataMask) != 0) {
if (maybeFilterValue(getValue(trie->data, valueWidth, ++di),
trie->nullValue, nullValue,
filter, context) != value) {
return c - 1;
trieValue2 = getValue(trie->data, valueWidth, ++di);
if (trieValue2 != trieValue) {
if (filter == nullptr ||
maybeFilterValue(trieValue2, trie->nullValue, nullValue,
filter, context) != value) {
return c - 1;
}
trieValue = trieValue2; // may or may not help
}
}
}
Expand Down
43 changes: 29 additions & 14 deletions source/common/umutablecptrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,41 +304,56 @@ UChar32 MutableCodePointTrie::getRange(
uint32_t nullValue = initialValue;
if (filter != nullptr) { nullValue = filter(context, nullValue); }
UChar32 c = start;
uint32_t value;
uint32_t trieValue, value;
bool haveValue = false;
int32_t i = c >> UCPTRIE_SHIFT_3;
do {
if (flags[i] == ALL_SAME) {
uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue,
filter, context);
uint32_t trieValue2 = index[i];
if (haveValue) {
if (value2 != value) {
return c - 1;
if (trieValue2 != trieValue) {
if (filter == nullptr ||
maybeFilterValue(trieValue2, initialValue, nullValue,
filter, context) != value) {
return c - 1;
}
trieValue = trieValue2; // may or may not help
}
} else {
value = value2;
trieValue = trieValue2;
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
if (pValue != nullptr) { *pValue = value; }
haveValue = true;
}
c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
} else /* MIXED */ {
int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue,
filter, context);
uint32_t trieValue2 = data[di];
if (haveValue) {
if (value2 != value) {
return c - 1;
if (trieValue2 != trieValue) {
if (filter == nullptr ||
maybeFilterValue(trieValue2, initialValue, nullValue,
filter, context) != value) {
return c - 1;
}
trieValue = trieValue2; // may or may not help
}
} else {
value = value2;
trieValue = trieValue2;
value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
if (pValue != nullptr) { *pValue = value; }
haveValue = true;
}
while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
if (maybeFilterValue(data[++di], initialValue, nullValue,
filter, context) != value) {
return c - 1;
trieValue2 = data[++di];
if (trieValue2 != trieValue) {
if (filter == nullptr ||
maybeFilterValue(trieValue2, initialValue, nullValue,
filter, context) != value) {
return c - 1;
}
}
trieValue = trieValue2; // may or may not help
}
}
++i;
Expand Down
Loading

0 comments on commit d13a96f

Please sign in to comment.