Skip to content

Commit

Permalink
ICU-22122 Support Locale Tags (ms, mu and rg)
Browse files Browse the repository at this point in the history
See #2182
  • Loading branch information
younies authored and markusicu committed Sep 20, 2022
1 parent f5367be commit dbfe830
Show file tree
Hide file tree
Showing 15 changed files with 361 additions and 67 deletions.
2 changes: 1 addition & 1 deletion icu4c/source/i18n/number_usageprefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ UsagePrefsHandler::UsagePrefsHandler(const Locale &locale,
const StringPiece usage,
const MicroPropsGenerator *parent,
UErrorCode &status)
: fUnitsRouter(inputUnit, StringPiece(locale.getCountry()), usage, status),
: fUnitsRouter(inputUnit, locale, usage, status),
fParent(parent) {
}

Expand Down
103 changes: 89 additions & 14 deletions icu4c/source/i18n/units_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@

#if !UCONFIG_NO_FORMATTING

#include "bytesinkutil.h"
#include "cstring.h"
#include "number_decimalquantity.h"
#include "resource.h"
#include "uassert.h"
#include "unicode/locid.h"
#include "unicode/unistr.h"
#include "unicode/ures.h"
#include "units_data.h"
Expand Down Expand Up @@ -387,24 +389,97 @@ U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
}

// TODO: make outPreferences const?
//
// TODO: consider replacing `UnitPreference **&outPreferences` with slice class
// of some kind.
void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
StringPiece region,
const UnitPreference *const *&outPreferences,
int32_t &preferenceCount, UErrorCode &status) const {
int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
/**
 * Fetches the value of keyword `kw` from `locale`.
 *
 * @param locale The locale whose keywords are queried.
 * @param kw     The keyword name handed to Locale::getKeywordValue().
 * @param status Receives status. If it is already a failure on entry, nothing
 *               is looked up. If the lookup succeeds but produces an empty
 *               value, it is set to U_MISSING_RESOURCE_ERROR.
 * @return The keyword value; empty whenever `status` ends up as a failure
 *         because of a prior error or an empty value.
 */
CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
    CharString value;
    if (U_SUCCESS(status)) {
        {
            // The sink lives in its own scope so that it is destroyed before
            // `value` is inspected.
            CharStringByteSink valueSink(&value);
            locale.getKeywordValue(kw, valueSink, status);
        }
        if (U_SUCCESS(status) && value.isEmpty()) {
            // An absent/empty keyword is reported to the caller as an error.
            status = U_MISSING_RESOURCE_ERROR;
        }
    }
    return value;
}

// Returns copies of the unit preferences that apply to `category` / `usage`
// for `locale`. The region used for the lookup is chosen in this order:
//   1. For the "temperature" category only: a `mu` keyword whose value is
//      celsius, fahrenheit or kelvin short-circuits the lookup and yields a
//      single preference for that unit.
//   2. A "measure" keyword of metric / ussystem / uksystem replaces the
//      region with 001 / US / GB respectively.
//   3. Otherwise an "rg" keyword value of at least three characters replaces
//      the region (see the branch below for how it is reduced).
//   4. Otherwise the locale's country is used.
// Keyword lookups use their own local UErrorCode, so a missing keyword does
// not set the caller's `status`.
//
// NOTE(review): this text is a rendered diff. The statements that assign
// `outPreferences` / `preferenceCount` and the bare `return;` reference
// out-parameters that are not in this signature and appear to be removed
// lines from the previous version — confirm against the actual file.
MaybeStackVector<UnitPreference>
U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
const Locale &locale, UErrorCode &status) const {

MaybeStackVector<UnitPreference> result;

// TODO: remove this once all the categories are allowed.
UErrorCode internalMuStatus = U_ZERO_ERROR;
if (category.compare("temperature") == 0) {
CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
if (U_SUCCESS(internalMuStatus)) {
// TODO: use the unit category as Java especially when all the categories are allowed..
if (localeUnitCharString == "celsius" //
|| localeUnitCharString == "fahrenheit" //
|| localeUnitCharString == "kelvin" //
) {
// The `mu` unit becomes the one and only preference; only `unit` is set
// explicitly here, the other fields keep their default-constructed values.
UnitPreference unitPref;
unitPref.unit.append(localeUnitCharString, status);
result.emplaceBackAndCheckErrorCode(status, unitPref);
return result;
}
}
}

// Default region: the locale's country code. It may be overridden below.
CharString region(locale.getCountry(), status);

// Check the locale system tag, e.g `ms=metric`.
UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
bool isLocaleSystem = false;
if (U_SUCCESS(internalMeasureTagStatus)) {
// Each recognized measurement system is mapped onto a region code;
// unrecognized values leave `region` and `isLocaleSystem` untouched.
if (localeSystem == "metric") {
region.clear();
region.append("001", status);
isLocaleSystem = true;
} else if (localeSystem == "ussystem") {
region.clear();
region.append("US", status);
isLocaleSystem = true;
} else if (localeSystem == "uksystem") {
region.clear();
region.append("GB", status);
isLocaleSystem = true;
}
}

// Check the region tag, e.g. `rg=uszzz`.
// Only consulted when no recognized measurement system was found above.
if (!isLocaleSystem) {
UErrorCode internalRgTagStatus = U_ZERO_ERROR;
CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus);
if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) {
if (localeRegion == "default") {
// The literal value "default" is passed through unchanged.
region.clear();
region.append(localeRegion, status);
} else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') {
// Value starts with a digit: keep its first three characters.
region.clear();
region.append(localeRegion.data(), 3, status);
} else {
// Take the first two character and capitalize them.
region.clear();
region.append(uprv_toupper(localeRegion[0]), status);
region.append(uprv_toupper(localeRegion[1]), status);
}
}
}

// Resolve (category, usage, region) to an index into `metadata_`.
int32_t idx =
getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
if (U_FAILURE(status)) {
outPreferences = nullptr;
preferenceCount = 0;
return;
return result;
}

U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
const UnitPreferenceMetadata *m = metadata_[idx];
outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
preferenceCount = m->prefsCount;
// Copy the `prefsCount` entries starting at `prefsOffset` into the result.
for (int32_t i = 0; i < m->prefsCount; i++) {
result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
}
return result;
}

} // namespace units
Expand Down
16 changes: 11 additions & 5 deletions icu4c/source/i18n/units_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ struct U_I18N_API UnitPreference : public UMemory {
CharString unit;
double geq;
UnicodeString skeleton;

UnitPreference(const UnitPreference &other) {
UErrorCode status = U_ZERO_ERROR;
this->unit.append(other.unit, status);
this->geq = other.geq;
this->skeleton = other.skeleton;
}
};

/**
Expand Down Expand Up @@ -189,12 +196,11 @@ class U_I18N_API UnitPreferences {
* @param preferenceCount The number of unit preferences that belong to the
* result set.
* @param status Receives status.
*
* TODO(hugovdm): maybe replace `UnitPreference **&outPreferences` with a slice class?
*/
void getPreferencesFor(StringPiece category, StringPiece usage, StringPiece region,
const UnitPreference *const *&outPreferences, int32_t &preferenceCount,
UErrorCode &status) const;
MaybeStackVector<UnitPreference> getPreferencesFor(StringPiece category, StringPiece usage,
const Locale &locale,

UErrorCode &status) const;

protected:
// Metadata about the sets of preferences, this is the index for looking up
Expand Down
29 changes: 13 additions & 16 deletions icu4c/source/i18n/units_router.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,17 @@ Precision UnitsRouter::parseSkeletonToPrecision(icu::UnicodeString precisionSkel
return result;
}

// Constructs a router from a unit identifier string: the identifier is turned
// into a MeasureUnit via MeasureUnit::forIdentifier() and handed to init()
// together with the locale and usage. Errors are reported through `status`.
//
// NOTE(review): this is a rendered diff; the `StringPiece region` signature
// line and the `init(..., region, ...)` call appear to be the pre-change
// lines, with the `const Locale &locale` lines being their replacements.
UnitsRouter::UnitsRouter(StringPiece inputUnitIdentifier, StringPiece region, StringPiece usage,
UnitsRouter::UnitsRouter(StringPiece inputUnitIdentifier, const Locale &locale, StringPiece usage,
UErrorCode &status) {
this->init(MeasureUnit::forIdentifier(inputUnitIdentifier, status), region, usage, status);
this->init(MeasureUnit::forIdentifier(inputUnitIdentifier, status), locale, usage, status);
}

// Constructs a router from an existing MeasureUnit by forwarding it to init()
// together with the locale and usage. Errors are reported through `status`.
//
// NOTE(review): `inputUnit` is a const reference, so `std::move(inputUnit)`
// produces a const rvalue that simply binds to init()'s `const MeasureUnit &`
// parameter; nothing is moved and the std::move could be dropped.
//
// NOTE(review): this is a rendered diff; the `StringPiece region` signature
// line and the `init(..., region, ...)` call appear to be the pre-change
// lines, with the `const Locale &locale` lines being their replacements.
UnitsRouter::UnitsRouter(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage,
UnitsRouter::UnitsRouter(const MeasureUnit &inputUnit, const Locale &locale, StringPiece usage,
UErrorCode &status) {
this->init(std::move(inputUnit), region, usage, status);
this->init(std::move(inputUnit), locale, usage, status);
}

void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringPiece usage,
void UnitsRouter::init(const MeasureUnit &inputUnit, const Locale &locale, StringPiece usage,
UErrorCode &status) {

if (U_FAILURE(status)) {
Expand All @@ -73,22 +73,19 @@ void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringP
return;
}

const UnitPreference *const *unitPreferences;
int32_t preferencesCount = 0;
prefs.getPreferencesFor(category.toStringPiece(), usage, region, unitPreferences, preferencesCount,
status);

for (int i = 0; i < preferencesCount; ++i) {
U_ASSERT(unitPreferences[i] != nullptr);
const auto &preference = *unitPreferences[i];
const MaybeStackVector<UnitPreference> unitPrefs =
prefs.getPreferencesFor(category.toStringPiece(), usage, locale, status);
for (int32_t i = 0, n = unitPrefs.length(); i < n; ++i) {
U_ASSERT(unitPrefs[i] != nullptr);
const auto preference = unitPrefs[i];

MeasureUnitImpl complexTargetUnitImpl =
MeasureUnitImpl::forIdentifier(preference.unit.data(), status);
MeasureUnitImpl::forIdentifier(preference->unit.data(), status);
if (U_FAILURE(status)) {
return;
}

UnicodeString precision = preference.skeleton;
UnicodeString precision = preference->skeleton;

// For now, we only have "precision-increment" in Units Preferences skeleton.
// Therefore, we check if the skeleton starts with "precision-increment" and force the program to
Expand All @@ -103,7 +100,7 @@ void UnitsRouter::init(const MeasureUnit &inputUnit, StringPiece region, StringP
outputUnits_.emplaceBackAndCheckErrorCode(status,
complexTargetUnitImpl.copy(status).build(status));
converterPreferences_.emplaceBackAndCheckErrorCode(status, inputUnitImpl, complexTargetUnitImpl,
preference.geq, std::move(precision),
preference->geq, std::move(precision),
conversionRates, status);

if (U_FAILURE(status)) {
Expand Down
8 changes: 5 additions & 3 deletions icu4c/source/i18n/units_router.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include "cmemory.h"
#include "measunit_impl.h"
#include "unicode/locid.h"
#include "unicode/measunit.h"
#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
Expand Down Expand Up @@ -118,9 +119,10 @@ namespace units {
*/
class U_I18N_API UnitsRouter {
public:
UnitsRouter(StringPiece inputUnitIdentifier, StringPiece locale, StringPiece usage,
UnitsRouter(StringPiece inputUnitIdentifier, const Locale &locale, StringPiece usage,
UErrorCode &status);
UnitsRouter(const MeasureUnit &inputUnit, const Locale &locale, StringPiece usage,
UErrorCode &status);
UnitsRouter(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status);

/**
* Performs locale and usage sensitive unit conversion.
Expand Down Expand Up @@ -153,7 +155,7 @@ class U_I18N_API UnitsRouter {
static number::Precision parseSkeletonToPrecision(icu::UnicodeString precisionSkeleton,
UErrorCode &status);

void init(const MeasureUnit &inputUnit, StringPiece locale, StringPiece usage, UErrorCode &status);
void init(const MeasureUnit &inputUnit, const Locale &locale, StringPiece usage, UErrorCode &status);
};

} // namespace units
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/numbertest.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ class NumberFormatterApiTest : public IntlTestWithFieldPosition {
void unitGender();
void unitNotConvertible();
void unitPercent();
void unitLocaleTags();
void percentParity();
void roundingFraction();
void roundingFigures();
Expand Down
85 changes: 85 additions & 0 deletions icu4c/source/test/intltest/numbertest_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ void NumberFormatterApiTest::runIndexedTest(int32_t index, UBool exec, const cha
TESTCASE_AUTO(unitNounClass);
TESTCASE_AUTO(unitNotConvertible);
TESTCASE_AUTO(unitPercent);
TESTCASE_AUTO(unitLocaleTags);
if (!quick) {
// Slow test: run in exhaustive mode only
TESTCASE_AUTO(percentParity);
Expand Down Expand Up @@ -2948,6 +2949,90 @@ void NumberFormatterApiTest::unitPercent() {
u"50 meters per percent");
}

/**
 * Checks how the `mu`, `ms` and `rg` locale extension tags influence
 * usage-driven unit formatting.
 *
 * Each case formats `inputValue` of `inputUnit` with a full-name unit width
 * for the given locale and compares the formatted string with
 * `expectedFormattedNumber`. A null `usage` means that no usage is set on the
 * formatter at all.
 *
 * `expectedOutputUnit` and `expectedOutputValue` are carried in the table but
 * are not asserted yet (see the ICU-22154 TODO at the end of the loop).
 */
void NumberFormatterApiTest::unitLocaleTags() {
    IcuTestErrorCode status(*this, "unitLocaleTags");

    const struct TestCase {
        const UnicodeString message;
        const char *locale;
        const char *inputUnit;
        const double inputValue;
        const char *usage; // nullptr: do not call .usage() on the formatter.
        const char *expectedOutputUnit;
        const double expectedOutputValue;
        const UnicodeString expectedFormattedNumber;
    } cases[] = {
        // Test without any tag behaviour
        {u"Test the locale without any addition and without usage", "en-US", "celsius", 0, nullptr,
         "celsius", 0.0, u"0 degrees Celsius"},
        {u"Test the locale without any addition and usage", "en-US", "celsius", 0, "default",
         "fahrenheit", 32.0, u"32 degrees Fahrenheit"},

        // Test the behaviour of the `mu` tag.
        {u"Test the locale with mu = celsius and without usage", "en-US-u-mu-celsius", "fahrenheit", 0,
         nullptr, "fahrenheit", 0.0, u"0 degrees Fahrenheit"},
        {u"Test the locale with mu = celsius and with usage", "en-US-u-mu-celsius", "fahrenheit", 0,
         "default", "celsius", -18.0, u"-18 degrees Celsius"},
        {u"Test the locale with mu = calsius (wrong spelling) and with usage", "en-US-u-mu-calsius",
         "fahrenheit", 0, "default", "fahrenheit", 0.0, u"0 degrees Fahrenheit"},
        {u"Test the locale with mu = meter (only temprature units are supported) and with usage",
         "en-US-u-mu-meter", "foot", 0, "default", "inch", 0.0, u"0 inches"},
        // These two literals used to be plain `const char *` strings; they now use
        // UTF-16 literals like every other case, so no `const char *` to
        // UnicodeString conversion is involved.
        {u"Test the locale with mu = fahrenheit and without usage", "en-US-u-mu-fahrenheit", "celsius",
         0, nullptr, "celsius", 0.0, u"0 degrees Celsius"},
        {u"Test the locale with mu = fahrenheit and with usage", "en-US-u-mu-fahrenheit", "celsius", 0,
         "default", "fahrenheit", 32.0, u"32 degrees Fahrenheit"},

        // Test the behaviour of the `ms` tag
        {u"Test the locale with ms = metric and without usage", "en-US-u-ms-metric", "fahrenheit", 0,
         nullptr, "fahrenheit", 0.0, u"0 degrees Fahrenheit"},
        {u"Test the locale with ms = metric and with usage", "en-US-u-ms-metric", "fahrenheit", 0,
         "default", "celsius", -18, u"-18 degrees Celsius"},
        {u"Test the locale with ms = Matric (wrong spelling) and with usage", "en-US-u-ms-Matric",
         "fahrenheit", 0, "default", "fahrenheit", 0.0, u"0 degrees Fahrenheit"},

        // Test the behaviour of the `rg` tag
        {u"Test the locale with rg = UK and without usage", "en-US-u-rg-ukzzzz", "fahrenheit", 0,
         nullptr, "fahrenheit", 0.0, u"0 degrees Fahrenheit"},
        {u"Test the locale with rg = UK and with usage", "en-US-u-rg-ukzzzz", "fahrenheit", 0, "default",
         "celsius", -18, u"-18 degrees Celsius"},
        {u"Test the locale with rg = UKOI and with usage", "en-US-u-rg-ukoizzzz", "fahrenheit", 0,
         "default", "celsius", -18.0, u"-18 degrees Celsius"},

        // Test the priorities
        {u"Test the locale with mu,ms,rg --> mu tag wins", "en-US-u-mu-celsius-ms-ussystem-rg-uszzzz",
         "celsius", 0, "default", "celsius", 0.0, u"0 degrees Celsius"},
        {u"Test the locale with ms,rg --> ms tag wins", "en-US-u-ms-metric-rg-uszzzz", "foot", 1,
         "default", "centimeter", 30.0, u"30 centimeters"},
    };

    for (const auto &testCase : cases) {
        UnicodeString message = testCase.message;
        Locale locale(testCase.locale);
        auto inputUnit = MeasureUnit::forIdentifier(testCase.inputUnit, status);
        auto inputValue = testCase.inputValue;
        auto usage = testCase.usage;
        auto expectedOutputUnit = MeasureUnit::forIdentifier(testCase.expectedOutputUnit, status);
        UnicodeString expectedFormattedNumber = testCase.expectedFormattedNumber;

        auto nf = NumberFormatter::with()
                      .locale(locale)                        //
                      .unit(inputUnit)                       //
                      .unitWidth(UNUM_UNIT_WIDTH_FULL_NAME); //
        if (usage != nullptr) {
            nf = nf.usage(usage);
        }
        auto fn = nf.formatDouble(inputValue, status);
        // Report any failure for this case, clear it, and move on to the next
        // case so that one broken case does not mask the others.
        if (status.errIfFailureAndReset()) {
            continue;
        }

        assertEquals(message, fn.toString(status), expectedFormattedNumber);
        // TODO: ICU-22154
        // assertEquals(message, fn.getOutputUnit(status).getIdentifier(),
        //              expectedOutputUnit.getIdentifier());
    }
}

void NumberFormatterApiTest::percentParity() {
IcuTestErrorCode status(*this, "percentParity");
UnlocalizedNumberFormatter uNoUnitPercent = NumberFormatter::with().unit(NoUnit::percent());
Expand Down
Loading

1 comment on commit dbfe830

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Performance Alert ⚠️

Possible performance regression was detected for benchmark.
Benchmark result of this commit is worse than the previous benchmark result exceeding threshold 2.

Benchmark suite Current: dbfe830 Previous: f5367be Ratio
TestScan1 101.1714 ns/iter 49.151 ns/iter 2.06

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.