Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17407 check for anomalies and fix #3522

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion common/properties/coverageLevels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ th ; modern ; Thai
ti ; basic ; Tigrinya
tk ; modern ; Turkmen
to ; basic ; Tongan
tok ; basic ; Toki Pona
tr ; modern ; Turkish
tt ; basic ; Tatar
ug ; basic ; Uyghur
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ private Map<String, List<String>> getCodeData(CodeType type) {
return type_code_data.get(type);
}

/**
 * Returns the codes known for the given code type.
 *
 * @param type the code type to look up
 * @return the key set of the code-to-data map stored for {@code type}
 */
public Set<String> getCodes(CodeType type) {
    final Map<String, List<String>> codeToData = type_code_data.get(type);
    return codeToData.keySet();
}

/**
* Get at the language registry values, as a Map from label to value.
*
Expand Down Expand Up @@ -275,7 +279,7 @@ public Set<String> getGoodAvailableCodes(CodeType type) {
case script:
return sd.getCLDRScriptCodes();
case tzid:
break; // nothing special
return sd.getCLDRTimezoneCodes();
default:
for (Iterator<String> it = result.iterator(); it.hasNext(); ) {
String code = it.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@

import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSetMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.impl.IterableComparator;
import com.ibm.icu.impl.Relation;
Expand Down Expand Up @@ -74,6 +77,7 @@
import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.Rational.RationalParser;
import org.unicode.cldr.util.StandardCodes.CodeType;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo.BasicLanguageData.Type;
import org.unicode.cldr.util.SupplementalDataInfo.NumberingSystemInfo.NumberingSystemType;
Expand Down Expand Up @@ -981,6 +985,10 @@ public enum RBNFGroup {
public Map<Row.R2<String, String>, String> bcp47Since = new TreeMap<>();
public Map<Row.R2<String, String>, String> bcp47Preferred = new TreeMap<>();
public Map<Row.R2<String, String>, String> bcp47Deprecated = new TreeMap<>();

Map<String, Map<String, Bcp47KeyInfo>> bcp47KeyToSubtypeToInfo = new TreeMap<>();
Map<String, Map<String, String>> bcp47KeyToAliasToSubtype = new TreeMap<>();

public Map<String, String> bcp47ValueType = new TreeMap<>();

public Map<String, Row.R2<String, String>> validityInfo = new LinkedHashMap<>();
Expand Down Expand Up @@ -1145,6 +1153,34 @@ private SupplementalDataInfo(File directory) {
this.validity = Validity.getInstance(directory.toString() + "/../validity/");
} // hide

/**
 * Immutable bundle of the attributes recorded for one BCP47 key/subtype pair: its aliases,
 * description, the version it was introduced in, its preferred replacement, and whether it is
 * deprecated.
 */
public static class Bcp47KeyInfo {
    final String description;
    final VersionInfo since;
    final String preferred;
    final boolean deprecated;
    final Set<String> aliases;

    /**
     * @param aliases alias names for the subtype; stored as passed in (not copied)
     * @param description human-readable description
     * @param since version string, parsed with {@code VersionInfo.getInstance}; may be null
     * @param preferred preferred replacement value, stored as passed in
     * @param deprecated raw attribute value; anything other than null or {@code "false"} marks
     *     the item as deprecated
     */
    public Bcp47KeyInfo(
            Set<String> aliases,
            String description,
            String since,
            String preferred,
            String deprecated) {
        this.aliases = aliases;
        this.description = description;
        if (since == null) {
            this.since = null;
        } else {
            this.since = VersionInfo.getInstance(since);
        }
        this.preferred = preferred;
        // Deprecated unless the attribute is absent or explicitly "false".
        this.deprecated = deprecated != null && !deprecated.equals("false");
    }

    @Override
    public String toString() {
        final String pattern =
                "{description=«%s» since=%s preferred=%s deprecated=%s aliases=%s}";
        return String.format(pattern, description, since, preferred, deprecated, aliases);
    }
}

private void makeStuffSafe() {
// now make stuff safe
allLanguages.addAll(languageToPopulation.keySet());
Expand Down Expand Up @@ -1224,19 +1260,54 @@ private void makeStuffSafe() {
}
typeToLocaleToDayPeriodInfo = CldrUtility.protectCollection(typeToLocaleToDayPeriodInfo);
languageMatch = CldrUtility.protectCollection(languageMatch);
bcp47Key2Subtypes.freeze();

bcp47Extension2Keys.freeze();
bcp47Aliases.freeze();
bcp47Key2Subtypes.freeze();
CldrUtility.protectCollection(bcp47ValueType);
if (bcp47Key2Subtypes.isEmpty()) {
throw new InternalError(
"No BCP47 key 2 subtype data was loaded from bcp47 dir "
+ getBcp47Directory().getAbsolutePath());
}

bcp47Aliases.freeze();
CldrUtility.protectCollection(bcp47Descriptions);
CldrUtility.protectCollection(bcp47Since);
CldrUtility.protectCollection(bcp47Preferred);
CldrUtility.protectCollection(bcp47Deprecated);
CldrUtility.protectCollection(bcp47ValueType);

// create clean structure

for (Entry<String, Set<String>> entry : bcp47Extension2Keys.keyValuesSet()) {
for (String key : entry.getValue()) {
Map<String, Bcp47KeyInfo> subtypeToInfo = bcp47KeyToSubtypeToInfo.get(key);
if (subtypeToInfo == null) {
bcp47KeyToSubtypeToInfo.put(key, subtypeToInfo = new TreeMap<>());
}
Map<String, String> aliasToRegular = bcp47KeyToAliasToSubtype.get(key);
if (aliasToRegular == null) {
bcp47KeyToAliasToSubtype.put(key, aliasToRegular = new TreeMap<>());
}
for (String subtype : bcp47Key2Subtypes.get(key)) {
final R2<String, String> pair = R2.of(key, subtype);
final Set<String> aliases = bcp47Aliases.get(pair);
final Bcp47KeyInfo info =
new Bcp47KeyInfo(
aliases,
bcp47Descriptions.get(pair),
bcp47Since.get(pair),
bcp47Preferred.get(pair),
bcp47Deprecated.get(pair));
subtypeToInfo.put(subtype, info);
final Map<String, String> aliasToRegularFinal = aliasToRegular;
if (aliases != null) {
aliases.forEach(x -> aliasToRegularFinal.put(x, subtype));
}
}
}
}
bcp47KeyToSubtypeToInfo = CldrUtility.protectCollection(bcp47KeyToSubtypeToInfo);
bcp47KeyToAliasToSubtype = CldrUtility.protectCollection(bcp47KeyToAliasToSubtype);

CoverageLevelInfo.fixEU(coverageLevels, this);
coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
Expand Down Expand Up @@ -5173,4 +5244,64 @@ public UnitPrefixInfo getUnitPrefixInfo(String prefix) {
/**
 * Returns the known unit prefixes.
 *
 * @return the key set of the unit-prefix info map
 */
public Set<String> getUnitPrefixes() {
    final Set<String> prefixes = unitPrefixInfo.keySet();
    return prefixes;
}

/**
 * Lazily computed, memoized set of long timezone IDs with the deprecated ones filtered out.
 *
 * <p>This is more complicated than it seems. The deprecation is recorded in timezones.xml,
 * e.g.:
 *
 * <pre>{@code
 * <type name="cathu" description="Thunder Bay, Canada" deprecated="true" preferred="cator"/>
 * <type name="cator" description="Toronto, Canada" alias="America/Toronto America/Montreal
 *     Canada/Eastern America/Nipigon America/Thunder_Bay"/>
 * }</pre>
 *
 * <p>We need to find the short IDs that are deprecated, but there is a problem due to
 * https://unicode-org.atlassian.net/browse/CLDR-17412, so for now a hard-coded exclusion list
 * is used instead of the BCP47 data.
 *
 * <p>For example: America/Nipigon, America/Thunder_Bay, America/Rainy_River.
 */
Supplier<Set<String>> goodTimezones = Suppliers.memoize(this::computeGoodTimezones);

/**
 * Builds the value served by {@link #goodTimezones}.
 *
 * @return an unmodifiable set of the available tzid codes minus the deprecated ones
 */
private Set<String> computeGoodTimezones() {
    final Set<String> availableLongTz = sc.getAvailableCodes(CodeType.tzid);
    if (true) { // hack for now
        final Set<String> hack =
                Set.of(
                        "America/Santa_Isabel",
                        "Australia/Currie",
                        "America/Yellowknife",
                        "America/Rainy_River",
                        "America/Thunder_Bay",
                        "America/Nipigon",
                        "America/Pangnirtung",
                        "Europe/Uzhgorod",
                        "Europe/Zaporozhye",
                        "Pacific/Johnston");
        return Set.copyOf(Sets.difference(availableLongTz, hack));
    } else { // TODO restore when CLDR-17412 is fixed
        final Map<String, String> aliasToRegular = bcp47KeyToAliasToSubtype.get("tz");
        final Map<String, Bcp47KeyInfo> subtypeToInfo = bcp47KeyToSubtypeToInfo.get("tz");
        if (aliasToRegular == null || subtypeToInfo == null) {
            throw new IllegalStateException("No BCP47 data loaded for key tz");
        }
        return availableLongTz.stream()
                .filter(
                        longId -> {
                            final String shortId = aliasToRegular.get(longId);
                            // A long ID may have no alias entry; do not look up a null key.
                            final Bcp47KeyInfo info =
                                    shortId == null ? null : subtypeToInfo.get(shortId);
                            // Without info the ID cannot be shown to be deprecated: keep it.
                            return info == null || !info.deprecated;
                        })
                .collect(Collectors.toUnmodifiableSet());
    }
}

/**
 * Returns the timezone codes CLDR treats as good.
 *
 * @return the set produced by the {@code goodTimezones} supplier
 */
public Set<String> getCLDRTimezoneCodes() {
    final Set<String> timezoneCodes = goodTimezones.get();
    return timezoneCodes;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ Google ; nn ; Modern ; Nynorsk
Google ; no ; modern ; T2 Norwegian (Bokmål)
Google ; or ; modern ; T5 Odia
Google ; pa ; modern ; T4.1 Punjabi
Google ; pcm ; modern ; Nigerian Pidgin
Google ; pcm ; moderate ; Nigerian Pidgin
Google ; pl ; modern ; T1 Polish
Google ; ps ; modern ; T5 Pashto
Google ; pt ; modern ; T1 Brazilian Portuguese
Expand Down Expand Up @@ -283,7 +283,7 @@ Apple ; kn ; modern
Apple ; ko ; modern
Apple ; lt ; modern
Apple ; lv ; modern
Apple ; mi ; modern
Apple ; mi ; moderate
Apple ; mk ; modern
Apple ; ml ; modern
Apple ; mr ; modern
Expand Down Expand Up @@ -485,7 +485,7 @@ Cldr ; cv ; basic
Cldr ; en_AU ; modern
Cldr ; es_MX ; modern
Cldr ; fr_CA ; modern
Cldr ; mi ; modern
Cldr ; mi ; moderate
Cldr ; zh_Hant_HK ; modern

#Cldr other (from Google)
Expand Down Expand Up @@ -516,7 +516,7 @@ Cldr ; su ; basic ; Sundanese (script TBD)
Cldr ; ks_Deva ; basic ; Kashmiri (Devanagari)
Cldr ; sd_Deva ; basic ; Sindhi (Devanagari script)
# Cldr ; cad ; basic ; Caddo
Cldr ; pcm ; modern ; Nigerian Pidgin
Cldr ; pcm ; moderate ; Nigerian Pidgin

Cldr ; bgc ; basic ; Haryanvi
Cldr ; bho ; basic ; Bhojpuri
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -840,14 +840,12 @@ public void TestFallbackFormat() {

public void Test4897() {
ExampleGenerator exampleGenerator = getExampleGenerator("it");
final CLDRFile cldrFile = exampleGenerator.getCldrFile();
for (String xpath :
With.in(
exampleGenerator
.getCldrFile()
.iterator(
"//ldml/dates/timeZoneNames",
exampleGenerator.getCldrFile().getComparator()))) {
String value = exampleGenerator.getCldrFile().getStringValue(xpath);
cldrFile.iterator(
"//ldml/dates/timeZoneNames", cldrFile.getComparator()))) {
String value = cldrFile.getStringValue(xpath);
String actual = exampleGenerator.getExampleHtml(xpath, value);
if (actual == null) {
if (!xpath.contains("singleCountries") && !xpath.contains("gmtZeroFormat")) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package org.unicode.cldr.util;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.Set;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

Expand Down Expand Up @@ -30,4 +34,12 @@ void testTargetCoverageLevel(final String locale, final String level) {
"Expected getTargetCoverageLevel(%s)=%s but was %s",
locale, expectLevel, actualLevel));
}

/** Verifies that the CLDR timezone code set keeps Europe/Andorra and drops America/Nipigon. */
@Test
void testTimezoneExclusions() {
    final Set<String> cldrZones = SupplementalDataInfo.getInstance().getCLDRTimezoneCodes();
    assertTrue(cldrZones.contains("Europe/Andorra"));
    assertFalse(cldrZones.contains("America/Nipigon"));
}
}
Loading