Skip to content

Commit

Permalink
Compute collation fallback supplement from CLDR (#4720)
Browse files Browse the repository at this point in the history
  • Loading branch information
robertbastian authored Mar 26, 2024
1 parent 9b6d8ee commit 8fe90f1
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 54 deletions.
3 changes: 1 addition & 2 deletions components/locid_transform/src/fallback/algorithms.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,8 +457,7 @@ mod tests {
requires_data: true,
extension_key: None,
fallback_supplement: Some(LocaleFallbackSupplement::Collation),
// TODO(#1964): add "zh" as a target.
expected_language_chain: &["yue-HK", "yue", "zh-Hant"],
expected_language_chain: &["yue-HK", "yue", "zh-Hant", "zh"],
expected_region_chain: &["yue-HK", "und-HK"],
},
];
Expand Down
Binary file modified provider/adapters/tests/data/blob.postcard
Binary file not shown.
10 changes: 5 additions & 5 deletions provider/baked/collator/data/macros/collator_data_v1.rs.data

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions provider/baked/collator/data/macros/collator_meta_v1.rs.data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions provider/baked/collator/data/macros/collator_reord_v1.rs.data

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion provider/core/src/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,14 @@ pub struct LocaleFallbackConfig {
/// .fallback_for(locale!("yue-HK").into());
///
/// // Run the algorithm and check the results.
/// // TODO(#1964): add "zh" as a target.
/// assert_eq!(fallback_iterator.get(), &locale!("yue-HK").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("yue").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("zh-Hant").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("zh").into());
/// fallback_iterator.step();
/// assert_eq!(fallback_iterator.get(), &locale!("und").into());
/// ```
pub fallback_supplement: Option<LocaleFallbackSupplement>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,13 @@
use icu_locid::LanguageIdentifier;
use serde::Deserialize;
use std::collections::HashMap;
use std::collections::{BTreeMap, HashMap};

/// Serde model for a parent-locales table: one general mapping plus one
/// collation-specific mapping, both keyed by the child locale.
///
/// NOTE(review): presumably this mirrors the `parentLocales` object of CLDR's
/// `supplemental/parentLocales.json` — confirm against the CLDR JSON layout.
#[derive(PartialEq, Debug, Deserialize)]
pub struct ParentLocales {
/// General child → parent mapping; stored under the JSON key `parentLocale`.
#[serde(rename = "parentLocale")]
pub parent_locale: HashMap<LanguageIdentifier, LanguageIdentifier>,
/// Collation child → parent mapping; stored under the JSON key `collations`.
/// Keys are kept as raw strings rather than parsed language identifiers.
/// No serde default is declared, so deserialization requires the key to be present.
pub collations: BTreeMap<String, LanguageIdentifier>,
}

#[derive(PartialEq, Debug, Deserialize)]
Expand Down
37 changes: 0 additions & 37 deletions provider/datagen/src/transform/cldr/fallback/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ use crate::transform::cldr::cldr_serde;

use super::locale_canonicalizer::likely_subtags::LikelySubtagsResources;
use icu_locid::{
extensions::unicode::{key, Key},
langid,
subtags::{Language, Region, Script},
LanguageIdentifier,
};
Expand Down Expand Up @@ -53,35 +51,6 @@ impl DataProvider<LocaleFallbackParentsV1Marker> for crate::DatagenProvider {
}
}

/// Serves the collation fallback supplement from fixed, hand-maintained tables.
impl DataProvider<CollationFallbackSupplementV1Marker> for crate::DatagenProvider {
    fn load(
        &self,
        req: DataRequest,
    ) -> Result<DataResponse<CollationFallbackSupplementV1Marker>, DataError> {
        self.check_req::<CollationFallbackSupplementV1Marker>(req)?;

        // TODO(#1964): Load this data from its proper sources. For now, it is copied from:
        // https://github.com/unicode-org/icu/blob/main/tools/cldr/cldr-to-icu/build-icu-data.xml
        // as well as from CLDR XML.

        // Single parent override: `yue` falls back to `zh-Hant`.
        let parents = [(
            <&UnvalidatedStr>::from("yue"),
            <(Language, Option<Script>, Option<Region>)>::from(&langid!("zh-Hant")),
        )]
        .into_iter()
        .collect();

        // Entries for the `co` extension key, one per listed locale.
        let extension_defaults: [(Key, &UnvalidatedStr, &UnvalidatedStr); 2] = [
            (
                key!("co"),
                <&UnvalidatedStr>::from("zh"),
                <&UnvalidatedStr>::from("pinyin"),
            ),
            (
                key!("co"),
                <&UnvalidatedStr>::from("zh-Hant"),
                <&UnvalidatedStr>::from("stroke"),
            ),
        ];
        let unicode_extension_defaults = extension_defaults.into_iter().collect();

        Ok(DataResponse {
            metadata: Default::default(),
            payload: Some(DataPayload::from_owned(LocaleFallbackSupplementV1 {
                parents,
                unicode_extension_defaults,
            })),
        })
    }
}

impl IterableDataProvider<LocaleFallbackLikelySubtagsV1Marker> for crate::DatagenProvider {
fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError> {
Ok(vec![Default::default()])
Expand All @@ -94,12 +63,6 @@ impl IterableDataProvider<LocaleFallbackParentsV1Marker> for crate::DatagenProvi
}
}

impl IterableDataProvider<CollationFallbackSupplementV1Marker> for crate::DatagenProvider {
fn supported_locales(&self) -> Result<Vec<DataLocale>, DataError> {
Ok(vec![Default::default()])
}
}

fn transform<'x>(
it: impl Iterator<Item = (&'x LanguageIdentifier, &'x LanguageIdentifier)> + 'x,
) -> LocaleFallbackLikelySubtagsV1<'static> {
Expand Down
59 changes: 59 additions & 0 deletions provider/datagen/src/transform/icuexport/collator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,76 @@ use icu_collections::codepointtrie::CodePointTrie;
use icu_locid::extensions::unicode::key;
use icu_locid::extensions::unicode::Value;
use icu_locid::subtags::language;
use icu_locid::subtags::Language;
use icu_locid::subtags::Region;
use icu_locid::subtags::Script;
use icu_locid::LanguageIdentifier;
use icu_locid::Locale;
use icu_locid_transform::provider::CollationFallbackSupplementV1Marker;
use icu_locid_transform::provider::LocaleFallbackSupplementV1;
use icu_provider::prelude::*;
use std::collections::HashSet;
use std::convert::TryFrom;
use std::str::FromStr;
use writeable::Writeable;
use zerovec::ule::UnvalidatedStr;
use zerovec::ZeroVec;

mod collator_serde;

impl DataProvider<CollationFallbackSupplementV1Marker> for crate::DatagenProvider {
fn load(
&self,
req: DataRequest,
) -> Result<DataResponse<CollationFallbackSupplementV1Marker>, DataError> {
self.check_req::<CollationFallbackSupplementV1Marker>(req)?;

let data = LocaleFallbackSupplementV1 {
parents: self
.cldr()?
.core()
.read_and_parse::<super::super::cldr::cldr_serde::parent_locales::Resource>(
"supplemental/parentLocales.json",
)?
.supplemental
.parent_locales
.collations
.iter()
.map(|(from, to)| {
(
<&UnvalidatedStr>::from(from.as_str()),
<(Language, Option<Script>, Option<Region>)>::from(to),
)
})
.collect(),
unicode_extension_defaults: [
(
key!("co"),
<&UnvalidatedStr>::from("zh"),
<&UnvalidatedStr>::from("pinyin"),
),
(
key!("co"),
<&UnvalidatedStr>::from("zh-Hant"),
<&UnvalidatedStr>::from("stroke"),
),
]
.into_iter()
.collect(),
};
Ok(DataResponse {
metadata: Default::default(),
payload: Some(DataPayload::from_owned(data)),
})
}
}

impl IterableDataProviderInternal<CollationFallbackSupplementV1Marker> for crate::DatagenProvider {
fn supported_locales_impl(&self) -> Result<HashSet<DataLocale>, DataError> {
Ok(HashSet::from_iter([Default::default()]))
}
}

impl crate::DatagenProvider {
/// Backward compatibility for <https://unicode-org.atlassian.net/browse/CLDR-15603>
fn has_legacy_swedish_variants(&self) -> bool {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8fe90f1

Please sign in to comment.