-
Notifications
You must be signed in to change notification settings - Fork 182
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Compute collation fallback supplement from CLDR #4720
Compute collation fallback supplement from CLDR #4720
Conversation
@@ -4948,7 +4948,7 @@ displaynames/variants@1, th, 3709B, ed1d48e400b1c35c | |||
displaynames/variants@1, tr, 1094B, f75a0ab8f0d0bc98 | |||
fallback/likelysubtags@1, und, 1963B, ea452a518b2defb6 | |||
fallback/parents@1, und, 2553B, 8f3b5fb7bff27d97 | |||
fallback/supplement/co@1, und, 70B, 71e01f2099eca417 | |||
fallback/supplement/co@1, und, 246B, 78151ac4e67429c5 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is that size increase expected?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes; if you look at the JSON, it adds a handful of parent relationships.
@@ -457,8 +457,7 @@ mod tests { | |||
requires_data: true, | |||
extension_key: None, | |||
fallback_supplement: Some(LocaleFallbackSupplement::Collation), | |||
// TODO(#1964): add "zh" as a target. | |||
expected_language_chain: &["yue-HK", "yue", "zh-Hant"], | |||
expected_language_chain: &["yue-HK", "yue", "zh-Hant", "zh"], |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
static VALUES: [&<icu::collator::provider::CollationReorderingV1Marker as icu_provider::DataMarker>::Yokeable; 50usize] = [&AM, &AR, &AS, &AZ, &BE, &BE, &AS, &AZ, &BE, &CHR, &EL, &AR, &GU, &HE, &HI, &AZ, &HY, &JA, &KA, &BE, &KM, &KN, &KO, &HI, &KU, &BE, &LO, &BE, &ML, &MN, &HI, &MY, &NE, &OR, &PA, &AR, &BE, &SI, &BE, &AZ, &TA, &TE, &TH, &AR, &BE, &AR, &ZH, &ZH_U_CO_STROKE, &ZH_U_CO_STROKE, &ZH_U_CO_STROKE]; | ||
static KEYS: [&str; 50usize] = ["am", "ar", "as", "az", "be", "bg", "bn", "bs", "bs-Cyrl", "chr", "el", "fa", "gu", "he", "hi", "hr", "hy", "ja", "ka", "kk", "km", "kn", "ko", "kok", "ku", "ky", "lo", "mk", "ml", "mn", "mr", "my", "ne", "or", "pa", "ps", "ru", "si", "sr", "sr-Latn", "ta", "te", "th", "ug", "uk", "ur", "zh", "zh-u-co-stroke", "zh-u-co-unihan", "zh-u-co-zhuyin"]; | ||
static YUE_U_CO_STROKE: <icu::collator::provider::CollationReorderingV1Marker as icu_provider::DataMarker>::Yokeable = icu::collator::provider::CollationReorderingV1 { min_high_no_reorder: 4261412864u32, reorder_table: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b"\0\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0B\x0C\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'(\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDD\xDE\xDF\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\0\xF9\xFA'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xFE\xFF") }, reorder_ranges: unsafe { zerovec::ZeroVec::from_bytes_unchecked(b",\0v{}\0\0~\xA9\xFF\0\xFE") } }; | ||
static VALUES: [&<icu::collator::provider::CollationReorderingV1Marker as icu_provider::DataMarker>::Yokeable; 56usize] = [&AM, &AR, &AS, &AZ, &BE, &BE, &AS, &AZ, &BE, &CHR, &EL, &AR, &GU, &HE, &HI, &AZ, &HY, &JA, &KA, &BE, &KM, &KN, &KO, &HI, &KU, &BE, &LO, &BE, &ML, &MN, &HI, &MY, &NE, &OR, &PA, &AR, &BE, &SI, &BE, &AZ, &TA, &TE, &TH, &AR, &BE, &AR, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &ZH, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE, &YUE_U_CO_STROKE]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this seems wrong
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I think this is most likely because `unicode_extension_defaults` is not being used anywhere, so all of these would inherit `zh` (== `pinyin`) if allowed to fall back.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I want to replace this with the other logic we sketched out last week, but this is a step in the right direction.
#1964