diff --git a/Cargo.lock b/Cargo.lock index 48ca866dc1e..8bca0e07f74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -855,14 +855,17 @@ dependencies = [ name = "icu_datetime" version = "0.1.0" dependencies = [ + "bincode", "criterion", "icu_benchmark_macros", "icu_locid", "icu_locid_macros", "icu_provider", "icu_testdata", + "litemap", "serde", "serde_json", + "smallvec", "tinystr", "writeable", ] @@ -944,12 +947,14 @@ dependencies = [ "icu_provider", "icu_testdata", "json", + "litemap", "log", "mktemp", "reqwest", "serde", "serde-tuple-vec-map", "serde_json", + "smallvec", "tinystr", "unzip", "urlencoding", diff --git a/components/datetime/Cargo.toml b/components/datetime/Cargo.toml index 73ab04a1972..be42f742a00 100644 --- a/components/datetime/Cargo.toml +++ b/components/datetime/Cargo.toml @@ -27,8 +27,10 @@ skip_optional_dependencies = true icu_locid = { version = "0.1", path = "../locid" } icu_provider = { version = "0.1", path = "../provider" } writeable = { version = "0.2", path = "../../utils/writeable" } +litemap = { version = "0.1.1", path = "../../utils/litemap" } tinystr = { version = "0.4.1" } serde = { version = "1.0", features = ["derive"], optional = true } +smallvec = "1.4" [dev-dependencies] criterion = "0.3" @@ -38,6 +40,7 @@ icu_testdata = { version = "0.1", path = "../../resources/testdata" } icu_locid_macros = { version = "0.1", path = "../locid/macros" } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +bincode = "1.3" [lib] bench = false # This option is required for Benchmark CI @@ -45,7 +48,7 @@ bench = false # This option is required for Benchmark CI [features] default = ["provider_serde"] bench = [] -provider_serde = ["serde"] +provider_serde = ["serde", "litemap/serde"] serialize_none = [] [[bench]] diff --git a/components/datetime/src/fields/length.rs b/components/datetime/src/fields/length.rs index 847a1a73051..469f1c604d5 100644 --- a/components/datetime/src/fields/length.rs +++ 
b/components/datetime/src/fields/length.rs @@ -2,14 +2,21 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::convert::TryFrom; +use std::{ + cmp::{Ord, PartialOrd}, + convert::TryFrom, +}; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum LengthError { TooLong, } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum FieldLength { One = 1, TwoDigit = 2, diff --git a/components/datetime/src/fields/mod.rs b/components/datetime/src/fields/mod.rs index bfb4d83a27f..fa6c56d208c 100644 --- a/components/datetime/src/fields/mod.rs +++ b/components/datetime/src/fields/mod.rs @@ -3,26 +3,40 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). mod length; -mod symbols; +pub(crate) mod symbols; -pub use length::FieldLength; +pub use length::{FieldLength, LengthError}; pub use symbols::*; -use std::convert::{TryFrom, TryInto}; +use std::{ + cmp::{Ord, PartialOrd}, + convert::{TryFrom, TryInto}, + fmt, +}; #[derive(Debug)] pub enum Error { TooLong(FieldSymbol), } -#[derive(Debug, PartialEq, Clone, Copy)] +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::TooLong(symbol) => write!(f, "field {:?} is too long", symbol), + } + } +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub struct Field { pub symbol: FieldSymbol, pub length: FieldLength, } -impl Field {} - impl From<(FieldSymbol, FieldLength)> for Field { fn from(input: (FieldSymbol, FieldLength)) -> Self { Self { diff --git a/components/datetime/src/fields/symbols.rs b/components/datetime/src/fields/symbols.rs index 66f2150e4d1..dae218a29c0 100644 --- 
a/components/datetime/src/fields/symbols.rs +++ b/components/datetime/src/fields/symbols.rs @@ -2,9 +2,9 @@ // called LICENSE at the top level of the ICU4X source tree // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). -use std::convert::TryFrom; +use std::{cmp::Ordering, convert::TryFrom}; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum SymbolError { /// Unknown field symbol Unknown(u8), @@ -12,7 +12,11 @@ pub enum SymbolError { Invalid(char), } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum FieldSymbol { Year(Year), Month(Month), @@ -24,6 +28,43 @@ pub enum FieldSymbol { Second(Second), } +impl FieldSymbol { + /// Skeletons are a Vec, and represent the Fields that can be used to match to a + /// specific pattern. The order of the Vec does not affect the Pattern that is output. + /// However, it's more performant when matching these fields, and it's more deterministic + /// when serializing them to present them in a consistent order. + /// + /// This ordering is taken by the order of the fields listed in the [UTS 35 Date Field Symbol Table] + /// (https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table), and are generally + /// ordered most significant to least significant. 
+ /// + fn get_canonical_order(&self) -> u8 { + match self { + FieldSymbol::Year(Year::Calendar) => 0, + FieldSymbol::Year(Year::WeekOf) => 1, + FieldSymbol::Month(Month::Format) => 2, + FieldSymbol::Month(Month::StandAlone) => 3, + FieldSymbol::Day(Day::DayOfMonth) => 4, + FieldSymbol::Day(Day::DayOfYear) => 5, + FieldSymbol::Day(Day::DayOfWeekInMonth) => 6, + FieldSymbol::Day(Day::ModifiedJulianDay) => 7, + FieldSymbol::Weekday(Weekday::Format) => 8, + FieldSymbol::Weekday(Weekday::Local) => 9, + FieldSymbol::Weekday(Weekday::StandAlone) => 10, + FieldSymbol::DayPeriod(DayPeriod::AmPm) => 11, + FieldSymbol::DayPeriod(DayPeriod::NoonMidnight) => 12, + FieldSymbol::Hour(Hour::H11) => 13, + FieldSymbol::Hour(Hour::H12) => 14, + FieldSymbol::Hour(Hour::H23) => 15, + FieldSymbol::Hour(Hour::H24) => 16, + FieldSymbol::Minute => 17, + FieldSymbol::Second(Second::Second) => 18, + FieldSymbol::Second(Second::FractionalSecond) => 19, + FieldSymbol::Second(Second::Millisecond) => 20, + } + } +} + impl TryFrom for FieldSymbol { type Error = SymbolError; fn try_from(b: u8) -> Result { @@ -53,7 +94,65 @@ impl TryFrom for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +impl From for char { + fn from(symbol: FieldSymbol) -> Self { + match symbol { + FieldSymbol::Year(year) => match year { + Year::Calendar => 'y', + Year::WeekOf => 'Y', + }, + FieldSymbol::Month(month) => match month { + Month::Format => 'M', + Month::StandAlone => 'L', + }, + FieldSymbol::Day(day) => match day { + Day::DayOfMonth => 'd', + Day::DayOfYear => 'D', + Day::DayOfWeekInMonth => 'F', + Day::ModifiedJulianDay => 'g', + }, + FieldSymbol::Weekday(weekday) => match weekday { + Weekday::Format => 'E', + Weekday::Local => 'e', + Weekday::StandAlone => 'c', + }, + FieldSymbol::DayPeriod(dayperiod) => match dayperiod { + DayPeriod::AmPm => 'a', + DayPeriod::NoonMidnight => 'b', + }, + FieldSymbol::Hour(hour) => match hour { + Hour::H11 => 'K', + Hour::H12 => 'h', + Hour::H23 => 'H', + Hour::H24 => 
'k', + }, + FieldSymbol::Minute => 'm', + FieldSymbol::Second(second) => match second { + Second::Second => 's', + Second::FractionalSecond => 'S', + Second::Millisecond => 'A', + }, + } + } +} + +impl PartialOrd for FieldSymbol { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FieldSymbol { + fn cmp(&self, other: &Self) -> Ordering { + self.get_canonical_order().cmp(&other.get_canonical_order()) + } +} + +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Year { Calendar, WeekOf, @@ -76,7 +175,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Month { Format, StandAlone, @@ -99,7 +202,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Day { DayOfMonth, DayOfYear, @@ -126,7 +233,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Hour { H11, H12, @@ -153,7 +264,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Second { Second, FractionalSecond, @@ -178,7 +293,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum Weekday { Format, Local, @@ -203,7 
+322,11 @@ impl From for FieldSymbol { } } -#[derive(Debug, PartialEq, Clone, Copy)] +#[derive(Debug, Eq, PartialEq, Clone, Copy)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum DayPeriod { AmPm, NoonMidnight, diff --git a/components/datetime/src/lib.rs b/components/datetime/src/lib.rs index a35bb4bb2e7..75a0d08b4c1 100644 --- a/components/datetime/src/lib.rs +++ b/components/datetime/src/lib.rs @@ -83,6 +83,7 @@ pub mod options; #[doc(hidden)] pub mod pattern; pub mod provider; +pub mod skeleton; use crate::provider::helpers::DateTimePatterns; use date::DateTimeInput; diff --git a/components/datetime/src/pattern/error.rs b/components/datetime/src/pattern/error.rs index a824d20e24b..9ca49af91d6 100644 --- a/components/datetime/src/pattern/error.rs +++ b/components/datetime/src/pattern/error.rs @@ -3,6 +3,7 @@ // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). use crate::fields; +use std::fmt; #[derive(Debug, PartialEq)] pub enum Error { @@ -12,6 +13,22 @@ pub enum Error { UnclosedPlaceholder, } +/// These strings follow the recommendations for the serde::de::Unexpected::Other type. 
+/// https://docs.serde.rs/serde/de/enum.Unexpected.html#variant.Other +/// +/// Serde will generate an error such as: +/// "invalid value: unclosed literal in pattern, expected a valid UTS 35 pattern string at line 1 column 12" +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Error::FieldTooLong(symbol) => write!(f, "{:?} field too long in pattern", symbol), + Error::UnknownSubstitution(ch) => write!(f, "unknown substitution {} in pattern", ch), + Error::UnclosedLiteral => write!(f, "unclosed literal in pattern"), + Error::UnclosedPlaceholder => write!(f, "unclosed placeholder in pattern"), + } + } +} + impl From for Error { fn from(input: fields::Error) -> Self { match input { diff --git a/components/datetime/src/pattern/mod.rs b/components/datetime/src/pattern/mod.rs index 794e9afaa74..e59dd74195d 100644 --- a/components/datetime/src/pattern/mod.rs +++ b/components/datetime/src/pattern/mod.rs @@ -8,10 +8,21 @@ mod parser; use crate::fields::{self, Field, FieldLength, FieldSymbol}; pub use error::Error; use parser::Parser; -use std::convert::TryFrom; -use std::iter::FromIterator; +use std::{convert::TryFrom, fmt}; +use std::{fmt::Write, iter::FromIterator}; + +#[cfg(feature = "provider_serde")] +use serde::{ + de, + ser::{self, SerializeSeq}, + Deserialize, Deserializer, Serialize, +}; #[derive(Debug, PartialEq, Clone)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub enum PatternItem { Field(fields::Field), Literal(String), @@ -52,6 +63,10 @@ impl<'p> From for PatternItem { /// The granularity of time represented in a pattern item. /// Ordered from least granular to most granular for comparsion. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +#[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) +)] pub(super) enum TimeGranularity { Hours, Minutes, @@ -108,8 +123,159 @@ impl From> for Pattern { } } +/// This trait is implemented in order to provide the machinery to convert a `Pattern` to a UTS 35 +/// pattern string. It could also be implemented as the Writeable trait, but at the time of writing +/// this was not done, as this code would need to implement the `write_len` method, which would +/// need to duplicate the branching logic of the `fmt` method here. This code is used in generating +/// the data providers and is not as performance sensitive. +impl fmt::Display for Pattern { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + for pattern_item in self.items().iter() { + match pattern_item { + PatternItem::Field(field) => { + let ch: char = field.symbol.into(); + for _ in 0..field.length as usize { + formatter.write_char(ch)?; + } + } + PatternItem::Literal(literal) => { + // Determine if the literal contains any characters that would need to be escaped. + let mut needs_escaping = false; + for ch in literal.chars() { + if ch.is_ascii_alphabetic() || ch == '\'' { + needs_escaping = true; + break; + } + } + + if needs_escaping { + let mut ch_iter = literal.trim_end().chars().peekable(); + + // Do not escape the leading whitespace. + while let Some(ch) = ch_iter.peek() { + if ch.is_whitespace() { + formatter.write_char(*ch)?; + ch_iter.next(); + } else { + break; + } + } + + // Wrap in "'" and escape "'". + formatter.write_char('\'')?; + for ch in ch_iter { + if ch == '\'' { + // Escape a single quote. 
+ formatter.write_char('\\')?; + } + formatter.write_char(ch)?; + } + formatter.write_char('\'')?; + + // Add the trailing whitespace + for ch in literal.chars().rev() { + if ch.is_whitespace() { + formatter.write_char(ch)?; + } else { + break; + } + } + } else { + formatter.write_str(literal)?; + } + } + } + } + Ok(()) + } +} + impl FromIterator for Pattern { fn from_iter>(iter: I) -> Self { Self::from(iter.into_iter().collect::>()) } } + +#[cfg(feature = "provider_serde")] +struct DeserializePatternUTS35String; + +#[cfg(feature = "provider_serde")] +impl<'de> de::Visitor<'de> for DeserializePatternUTS35String { + type Value = Pattern; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "Expected to find a valid pattern.") + } + + fn visit_str(self, pattern_string: &str) -> Result + where + E: de::Error, + { + // Parse a string into a list of fields. + Pattern::from_bytes(pattern_string).map_err(|err| { + de::Error::invalid_value( + de::Unexpected::Other(&format!("{}", err)), + &"a valid UTS 35 pattern string", + ) + }) + } +} + +#[cfg(feature = "provider_serde")] +struct DeserializePatternBincode; + +#[cfg(feature = "provider_serde")] +impl<'de> de::Visitor<'de> for DeserializePatternBincode { + type Value = Pattern; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "Unable to deserialize a bincode Pattern.") + } + + fn visit_seq(self, mut seq: V) -> Result + where + V: de::SeqAccess<'de>, + { + let mut items = Vec::new(); + while let Some(item) = seq.next_element()? 
{ + items.push(item) + } + Ok(Pattern::from(items)) + } +} + +#[cfg(feature = "provider_serde")] +impl<'de> Deserialize<'de> for Pattern { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + deserializer.deserialize_str(DeserializePatternUTS35String) + } else { + deserializer.deserialize_seq(DeserializePatternBincode) + } + } +} + +#[cfg(feature = "provider_serde")] +impl Serialize for Pattern { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + if serializer.is_human_readable() { + // Serialize into the UTS 35 string representation. + let string: String = self.to_string(); + serializer.serialize_str(&string) + } else { + // Serialize into a bincode-friendly representation. This means that pattern parsing + // will not be needed when deserializing. + let mut seq = serializer.serialize_seq(Some(self.items.len()))?; + for item in self.items.iter() { + seq.serialize_element(item)?; + } + seq.end() + } + } +} diff --git a/components/datetime/src/provider/helpers.rs b/components/datetime/src/provider/helpers.rs index b1086028b6d..bdd016e0ed2 100644 --- a/components/datetime/src/provider/helpers.rs +++ b/components/datetime/src/provider/helpers.rs @@ -89,10 +89,10 @@ impl DateTimePatterns for provider::gregory::PatternsV1 { ) -> Result { let date_time = &self.date_time; let s = match style { - style::Date::Full => &date_time.full, - style::Date::Long => &date_time.long, - style::Date::Medium => &date_time.medium, - style::Date::Short => &date_time.short, + style::Date::Full => &date_time.style_patterns.full, + style::Date::Long => &date_time.style_patterns.long, + style::Date::Medium => &date_time.style_patterns.medium, + style::Date::Short => &date_time.style_patterns.short, }; Ok(Pattern::from_bytes_combination(s, date, time)?) 
} diff --git a/components/datetime/src/provider/mod.rs b/components/datetime/src/provider/mod.rs index 7b85ace1ace..db9e9a09716 100644 --- a/components/datetime/src/provider/mod.rs +++ b/components/datetime/src/provider/mod.rs @@ -50,7 +50,7 @@ pub mod gregory { pub time: patterns::StylePatternsV1, - pub date_time: patterns::StylePatternsV1, + pub date_time: patterns::DateTimeFormatsV1, } macro_rules! symbols { @@ -182,6 +182,13 @@ pub mod gregory { pub mod patterns { use super::*; + use crate::{ + pattern::{self, Pattern}, + skeleton::{Skeleton, SkeletonError}, + }; + use litemap::LiteMap; + use std::convert::TryFrom; + #[derive(Debug, PartialEq, Clone, Default)] #[cfg_attr( feature = "provider_serde", @@ -193,5 +200,77 @@ pub mod gregory { pub medium: Cow<'static, str>, pub short: Cow<'static, str>, } + + /// This struct is a public wrapper around the internal Pattern struct. This allows + /// access to the serialization and deserialization capabilities, without exposing the + /// internals of the pattern machinery. + /// + /// The Pattern is an "exotic type" in the serialization process, and handles its own + /// custom serialization practices. + #[derive(Debug, PartialEq, Clone, Default)] + #[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) + )] + pub struct PatternV1(pub Pattern); + + impl From for PatternV1 { + fn from(pattern: Pattern) -> Self { + Self(pattern) + } + } + + impl TryFrom<&str> for PatternV1 { + type Error = pattern::Error; + + fn try_from(pattern_string: &str) -> Result { + let pattern = Pattern::from_bytes(pattern_string); + match pattern { + Ok(pattern) => Ok(PatternV1::from(pattern)), + Err(err) => Err(err), + } + } + } + + /// This struct is a public wrapper around the internal Skeleton struct. This allows + /// access to the serialization and deserialization capabilities, without exposing the + /// internals of the skeleton machinery. 
+ /// + /// The Skeleton is an "exotic type" in the serialization process, and handles its own + /// custom serialization practices. + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] + #[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) + )] + pub struct SkeletonV1(pub Skeleton); + + impl TryFrom<&str> for SkeletonV1 { + type Error = SkeletonError; + + fn try_from(skeleton_string: &str) -> Result { + match Skeleton::try_from(skeleton_string) { + Ok(skeleton) => Ok(SkeletonV1(skeleton)), + Err(err) => Err(err), + } + } + } + + #[derive(Debug, PartialEq, Clone, Default)] + #[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) + )] + pub struct SkeletonsV1(pub LiteMap); + + #[derive(Debug, PartialEq, Clone, Default)] + #[cfg_attr( + feature = "provider_serde", + derive(serde::Serialize, serde::Deserialize) + )] + pub struct DateTimeFormatsV1 { + pub style_patterns: StylePatternsV1, + pub skeletons: SkeletonsV1, + } } } diff --git a/components/datetime/src/skeleton.rs b/components/datetime/src/skeleton.rs new file mode 100644 index 00000000000..4a1072119df --- /dev/null +++ b/components/datetime/src/skeleton.rs @@ -0,0 +1,471 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +//! Skeletons are used for pattern matching. See the [`Skeleton`] struct for more information. + +use smallvec::SmallVec; +use std::{convert::TryFrom, fmt}; + +use crate::fields::{self, Field, FieldLength, FieldSymbol}; + +#[cfg(feature = "provider_serde")] +use serde::{ + de, + ser::{self, SerializeSeq}, + Deserialize, Deserializer, Serialize, +}; + +#[derive(Debug, PartialEq)] +struct FieldIndex(usize); + +/// A `Skeleton` is used to represent what types of `Field`s are present in a `Pattern`. 
The +/// ordering of the `Skeleton`'s `Field`s has no bearing on the ordering of the `Field`s and +/// `Literal`s in the `Pattern`. +/// +/// A `Skeleton` is a `Vec`, but with the invariant that it is sorted according to the canonical +/// sort order. This order is sorted according to the most significant `Field` to the least significant. +/// For example, a field with a `Minute` symbol would precede a field with a `Second` symbol. +/// This order is documented as the order of fields as presented in the +/// [UTS 35 Date Field Symbol Table](https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table) +/// +/// The `Field`s are only sorted in the `Skeleton` in order to provide a deterministic +/// serialization strategy, and to provide a faster `Skeleton` matching operation. +#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)] +pub struct Skeleton(SmallVec<[fields::Field; 5]>); + +impl Skeleton { + fn fields_iter<'a>(&'a self) -> impl Iterator + 'a { + self.0.iter() + } + + fn fields_len(&self) -> usize { + self.0.len() + } +} + +/// This is an implementation of the serde deserialization visitor pattern. +#[cfg(feature = "provider_serde")] +struct DeserializeSkeletonFieldsUTS35String; + +#[cfg(feature = "provider_serde")] +impl<'de> de::Visitor<'de> for DeserializeSkeletonFieldsUTS35String { + type Value = Skeleton; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "field symbols representing a skeleton") + } + + /// A skeleton serialized into a string follows UTS 35. + /// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table + /// This string consists of a symbol that is repeated N times. This string is + /// deserialized here into the Skeleton format which is used in memory + /// when working with formatting date times. 
+ fn visit_str(self, skeleton_string: &str) -> Result + where + E: de::Error, + { + Skeleton::try_from(skeleton_string).map_err(|err| { + de::Error::invalid_value( + de::Unexpected::Other(&format!("{:?} {}", skeleton_string, err)), + &"field symbols representing a skeleton", + ) + }) + } +} + +#[cfg(feature = "provider_serde")] +struct DeserializeSkeletonBincode; + +#[cfg(feature = "provider_serde")] +impl<'de> de::Visitor<'de> for DeserializeSkeletonBincode { + type Value = Skeleton; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "ordered field symbols representing a skeleton") + } + + fn visit_seq(self, mut seq: V) -> Result + where + V: de::SeqAccess<'de>, + { + let mut items: SmallVec<[fields::Field; 5]> = SmallVec::new(); + while let Some(item) = seq.next_element()? { + if let Some(prev_item) = items.last() { + if prev_item > &item { + return Err(de::Error::invalid_value( + de::Unexpected::Other(&format!("field item out of order: {:?}", item)), + &"ordered field symbols representing a skeleton", + )); + } + if prev_item == &item { + return Err(de::Error::invalid_value( + de::Unexpected::Other(&format!("duplicate field: {:?}", item)), + &"ordered field symbols representing a skeleton", + )); + } + } + items.push(item) + } + Ok(Skeleton(items)) + } +} + +#[cfg(feature = "provider_serde")] +impl<'de> Deserialize<'de> for Skeleton { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + if deserializer.is_human_readable() { + deserializer.deserialize_str(DeserializeSkeletonFieldsUTS35String) + } else { + deserializer.deserialize_seq(DeserializeSkeletonBincode) + } + } +} + +#[cfg(feature = "provider_serde")] +impl Serialize for Skeleton { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + if serializer.is_human_readable() { + // Serialize into the UTS 35 string representation. 
+ let mut string = String::new(); + + for field in self.0.iter() { + let ch: char = field.symbol.into(); + for _ in 0..field.length as usize { + string.push(ch); + } + } + + serializer.serialize_str(&string) + } else { + // Serialize into a bincode-friendly representation. This means that pattern parsing + // will not be needed when deserializing. + let mut seq = serializer.serialize_seq(Some(self.fields_len()))?; + for item in self.fields_iter() { + seq.serialize_element(item)?; + } + seq.end() + } + } +} + +/// Parse a UTS 35 skeleton string into a `Skeleton`. Every byte must be a known field symbol and +/// every run of a repeated symbol must be a valid field length. Fields may appear in any order in +/// the input: each one is inserted at its canonical sorted position. A field that occurs twice +/// yields `SkeletonError::DuplicateField`. +impl TryFrom<&str> for Skeleton { + type Error = SkeletonError; + fn try_from(skeleton_string: &str) -> Result { + let mut fields: SmallVec<[fields::Field; 5]> = SmallVec::new(); + + let mut iter = skeleton_string.bytes().peekable(); + while let Some(byte) = iter.next() { + // Convert the byte to a valid field symbol. + let field_symbol = FieldSymbol::try_from(byte)?; + + // Count the run length; saturate so an over-long run cannot overflow `u8` (validated by `FieldLength::try_from` below). + let mut field_length: u8 = 1; + while let Some(next_byte) = iter.peek() { + if *next_byte != byte { + break; + } + field_length = field_length.saturating_add(1); + iter.next(); + } + + let field = Field::from((field_symbol, FieldLength::try_from(field_length)?)); + + match fields.binary_search(&field) { + Ok(_) => return Err(SkeletonError::DuplicateField), + Err(pos) => fields.insert(pos, field), + } + } + + Ok(Skeleton(fields)) + } +} + +#[derive(Debug)] +pub enum SkeletonError { + FieldLengthTooLong, + DuplicateField, + SymbolUnknown(char), + SymbolInvalid(char), + SymbolUnimplemented(char), + UnimplementedField(char), + Fields(fields::Error), +} + +/// These strings follow the recommendations for the serde::de::Unexpected::Other type. 
+/// https://docs.serde.rs/serde/de/enum.Unexpected.html#variant.Other +/// +/// Serde will generate an error such as: +/// "invalid value: unclosed literal in pattern, expected a valid UTS 35 pattern string at line 1 column 12" +impl fmt::Display for SkeletonError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SkeletonError::FieldLengthTooLong => write!(f, "field too long in skeleton"), + SkeletonError::DuplicateField => write!(f, "duplicate field in skeleton"), + SkeletonError::SymbolUnknown(ch) => write!(f, "symbol unknown {} in skeleton", ch), + SkeletonError::SymbolInvalid(ch) => write!(f, "symbol invalid {} in skeleton", ch), + SkeletonError::SymbolUnimplemented(ch) => { + write!(f, "symbol unimplemented {} in skeleton", ch) + } + SkeletonError::UnimplementedField(ch) => { + write!(f, "unimplemented field {} in skeleton", ch) + } + SkeletonError::Fields(err) => write!(f, "{} in skeleton", err), + } + } +} + +impl From for SkeletonError { + fn from(fields_error: fields::Error) -> Self { + SkeletonError::Fields(fields_error) + } +} + +impl From for SkeletonError { + fn from(_: fields::LengthError) -> Self { + SkeletonError::FieldLengthTooLong + } +} + +impl From for SkeletonError { + fn from(symbol_error: fields::SymbolError) -> Self { + match symbol_error { + fields::SymbolError::Invalid(ch) => SkeletonError::SymbolInvalid(ch), + fields::SymbolError::Unknown(byte) => { + match byte { + // TODO(#487) - Flexible day periods + b'B' + // TODO(#486) - Era + | b'G' + // TODO(#418) - Timezones + | b'Z' | b'v' + // TODO(#502) - Week of month + | b'W' + // TODO(#501) - Quarters + | b'Q' + // TODO (#488) - Week of year + | b'w' + => SkeletonError::SymbolUnimplemented(byte.into()), + _ => SkeletonError::SymbolUnknown(byte.into()), + } + } + } + } +} + +#[cfg(all(test, feature = "provider_serde"))] +mod test { + use super::*; + use crate::fields::{Day, Field, FieldLength, Month, Weekday}; + + // These were all of the skeletons from the 
"available formats" in the CLDR as of 2021-01 + // Generated with: + // https://gist.github.com/gregtatum/1d76bbdb87132f71a969a10f0c1d2d9c + + #[rustfmt::skip] + const SUPPORTED_STRING_SKELETONS: [&str; 51] = [ + "E", "dEEEE", "EHm", "EHms", "dE", "Ehm", "Ehms", "H", "HHmm", "HHmmss", "Hm", "Hms", "M", + "MdEEEE", "MdE", "MMM", "MMMdEEEE", "MMMdE", "MMMM", "MMMMdEEEE", "MMMMdE", "MMMMd", + "MMMMdd", "MMMd", "MMMdd", "MMd", "MMdd", "Md", "Mdd", "d", "h", "hm", "hms", "mmss", "ms", + "y", "yM", "yMdEEEE", "yMdE", "yMM", "yMMM", "yMMMdEEEE", "yMMMdE", "yMMMM", "yMMMMdEEEE", + "yMMMMdE", "yMMMMdcccc", "yMMMMd", "yMMMd", "yMMdd", "yMd", + ]; + + #[rustfmt::skip] + const UNSUPPORTED_STRING_SKELETONS: [&str; 28] = [ + // TODO(#487) - Flexible day periods + "Bh", "Bhm", "Bhms", "EBhm", "EBhms", + // TODO(#486) - Era + "Gy", "GyM", "GyMMM", "GyMMMdEEEE", "GyMMMdE", "GyMMMM", "GyMMMMdE", "GyMMMMd", "GyMMMd", + // TODO(#418) - Timezones + "HHmmZ", "Hmsv", "Hmsvvvv", "Hmv", "Hmvvvv", "hmsv", "hmsvvvv", "hmv", "hmvvvv", + // TODO(#502) - Week of month + "MMMMW", + // TODO(#501) - Quarters + "yQ", "yQQQ", "yQQQQ", + // TODO (#488) - Week of year + "yw" + ]; + + #[test] + fn test_known_skeletons_ok() { + for string_skeleton in &SUPPORTED_STRING_SKELETONS { + match Skeleton::try_from(*string_skeleton) { + Ok(_) => {} + Err(err) => { + panic!( + "Unable to parse string_skeleton {:?} with error, {:?}", + string_skeleton, err + ) + } + } + } + } + + #[test] + fn test_unsupported_skeletons_skeletons_err() { + for string_skeleton in &UNSUPPORTED_STRING_SKELETONS { + match Skeleton::try_from(*string_skeleton) { + Ok(_) => { + panic!( + "An unsupported field is now supported, consider moving {:?} to the \ + supported skeletons, and ensure the skeleton is properly implemented.", + string_skeleton + ) + } + Err(err) => match err { + SkeletonError::SymbolUnimplemented(_) => { + // Every skeleton should return this error. 
+ } + _ => panic!("{}", err), + }, + } + } + } + + #[test] + fn test_skeleton_deserialization() { + assert_eq!( + Skeleton::try_from("MMMMdEEEE").unwrap(), + Skeleton( + vec![ + Field { + symbol: Month::Format.into(), + length: FieldLength::Wide + }, + Field { + symbol: Day::DayOfMonth.into(), + length: FieldLength::One + }, + Field { + symbol: Weekday::Format.into(), + length: FieldLength::Wide + }, + ] + .into() + ) + ); + } + + #[test] + fn test_skeleton_tuple_ordering() { + let skeletons_strings = Vec::from([ + "y", "yM", "yMdE", "yMdEEEE", "yMMM", "M", "Md", "Mdd", "MMd", "MMdd", "d", "h", "hm", + "hms", "Hm", "Hms", "ms", "mmss", + ]); + + let skeleton_fields: Vec = skeletons_strings + .iter() + .map(|skeleton_string| Skeleton::try_from(*skeleton_string).unwrap()) + .collect(); + + for (strings, fields) in skeletons_strings.windows(2).zip(skeleton_fields.windows(2)) { + if fields[0].cmp(&fields[1]) != std::cmp::Ordering::Less { + panic!("Expected {:?} < {:?}", strings[0], strings[1]); + } + } + } + + #[test] + fn test_skeleton_json_reordering() { + let unordered_skeleton = "EEEEyMd"; + let ordered_skeleton = "yMdEEEE"; + + // Wrap the string in quotes so it's a JSON string. + let json: String = serde_json::to_string(unordered_skeleton).unwrap(); + + // Wrap the string in quotes so it's a JSON string. + let skeleton = serde_json::from_str::(&json) + .expect("Unable to parse an unordered skeletons."); + + assert_eq!( + serde_json::to_string(&skeleton).unwrap(), + serde_json::to_string(ordered_skeleton).unwrap() + ); + } + + /// This test handles a branch in the skeleton serialization code that takes into account + /// duplicate field errors when deserializing from string. + #[test] + fn test_skeleton_json_duplicate_fields() { + // Wrap the string in quotes so it's a JSON string. 
+ let json: String = serde_json::to_string("EEEEyMdEEEE").unwrap(); + let err = + serde_json::from_str::(&json).expect_err("Expected a duplicate field error."); + + assert_eq!( + format!("{}", err), + "invalid value: \"EEEEyMdEEEE\" duplicate field in skeleton, expected field symbols representing a skeleton at line 1 column 13" + ); + } + + /// Skeletons are represented in bincode as a vec of field, but bincode shouldn't be completely + /// trusted, test that the bincode gets validated correctly. + struct TestInvalidSkeleton(Vec); + + #[cfg(feature = "provider_serde")] + impl Serialize for TestInvalidSkeleton { + fn serialize(&self, serializer: S) -> Result + where + S: ser::Serializer, + { + let fields = &self.0; + let mut seq = serializer.serialize_seq(Some(fields.len()))?; + for item in fields.iter() { + seq.serialize_element(item)?; + } + seq.end() + } + } + + #[test] + fn test_skeleton_bincode_reordering() { + let unordered_skeleton = TestInvalidSkeleton(vec![ + Field::from((FieldSymbol::Day(Day::DayOfMonth), FieldLength::One)), + Field::from((FieldSymbol::Month(Month::Format), FieldLength::One)), + ]); + + let mut buffer: Vec = Vec::new(); + + bincode::serialize_into(&mut buffer, &unordered_skeleton).unwrap(); + + let err = + bincode::deserialize::(&buffer).expect_err("Expected an unordered error"); + + assert_eq!( + format!("{}", err), + "invalid value: field item out of order: Field { symbol: Month(Format), length: One }, expected ordered field symbols representing a skeleton" + ); + } + + #[test] + fn test_skeleton_bincode_duplicate_field() { + let unordered_skeleton = TestInvalidSkeleton(vec![ + Field::from((FieldSymbol::Month(Month::Format), FieldLength::One)), + Field::from((FieldSymbol::Day(Day::DayOfMonth), FieldLength::One)), + Field::from((FieldSymbol::Day(Day::DayOfMonth), FieldLength::One)), + ]); + + let mut buffer: Vec = Vec::new(); + + bincode::serialize_into(&mut buffer, &unordered_skeleton).unwrap(); + + let err = 
bincode::deserialize::(&buffer) + .expect_err("Expected a duplicate field error"); + + assert_eq!( + format!("{}", err), + "invalid value: duplicate field: Field { symbol: Day(DayOfMonth), length: One }, expected ordered field symbols representing a skeleton" + ); + } +} diff --git a/components/datetime/tests/datetime.rs b/components/datetime/tests/datetime.rs index 8e1fdc7e2b5..f7738f5851c 100644 --- a/components/datetime/tests/datetime.rs +++ b/components/datetime/tests/datetime.rs @@ -21,7 +21,10 @@ use std::{borrow::Cow, fmt::Write}; fn test_fixture(fixture_name: &str) { let provider = icu_testdata::get_provider(); - for fx in fixtures::get_fixture(fixture_name).unwrap().0 { + for fx in fixtures::get_fixture(fixture_name) + .expect("Unable to get fixture.") + .0 + { let locale: Locale = fx.input.locale.parse().unwrap(); let options = fixtures::get_options(&fx.input.options); let dtf = DateTimeFormat::try_new(locale, &provider, &options).unwrap(); @@ -65,7 +68,13 @@ fn test_dayperiod_patterns() { .payload .take() .unwrap(); - *data.to_mut().patterns.date_time.long.to_mut() = String::from("{0}"); + *data + .to_mut() + .patterns + .date_time + .style_patterns + .long + .to_mut() = String::from("{0}"); for test_case in &test.test_cases { for dt_input in &test_case.date_times { let date_time: MockDateTime = dt_input.parse().unwrap(); diff --git a/components/datetime/tests/fixtures/tests/patterns.bin b/components/datetime/tests/fixtures/tests/patterns.bin new file mode 100644 index 00000000000..f808adaa6c6 Binary files /dev/null and b/components/datetime/tests/fixtures/tests/patterns.bin differ diff --git a/components/datetime/tests/fixtures/tests/patterns.json b/components/datetime/tests/fixtures/tests/patterns.json new file mode 100644 index 00000000000..08d65a0b255 --- /dev/null +++ b/components/datetime/tests/fixtures/tests/patterns.json @@ -0,0 +1,36 @@ +{ + "valid_patterns": [ + "d", + "E, M/d/y", + "h:mm:ss a", + "'week' d 'of' MMMM", + "الأسبوع d من MMMM", + 
"ᏒᎾᏙᏓᏆᏍᏗ’ d ’ᎾᎿ’ MMMM", + "y yy yyy yyyy yyyyy", + "Y YY YYY YYYY YYYYY", + "M MM MMM MMMM MMMMM", + "L LL LLL LLLL LLLLL", + "d dd", + "D DD DDD", + "F", + "g gg ggg gggg ggggg", + "e ee eee eeee eeeee eeeeee", + "c cc ccc cccc ccccc cccccc", + "a aa aaa aaaa aaaaa", + "b bb bbb bbbb bbbbb", + "m mm", + "s ss", + "S SS SSS SSSS SSSSS SSSSS", + "A AA AAA AAAA AAAAA AAAAA" + ], + "invalid_patterns": [ + { + "pattern": "yyyyyyy", + "error": "invalid value: Year(Calendar) field too long in pattern, expected a valid UTS 35 pattern string at line 1 column 9" + }, + { + "pattern": " 'unclosed", + "error": "invalid value: unclosed literal in pattern, expected a valid UTS 35 pattern string at line 1 column 12" + } + ] +} diff --git a/components/datetime/tests/fixtures/tests/skeletons.bin b/components/datetime/tests/fixtures/tests/skeletons.bin new file mode 100644 index 00000000000..a13d16d822a Binary files /dev/null and b/components/datetime/tests/fixtures/tests/skeletons.bin differ diff --git a/components/datetime/tests/fixtures/tests/skeletons.json b/components/datetime/tests/fixtures/tests/skeletons.json new file mode 100644 index 00000000000..4b61ec27452 --- /dev/null +++ b/components/datetime/tests/fixtures/tests/skeletons.json @@ -0,0 +1,22 @@ +{ + "skeletons": [ + "y", + "yM", + "yMdE", + "yMdEEEE", + "yMMM", + "M", + "Md", + "Mdd", + "MMd", + "MMdd", + "d", + "h", + "hm", + "hms", + "Hm", + "Hms", + "ms", + "mmss" + ] +} diff --git a/components/datetime/tests/pattern_serialization.rs b/components/datetime/tests/pattern_serialization.rs new file mode 100644 index 00000000000..aefa70cd644 --- /dev/null +++ b/components/datetime/tests/pattern_serialization.rs @@ -0,0 +1,158 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). 
+ +#![cfg(all(test, feature = "provider_serde"))] + +use icu_datetime::pattern::Pattern; +use std::{fs::File, io::BufReader}; + +#[derive(serde::Serialize, serde::Deserialize)] +struct InvalidPatternFixture { + pub pattern: String, + pub error: String, +} + +#[derive(serde::Serialize, serde::Deserialize)] +struct PatternFixtures { + pub valid_patterns: Vec, + pub invalid_patterns: Vec, +} + +fn get_pattern_fixtures() -> PatternFixtures { + let file = File::open("./tests/fixtures/tests/patterns.json".to_string()) + .expect("Unable to open ./tests/fixtures/tests/patterns.json"); + let reader = BufReader::new(file); + serde_json::from_reader(reader).expect("Unable to deserialize pattern fixtures.") +} + +fn get_pattern_strings() -> Vec { + get_pattern_fixtures().valid_patterns +} + +fn get_invalid_pattern_strings() -> Vec { + get_pattern_fixtures().invalid_patterns +} + +fn get_pattern_bincode_write_handle() -> File { + File::create("./tests/fixtures/tests/patterns.bin") + .expect("Unable to create ./tests/fixtures/tests/patterns.bin") +} + +fn get_pattern_bincode_from_file() -> Vec> { + bincode::deserialize_from( + File::open("./tests/fixtures/tests/patterns.bin") + .expect("Unable to ./tests/fixtures/tests/patterns.bin"), + ) + .expect("Unable to deserialize bytes.") +} + +#[test] +fn test_pattern_json_serialization_roundtrip() { + for pattern_string in &get_pattern_strings() { + // Wrap the string in quotes so it's a JSON string. + let json_in: String = serde_json::to_string(pattern_string).unwrap(); + + let pattern: Pattern = match serde_json::from_str(&json_in) { + Ok(p) => p, + Err(err) => { + panic!( + "Unable to parse the pattern {:?}. {:?}", + pattern_string, err + ); + } + }; + + let json_out = match serde_json::to_string(&pattern) { + Ok(s) => s, + Err(err) => { + panic!( + "Unable to re-serialize the pattern {:?}. {:?}", + pattern_string, err + ); + } + }; + + assert_eq!( + json_in, json_out, + "The roundtrip serialization for the pattern matched." 
+ ); + } +} + +/// Bincode representation of patterns need to be stable across time. This test checks the +/// current serialization against historic serialization to ensure that this remains stable. +#[test] +fn test_pattern_bincode_serialization_roundtrip() { + let patterns = get_pattern_strings(); + let update_bincode = std::env::var_os("ICU4X_REGEN_FIXTURE").is_some(); + let mut result_vec = Vec::new(); + let expect_vec = if update_bincode { + None + } else { + Some(get_pattern_bincode_from_file()) + }; + + if let Some(ref expect_vec) = expect_vec { + if expect_vec.len() != patterns.len() { + panic!( + "Expected the bincode to have the same number of entries as the string patterns. \ + The bincode can be re-generated by re-running the test with the environment + variable ICU4X_REGEN_FIXTURE set." + ); + } + } + + for (i, pattern_string) in patterns.iter().enumerate() { + // Wrap the string in quotes so it's a JSON string. + let json_in: String = serde_json::to_string(pattern_string).unwrap(); + + let pattern: Pattern = match serde_json::from_str(&json_in) { + Ok(p) => p, + Err(err) => { + panic!( + "Unable to parse the pattern {:?}. {:?}", + pattern_string, err + ); + } + }; + + let bincode: Vec = bincode::serialize(&pattern).unwrap(); + + if let Some(ref expect_vec) = expect_vec { + if bincode != *expect_vec.get(i).unwrap() { + panic!( + "The bincode representations of the pattern {:?} did not match the stored \ + representation. Patterns are supposed to have stable bincode representations. \ + Something changed to make it different than what it was in the past. 
If this is \ + expected, then the bincode can be updated by re-running the test with the \ + environment variable ICU4X_REGEN_FIXTURE set.", + json_in + ) + } + } + result_vec.push(bincode); + } + if update_bincode { + eprintln!("Writing the bincode into a file"); + bincode::serialize_into(&mut get_pattern_bincode_write_handle(), &result_vec).unwrap(); + } +} + +/// Test that pattern serialization produces sensible error messages given the Serde +/// serde::de::Unexpected type and the use of fmt::Display traits on the Error objects. +#[test] +fn test_pattern_json_errors() { + for InvalidPatternFixture { pattern, error } in &get_invalid_pattern_strings() { + // Wrap the string in quotes so it's a JSON string. + let json_in: String = serde_json::to_string(pattern).unwrap(); + + // Wrap the string in quotes so it's a JSON string. + match serde_json::from_str::(&json_in) { + Ok(_) => panic!("Expected an invalid pattern. {}", json_in), + Err(serde_err) => { + assert_eq!(format!("{}", serde_err), *error); + } + }; + } +} diff --git a/components/datetime/tests/skeleton_serialization.rs b/components/datetime/tests/skeleton_serialization.rs new file mode 100644 index 00000000000..ef8e8282fb5 --- /dev/null +++ b/components/datetime/tests/skeleton_serialization.rs @@ -0,0 +1,130 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +#![cfg(all(test, feature = "provider_serde"))] + +use icu_datetime::skeleton::Skeleton; +use std::{fs::File, io::BufReader}; + +/// Note that this file tests only valid skeleton cases for the stability of the serialization +/// pipeline. For tests failure cases see the file where skeletons are defined. 
+ +#[derive(serde::Serialize, serde::Deserialize)] +struct SkeletonFixtures { + pub skeletons: Vec, +} + +fn get_skeleton_fixtures() -> Vec { + let file = File::open("./tests/fixtures/tests/skeletons.json".to_string()) + .expect("Unable to open ./tests/fixtures/tests/skeletons.json"); + let reader = BufReader::new(file); + let fixtures: SkeletonFixtures = + serde_json::from_reader(reader).expect("Unable to deserialize skeleton fixtures."); + fixtures.skeletons +} + +fn get_skeleton_bincode_write_handle() -> File { + File::create("./tests/fixtures/tests/skeletons.bin") + .expect("Unable to create ./tests/fixtures/tests/skeletons.bin") +} + +fn get_skeleton_bincode_from_file() -> Vec> { + bincode::deserialize_from( + File::open("./tests/fixtures/tests/skeletons.bin") + .expect("Unable to ./tests/fixtures/tests/skeletons.bin"), + ) + .expect("Unable to deserialize bytes.") +} + +#[test] +fn test_skeleton_json_serialization_roundtrip() { + for skeleton_string in &get_skeleton_fixtures() { + // Wrap the string in quotes so it's a JSON string. + let json_in: String = serde_json::to_string(skeleton_string).unwrap(); + + let skeleton: Skeleton = match serde_json::from_str(&json_in) { + Ok(p) => p, + Err(err) => { + panic!( + "Unable to parse the skeleton {:?}. {:?}", + skeleton_string, err + ); + } + }; + + let json_out = match serde_json::to_string(&skeleton) { + Ok(s) => s, + Err(err) => { + panic!( + "Unable to re-serialize the skeleton {:?}. {:?}", + skeleton_string, err + ); + } + }; + + assert_eq!( + json_in, json_out, + "The roundtrip serialization for the skeleton matched." + ); + } +} + +/// Bincode representation of skeletons need to be stable across time. This test checks the +/// current serialization against historic serialization to ensure that this remains stable. 
+#[test] +fn test_skeleton_bincode_serialization_roundtrip() { + let skeletons = get_skeleton_fixtures(); + let update_bincode = std::env::var_os("ICU4X_REGEN_FIXTURE").is_some(); + let mut result_vec = Vec::new(); + let expect_vec = if update_bincode { + None + } else { + Some(get_skeleton_bincode_from_file()) + }; + + if let Some(ref expect_vec) = expect_vec { + if expect_vec.len() != skeletons.len() { + panic!( + "Expected the bincode to have the same number of entries as the string skeletons. \ + The bincode can be re-generated by re-running the test with the environment + variable ICU4X_REGEN_FIXTURE set." + ); + } + } + + for (i, skeleton_string) in skeletons.iter().enumerate() { + // Wrap the string in quotes so it's a JSON string. + let json_in: String = serde_json::to_string(skeleton_string).unwrap(); + + let skeleton: Skeleton = match serde_json::from_str(&json_in) { + Ok(p) => p, + Err(err) => { + panic!( + "Unable to parse the skeleton {:?}. {:?}", + skeleton_string, err + ); + } + }; + + let bincode: Vec = bincode::serialize(&skeleton).unwrap(); + + if let Some(ref expect_vec) = expect_vec { + if bincode != *expect_vec.get(i).unwrap() { + panic!( + "The bincode representations of the skeleton {:?} did not match the stored \ + representation. Skeletons are supposed to have stable bincode representations. \ + Something changed to make it different than what it was in the past. 
If this is \ + expected, then the bincode can be updated by re-running the test with the \ + environment variable ICU4X_REGEN_FIXTURE set.", + json_in + ) + } + } + result_vec.push(bincode); + } + if update_bincode { + eprintln!("Writing the bincode into a file"); + bincode::serialize_into(&mut get_skeleton_bincode_write_handle(), &result_vec).unwrap(); + } +} diff --git a/components/provider_cldr/Cargo.toml b/components/provider_cldr/Cargo.toml index 5049d50e237..99d3ff526e9 100644 --- a/components/provider_cldr/Cargo.toml +++ b/components/provider_cldr/Cargo.toml @@ -38,6 +38,8 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde-tuple-vec-map = "1.0" tinystr = "0.4" +smallvec = "1.4" +litemap = { version = "0.1.1", path = "../../utils/litemap" } # Dependencies for the download feature urlencoding = { version = "1.1", optional = true } diff --git a/components/provider_cldr/src/transform/dates.rs b/components/provider_cldr/src/transform/dates.rs index df8c5d247e5..8e5b14a455e 100644 --- a/components/provider_cldr/src/transform/dates.rs +++ b/components/provider_cldr/src/transform/dates.rs @@ -6,7 +6,7 @@ use crate::cldr_langid::CldrLangID; use crate::error::Error; use crate::reader::{get_subdirectories, open_reader}; use crate::CldrPaths; -use icu_datetime::provider::*; +use icu_datetime::{provider::*, skeleton::SkeletonError}; use icu_provider::iter::{IterableDataProviderCore, KeyedDataProvider}; use icu_provider::prelude::*; use std::borrow::Cow; @@ -119,6 +119,67 @@ impl From<&cldr_json::StylePatterns> for gregory::patterns::StylePatternsV1 { } } +impl From<&cldr_json::DateTimeFormats> for gregory::patterns::DateTimeFormatsV1 { + fn from(other: &cldr_json::DateTimeFormats) -> Self { + use gregory::patterns::{PatternV1, SkeletonV1, SkeletonsV1}; + use litemap::LiteMap; + + // TODO(#308): Support numbering system variations. We currently throw them away. 
+ Self { + style_patterns: gregory::patterns::StylePatternsV1 { + full: other.full.get_pattern().clone(), + long: other.long.get_pattern().clone(), + medium: other.medium.get_pattern().clone(), + short: other.short.get_pattern().clone(), + }, + skeletons: { + let mut skeletons = SkeletonsV1(LiteMap::new()); + + // The CLDR keys for available_formats can have duplicate skeletons with either + // an additional variant, or with multiple variants for different plurals. + for (skeleton_str, pattern_str) in other.available_formats.0.iter() { + let mut unique_skeleton = None; + let mut variant_parts = Vec::new(); + + for part in skeleton_str.split('-') { + match unique_skeleton { + None => { + unique_skeleton = Some(part); + } + Some(_) => variant_parts.push(part), + } + } + + let unique_skeleton = unique_skeleton.expect("Expected to find a skeleton."); + + let skeleton_fields_v1 = match SkeletonV1::try_from(unique_skeleton) { + Ok(s) => s, + Err(err) => match err { + // Ignore unimplemented fields for now. 
+ SkeletonError::SymbolUnimplemented(_) => continue, + _ => panic!("{:?} {}", unique_skeleton, err), + }, + }; + + if !variant_parts.is_empty() { + unimplemented!( + "This skeleton string is not yet supported: {:?}", + skeleton_str + ); + } + + let pattern_v1 = PatternV1::try_from(pattern_str as &str) + .expect("Unable to parse a pattern"); + + skeletons.0.insert(skeleton_fields_v1, pattern_v1); + } + + skeletons + }, + } + } +} + impl From<&cldr_json::Dates> for gregory::DatesV1 { fn from(other: &cldr_json::Dates) -> Self { Self { @@ -378,6 +439,26 @@ pub(self) mod cldr_json { pub short: StylePattern, } + #[derive(PartialEq, Debug, Deserialize)] + pub struct DateTimeFormats { + pub full: StylePattern, + pub long: StylePattern, + pub medium: StylePattern, + pub short: StylePattern, + #[serde(rename = "availableFormats")] + pub available_formats: AvailableFormats, + } + + #[derive(PartialEq, Clone, Debug, Deserialize)] + pub struct AvailableFormats( + #[serde(with = "tuple_vec_map")] pub(crate) Vec<(Cow<'static, str>, Cow<'static, str>)>, + ); + + /// This struct represents a 1:1 mapping of the CLDR ca-gregorian.json data at the key + /// "main.LANGID.dates.calendars.gregorian" where "LANGID" is the identifier. + /// + /// e.g. 
+ /// https://github.com/unicode-org/cldr-json/blob/master/cldr-json/cldr-dates-full/main/en/ca-gregorian.json #[derive(PartialEq, Debug, Deserialize)] pub struct GregoryDates { pub months: months::Contexts, @@ -389,7 +470,7 @@ pub(self) mod cldr_json { #[serde(rename = "timeFormats")] pub time_formats: StylePatterns, #[serde(rename = "dateTimeFormats")] - pub date_time_formats: StylePatterns, + pub date_time_formats: DateTimeFormats, } #[derive(PartialEq, Debug, Deserialize)] diff --git a/resources/testdata/data/json/dates/gregory@1/ar-EG.json b/resources/testdata/data/json/dates/gregory@1/ar-EG.json index ff240b1cf51..f6e6c2460cc 100644 --- a/resources/testdata/data/json/dates/gregory@1/ar-EG.json +++ b/resources/testdata/data/json/dates/gregory@1/ar-EG.json @@ -123,10 +123,46 @@ "short": "h:mm a" }, "date_time": { - "full": "{1} في {0}", - "long": "{1} في {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": "{1} في {0}", + "long": "{1} في {0}", + "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "M‏/y", + "yMd": "d‏/M‏/y", + "yMdE": "E، d/‏M/‏y", + "yMM": "MM‏/y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "E، d MMM y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "d/‏M", + "MdE": "E، d/‏M", + "MMdd": "dd‏/MM", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E، d MMM", + "MMMMd": "d MMMM", + "MMMMdE": "E، d MMMM", + "d": "d", + "dE": "E، d", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/ar.json b/resources/testdata/data/json/dates/gregory@1/ar.json index ff240b1cf51..f6e6c2460cc 100644 --- a/resources/testdata/data/json/dates/gregory@1/ar.json +++ b/resources/testdata/data/json/dates/gregory@1/ar.json @@ -123,10 +123,46 @@ "short": "h:mm a" }, 
"date_time": { - "full": "{1} في {0}", - "long": "{1} في {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": "{1} في {0}", + "long": "{1} في {0}", + "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "M‏/y", + "yMd": "d‏/M‏/y", + "yMdE": "E، d/‏M/‏y", + "yMM": "MM‏/y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "E، d MMM y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "d/‏M", + "MdE": "E، d/‏M", + "MMdd": "dd‏/MM", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E، d MMM", + "MMMMd": "d MMMM", + "MMMMdE": "E، d MMMM", + "d": "d", + "dE": "E، d", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/bn.json b/resources/testdata/data/json/dates/gregory@1/bn.json index 326d3da0eec..8cd6e12d90f 100644 --- a/resources/testdata/data/json/dates/gregory@1/bn.json +++ b/resources/testdata/data/json/dates/gregory@1/bn.json @@ -133,10 +133,46 @@ "short": "h:mm a" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "d/M/y", + "yMdE": "E, d/M/y", + "yMM": "MM-y", + "yMMM": "MMM y", + "yMMMd": "d MMM, y", + "yMMMdE": "E, d MMM, y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "d/M", + "MdE": "E, d-M", + "MMdd": "dd-MM", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E d MMM", + "MMMMd": "d MMMM", + "MMMMdE": "E d MMMM", + "d": "d", + "dE": "d E", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": 
"mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/ccp.json b/resources/testdata/data/json/dates/gregory@1/ccp.json index dbc2db536da..3b63ab16a67 100644 --- a/resources/testdata/data/json/dates/gregory@1/ccp.json +++ b/resources/testdata/data/json/dates/gregory@1/ccp.json @@ -147,10 +147,46 @@ "short": "h:mm a" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "d/M/y", + "yMdE": "E, d/M/y", + "yMM": "MM-y", + "yMMM": "MMM y", + "yMMMd": "d MMM, y", + "yMMMdE": "E, d MMM, y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "d/M", + "MdE": "E, d-M", + "MMdd": "dd-MM", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E d MMM", + "MMMMd": "d MMMM", + "MMMMdE": "E d MMMM", + "d": "d", + "dE": "d E", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/en-US-posix.json b/resources/testdata/data/json/dates/gregory@1/en-US-posix.json index 9f2d6455431..c623dc2e869 100644 --- a/resources/testdata/data/json/dates/gregory@1/en-US-posix.json +++ b/resources/testdata/data/json/dates/gregory@1/en-US-posix.json @@ -131,10 +131,43 @@ "short": "h:mm a" }, "date_time": { - "full": "{1} 'at' {0}", - "long": "{1} 'at' {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": "{1} 'at' {0}", + "long": "{1} 'at' {0}", + "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "M/d/y", + "yMdE": "E, M/d/y", + "yMMM": "MMM y", + "yMMMd": "MMM d, y", + "yMMMdE": "E, MMM d, y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "M/d", + "MdE": "E, M/d", + "MMM": 
"LLL", + "MMMd": "MMM d", + "MMMdE": "E, MMM d", + "MMMMd": "MMMM d", + "d": "d", + "dE": "d E", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/en-ZA.json b/resources/testdata/data/json/dates/gregory@1/en-ZA.json index 00092e2f8dd..e252c0509b4 100644 --- a/resources/testdata/data/json/dates/gregory@1/en-ZA.json +++ b/resources/testdata/data/json/dates/gregory@1/en-ZA.json @@ -131,10 +131,44 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} 'at' {0}", - "long": "{1} 'at' {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": "{1} 'at' {0}", + "long": "{1} 'at' {0}", + "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "MM/y", + "yMd": "y/MM/dd", + "yMdE": "E, y/MM/dd", + "yMMM": "MMM y", + "yMMMd": "dd MMM y", + "yMMMdE": "E, dd MMM y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "MM/dd", + "MdE": "E, MM/dd", + "MMdd": "dd/MM", + "MMM": "LLL", + "MMMd": "dd MMM", + "MMMdE": "E, dd MMM", + "MMMMd": "d MMMM", + "d": "d", + "dE": "E d", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/en.json b/resources/testdata/data/json/dates/gregory@1/en.json index 9f2d6455431..c623dc2e869 100644 --- a/resources/testdata/data/json/dates/gregory@1/en.json +++ b/resources/testdata/data/json/dates/gregory@1/en.json @@ -131,10 +131,43 @@ "short": "h:mm a" }, "date_time": { - "full": "{1} 'at' {0}", - "long": "{1} 'at' {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": "{1} 'at' {0}", + "long": "{1} 'at' {0}", 
+ "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "M/d/y", + "yMdE": "E, M/d/y", + "yMMM": "MMM y", + "yMMMd": "MMM d, y", + "yMMMdE": "E, MMM d, y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "M/d", + "MdE": "E, M/d", + "MMM": "LLL", + "MMMd": "MMM d", + "MMMdE": "E, MMM d", + "MMMMd": "MMMM d", + "d": "d", + "dE": "d E", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/es-AR.json b/resources/testdata/data/json/dates/gregory@1/es-AR.json index 4bd729e48d0..6c8db988304 100644 --- a/resources/testdata/data/json/dates/gregory@1/es-AR.json +++ b/resources/testdata/data/json/dates/gregory@1/es-AR.json @@ -138,10 +138,50 @@ "short": "HH:mm" }, "date_time": { - "full": "{1}, {0}", - "long": "{1}, {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1}, {0}", + "long": "{1}, {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "M-y", + "yMd": "d/M/y", + "yMdE": "E, d/M/y", + "yMM": "M/y", + "yMMM": "MMM y", + "yMMMd": "d 'de' MMM 'de' y", + "yMMMdE": "E, d MMM y", + "yMMMM": "MMMM 'de' y", + "yMMMMd": "d 'de' MMMM 'de' y", + "yMMMMdE": "EEE, d 'de' MMMM 'de' y", + "M": "L", + "Md": "d/M", + "MdE": "E d-M", + "MMd": "d/M", + "MMdd": "d/M", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E, d MMM", + "MMMdd": "dd-MMM", + "MMMMd": "d 'de' MMMM", + "MMMMdE": "E, d 'de' MMMM", + "d": "d", + "dE": "E d", + "E": "ccc", + "Ehm": "E, h:mm a", + "Ehms": "E, h:mm:ss a", + "EHm": "E, HH:mm", + "EHms": "E, HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "hh:mm:ss", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/es.json 
b/resources/testdata/data/json/dates/gregory@1/es.json index 4f65f34304a..0d65e5b83b2 100644 --- a/resources/testdata/data/json/dates/gregory@1/es.json +++ b/resources/testdata/data/json/dates/gregory@1/es.json @@ -137,10 +137,49 @@ "short": "H:mm" }, "date_time": { - "full": "{1}, {0}", - "long": "{1}, {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1}, {0}", + "long": "{1}, {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "d/M/y", + "yMdE": "EEE, d/M/y", + "yMM": "M/y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "EEE, d MMM y", + "yMMMM": "MMMM 'de' y", + "yMMMMd": "d 'de' MMMM 'de' y", + "yMMMMdE": "EEE, d 'de' MMMM 'de' y", + "M": "L", + "Md": "d/M", + "MdE": "E, d/M", + "MMd": "d/M", + "MMdd": "d/M", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E, d MMM", + "MMMMd": "d 'de' MMMM", + "MMMMdE": "E, d 'de' MMMM", + "d": "d", + "dE": "E d", + "E": "ccc", + "Ehm": "E, h:mm a", + "Ehms": "E, h:mm:ss a", + "EHm": "E, H:mm", + "EHms": "E, H:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "H", + "Hm": "H:mm", + "Hms": "H:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/fr.json b/resources/testdata/data/json/dates/gregory@1/fr.json index 7460fc17138..ad8a6939d66 100644 --- a/resources/testdata/data/json/dates/gregory@1/fr.json +++ b/resources/testdata/data/json/dates/gregory@1/fr.json @@ -123,10 +123,43 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} 'à' {0}", - "long": "{1} 'à' {0}", - "medium": "{1}, {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} 'à' {0}", + "long": "{1} 'à' {0}", + "medium": "{1}, {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "MM/y", + "yMd": "dd/MM/y", + "yMdE": "E dd/MM/y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "E d MMM y", + "yMMMM": "MMMM y", + "M": "L", + "Md": "dd/MM", + "MdE": "E dd/MM", + "MMM": "LLL", + "MMMd": "d 
MMM", + "MMMdE": "E d MMM", + "MMMMd": "d MMMM", + "d": "d", + "dE": "E d", + "E": "E", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH 'h'", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/ja.json b/resources/testdata/data/json/dates/gregory@1/ja.json index e601cccb4a6..a9ab5921761 100644 --- a/resources/testdata/data/json/dates/gregory@1/ja.json +++ b/resources/testdata/data/json/dates/gregory@1/ja.json @@ -123,10 +123,49 @@ "short": "H:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y年", + "yM": "y/M", + "yMd": "y/M/d", + "yMdE": "y/M/d(E)", + "yMdEEEE": "y/M/dEEEE", + "yMM": "y/MM", + "yMMM": "y年M月", + "yMMMd": "y年M月d日", + "yMMMdE": "y年M月d日(E)", + "yMMMdEEEE": "y年M月d日EEEE", + "yMMMM": "y年M月", + "M": "M月", + "Md": "M/d", + "MdE": "M/d(E)", + "MdEEEE": "M/dEEEE", + "MMM": "M月", + "MMMd": "M月d日", + "MMMdE": "M月d日(E)", + "MMMdEEEE": "M月d日EEEE", + "MMMMd": "M月d日", + "d": "d日", + "dE": "d日(E)", + "dEEEE": "d日EEEE", + "E": "ccc", + "Ehm": "aK:mm (E)", + "Ehms": "aK:mm:ss (E)", + "EHm": "H:mm (E)", + "EHms": "H:mm:ss (E)", + "h": "aK時", + "hm": "aK:mm", + "hms": "aK:mm:ss", + "H": "H時", + "Hm": "H:mm", + "Hms": "H:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/ru.json b/resources/testdata/data/json/dates/gregory@1/ru.json index 6e68c510889..4cbfca94c63 100644 --- a/resources/testdata/data/json/dates/gregory@1/ru.json +++ b/resources/testdata/data/json/dates/gregory@1/ru.json @@ -164,10 +164,45 @@ "short": "HH:mm" }, "date_time": { - "full": "{1}, {0}", - "long": "{1}, {0}", - "medium": "{1}, {0}", - "short": "{1}, {0}" + "style_patterns": { + "full": 
"{1}, {0}", + "long": "{1}, {0}", + "medium": "{1}, {0}", + "short": "{1}, {0}" + }, + "skeletons": { + "y": "y", + "yM": "MM.y", + "yMd": "dd.MM.y", + "yMdE": "ccc, dd.MM.y г.", + "yMM": "MM.y", + "yMMM": "LLL y г.", + "yMMMd": "d MMM y г.", + "yMMMdE": "E, d MMM y г.", + "yMMMM": "LLLL y г.", + "M": "L", + "Md": "dd.MM", + "MdE": "E, dd.MM", + "MMdd": "dd.MM", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "ccc, d MMM", + "MMMMd": "d MMMM", + "d": "d", + "dE": "ccc, d", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/sr-Cyrl.json b/resources/testdata/data/json/dates/gregory@1/sr-Cyrl.json index 739d33f3a25..6bcc921548b 100644 --- a/resources/testdata/data/json/dates/gregory@1/sr-Cyrl.json +++ b/resources/testdata/data/json/dates/gregory@1/sr-Cyrl.json @@ -131,10 +131,48 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y.", + "yM": "M.y.", + "yMd": "d.M.y.", + "yMdE": "E, d.M.y.", + "yMM": "MM.y.", + "yMMdd": "dd.MM.y.", + "yMMM": "MMM y.", + "yMMMd": "d. MMM y.", + "yMMMdE": "E, d. MMM y.", + "yMMMM": "MMMM y.", + "M": "L", + "Md": "d.M.", + "MdE": "E, d.M.", + "MMdd": "dd.MM.", + "MMM": "LLL", + "MMMd": "d. MMM", + "MMMdE": "E d. MMM", + "MMMdd": "dd.MMM", + "MMMMd": "d. MMMM", + "MMMMdE": "E, d. 
MMMM", + "d": "d", + "dE": "E d.", + "E": "E", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/sr-Latn.json b/resources/testdata/data/json/dates/gregory@1/sr-Latn.json index 7d24081d047..9762de52d80 100644 --- a/resources/testdata/data/json/dates/gregory@1/sr-Latn.json +++ b/resources/testdata/data/json/dates/gregory@1/sr-Latn.json @@ -131,10 +131,48 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y.", + "yM": "M.y.", + "yMd": "d.M.y.", + "yMdE": "E, d.M.y.", + "yMM": "MM.y.", + "yMMdd": "dd.MM.y.", + "yMMM": "MMM y.", + "yMMMd": "d. MMM y.", + "yMMMdE": "E, d. MMM y.", + "yMMMM": "MMMM y.", + "M": "L", + "Md": "d.M.", + "MdE": "E, d.M.", + "MMdd": "dd.MM.", + "MMM": "LLL", + "MMMd": "d. MMM", + "MMMdE": "E d. MMM", + "MMMdd": "dd.MMM", + "MMMMd": "d. MMMM", + "MMMMdE": "E, d. 
MMMM", + "d": "d", + "dE": "E d.", + "E": "E", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/sr.json b/resources/testdata/data/json/dates/gregory@1/sr.json index 739d33f3a25..6bcc921548b 100644 --- a/resources/testdata/data/json/dates/gregory@1/sr.json +++ b/resources/testdata/data/json/dates/gregory@1/sr.json @@ -131,10 +131,48 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y.", + "yM": "M.y.", + "yMd": "d.M.y.", + "yMdE": "E, d.M.y.", + "yMM": "MM.y.", + "yMMdd": "dd.MM.y.", + "yMMM": "MMM y.", + "yMMMd": "d. MMM y.", + "yMMMdE": "E, d. MMM y.", + "yMMMM": "MMMM y.", + "M": "L", + "Md": "d.M.", + "MdE": "E, d.M.", + "MMdd": "dd.MM.", + "MMM": "LLL", + "MMMd": "d. MMM", + "MMMdE": "E d. MMM", + "MMMdd": "dd.MMM", + "MMMMd": "d. MMMM", + "MMMMdE": "E, d. 
MMMM", + "d": "d", + "dE": "E d.", + "E": "E", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/th.json b/resources/testdata/data/json/dates/gregory@1/th.json index 1e9c5c2267f..7213ca7e582 100644 --- a/resources/testdata/data/json/dates/gregory@1/th.json +++ b/resources/testdata/data/json/dates/gregory@1/th.json @@ -131,10 +131,51 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "M/y", + "yMd": "d/M/y", + "yMdE": "E d/M/y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "E d MMM y", + "yMMMdEEEE": "EEEEที่ d MMM y", + "yMMMM": "MMMM 'G' y", + "yMMMMd": "d MMMM 'G' y", + "yMMMMdE": "E d MMMM 'G' y", + "yMMMMdEEEE": "EEEEที่ d MMMM 'G' y", + "M": "L", + "Md": "d/M", + "MdE": "E d/M", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "E d MMM", + "MMMdEEEE": "EEEEที่ d MMM", + "MMMMd": "d MMMM", + "MMMMdE": "E d MMMM", + "MMMMdEEEE": "EEEEที่ d MMMM", + "d": "d", + "dE": "E d", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm น.", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm น.", + "Hms": "HH:mm:ss", + "ms": "mm:ss", + "mmss": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/tr.json b/resources/testdata/data/json/dates/gregory@1/tr.json index 7751f9f6ee2..029e897a0a5 100644 --- a/resources/testdata/data/json/dates/gregory@1/tr.json +++ b/resources/testdata/data/json/dates/gregory@1/tr.json @@ -131,10 +131,46 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} 
{0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "MM/y", + "yMd": "dd.MM.y", + "yMdE": "d.M.y E", + "yMM": "MM.y", + "yMMM": "MMM y", + "yMMMd": "d MMM y", + "yMMMdE": "d MMM y E", + "yMMMM": "MMMM y", + "M": "L", + "Md": "d/M", + "MdE": "d/MM E", + "MMM": "LLL", + "MMMd": "d MMM", + "MMMdE": "d MMMM E", + "MMMMd": "d MMMM", + "MMMMdE": "d MMMM E", + "d": "d", + "dE": "d E", + "E": "ccc", + "Ehm": "E a h:mm", + "Ehms": "E a h:mm:ss", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "a h", + "hm": "a h:mm", + "hms": "a h:mm:ss", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss", + "mmss": "mm:ss" + } } } } diff --git a/resources/testdata/data/json/dates/gregory@1/und.json b/resources/testdata/data/json/dates/gregory@1/und.json index 00074b72a6b..4cfcddd7475 100644 --- a/resources/testdata/data/json/dates/gregory@1/und.json +++ b/resources/testdata/data/json/dates/gregory@1/und.json @@ -117,10 +117,43 @@ "short": "HH:mm" }, "date_time": { - "full": "{1} {0}", - "long": "{1} {0}", - "medium": "{1} {0}", - "short": "{1} {0}" + "style_patterns": { + "full": "{1} {0}", + "long": "{1} {0}", + "medium": "{1} {0}", + "short": "{1} {0}" + }, + "skeletons": { + "y": "y", + "yM": "y-MM", + "yMd": "y-MM-dd", + "yMdE": "y-MM-dd, E", + "yMMM": "y MMM", + "yMMMd": "y MMM d", + "yMMMdE": "y MMM d, E", + "yMMMM": "y MMMM", + "M": "L", + "Md": "MM-dd", + "MdE": "MM-dd, E", + "MMM": "LLL", + "MMMd": "MMM d", + "MMMdE": "MMM d, E", + "MMMMd": "MMMM d", + "d": "d", + "dE": "d, E", + "E": "ccc", + "Ehm": "E h:mm a", + "Ehms": "E h:mm:ss a", + "EHm": "E HH:mm", + "EHms": "E HH:mm:ss", + "h": "h a", + "hm": "h:mm a", + "hms": "h:mm:ss a", + "H": "HH", + "Hm": "HH:mm", + "Hms": "HH:mm:ss", + "ms": "mm:ss" + } } } }