Skip to content

Commit

Permalink
Change pattern representation of fractional seconds; remove obsolete …
Browse files Browse the repository at this point in the history
…FieldLength variants (#5392)

#1317

This makes it such that adding or removing fractional second digits
involves changing one FieldSymbol, rather than going and mutating the
choice of fields in the pattern. Simpler and less error-prone at
runtime.

The parsing of "ss.SSS" is moved into the parser, where it belongs,
rather than handling it at format time.
  • Loading branch information
sffc committed Aug 22, 2024
1 parent bb91fb3 commit 12a5038
Show file tree
Hide file tree
Showing 19 changed files with 507 additions and 433 deletions.
67 changes: 0 additions & 67 deletions components/datetime/src/fields/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,8 @@ pub enum FieldLength {
/// [LDML documentation in UTS 35](https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns)
/// for more details.
Six,
/// A fixed size format for numeric-only fields that is at most 127 digits.
Fixed(u8),
/// FieldLength::One (numeric), but overridden with a different numbering system
NumericOverride(FieldNumericOverrides),
/// A time zone field with non-standard rules.
TimeZoneFallbackOverride(TimeZoneFallbackOverride),
}

/// First index used for numeric overrides in compact FieldLength representation
Expand All @@ -65,12 +61,6 @@ pub enum FieldLength {
const FIRST_NUMERIC_OVERRIDE: u8 = 17;
/// Last index used for numeric overrides
const LAST_NUMERIC_OVERRIDE: u8 = 31;
/// First index used for time zone fallback overrides
const FIRST_TIME_ZONE_FALLBACK_OVERRIDE: u8 = 32;
/// Last index used for time zone fallback overrides
const LAST_TIME_ZONE_FALLBACK_OVERRIDE: u8 = 40;
/// First index used for fixed size formats in compact FieldLength representation
const FIRST_FIXED: u8 = 128;

impl FieldLength {
#[inline]
Expand All @@ -85,10 +75,6 @@ impl FieldLength {
FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE
.saturating_add(*o as u8)
.min(LAST_NUMERIC_OVERRIDE),
FieldLength::TimeZoneFallbackOverride(o) => FIRST_TIME_ZONE_FALLBACK_OVERRIDE
.saturating_add(*o as u8)
.min(LAST_TIME_ZONE_FALLBACK_OVERRIDE),
FieldLength::Fixed(p) => FIRST_FIXED.saturating_add(*p), /* truncate to at most 127 digits to avoid overflow */
}
}

Expand All @@ -104,14 +90,6 @@ impl FieldLength {
idx if (FIRST_NUMERIC_OVERRIDE..=LAST_NUMERIC_OVERRIDE).contains(&idx) => {
Self::NumericOverride((idx - FIRST_NUMERIC_OVERRIDE).try_into()?)
}
idx if (FIRST_TIME_ZONE_FALLBACK_OVERRIDE..=LAST_TIME_ZONE_FALLBACK_OVERRIDE)
.contains(&idx) =>
{
Self::TimeZoneFallbackOverride(
(idx - FIRST_TIME_ZONE_FALLBACK_OVERRIDE).try_into()?,
)
}
idx if idx >= FIRST_FIXED => Self::Fixed(idx - FIRST_FIXED),
_ => return Err(LengthError::InvalidLength),
})
}
Expand All @@ -127,10 +105,6 @@ impl FieldLength {
FieldLength::Narrow => 5,
FieldLength::Six => 6,
FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE as usize + o as usize,
FieldLength::TimeZoneFallbackOverride(o) => {
FIRST_TIME_ZONE_FALLBACK_OVERRIDE as usize + o as usize
}
FieldLength::Fixed(p) => p as usize,
}
}

Expand Down Expand Up @@ -244,44 +218,3 @@ impl fmt::Display for FieldNumericOverrides {
self.as_str().fmt(f)
}
}

/// Time zone fallback overrides to support configurations not found
/// in the CLDR datetime field symbol table.
#[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_datetime::fields),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum TimeZoneFallbackOverride {
/// The short form of this time zone field,
/// but fall back directly to GMT.
ShortOrGmt = 0,
}

impl TryFrom<u8> for TimeZoneFallbackOverride {
type Error = LengthError;
fn try_from(other: u8) -> Result<Self, LengthError> {
Ok(match other {
0 => Self::ShortOrGmt,
_ => return Err(LengthError::InvalidLength),
})
}
}

// impl TimeZoneFallbackOverride {
// /// Convert this to the corresponding string code
// pub fn as_str(self) -> &'static str {
// match self {
// Self::ShortOrGmt => "ShortOrGmt",
// }
// }
// }

// impl fmt::Display for TimeZoneFallbackOverride {
// fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// self.as_str().fmt(f)
// }
// }
13 changes: 4 additions & 9 deletions components/datetime/src/fields/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod length;
pub(crate) mod symbols;

use displaydoc::Display;
pub use length::{FieldLength, FieldNumericOverrides, LengthError, TimeZoneFallbackOverride};
pub use length::{FieldLength, FieldNumericOverrides, LengthError};
pub use symbols::*;

use core::{
Expand Down Expand Up @@ -69,6 +69,7 @@ impl Field {
FieldSymbol::Minute => TextOrNumeric::Numeric,
FieldSymbol::Second(second) => second.get_length_type(self.length),
FieldSymbol::TimeZone(zone) => zone.get_length_type(self.length),
FieldSymbol::DecimalSecond(_) => TextOrNumeric::Numeric,
}
}
}
Expand All @@ -94,14 +95,8 @@ impl From<(FieldSymbol, FieldLength)> for Field {
impl TryFrom<(FieldSymbol, usize)> for Field {
type Error = Error;
fn try_from(input: (FieldSymbol, usize)) -> Result<Self, Self::Error> {
let length = if input.0 != FieldSymbol::Second(crate::fields::Second::FractionalSecond) {
FieldLength::from_idx(input.1 as u8).map_err(|_| Self::Error::InvalidLength(input.0))?
} else if input.1 <= 127 {
FieldLength::from_idx(128 + input.1 as u8)
.map_err(|_| Self::Error::InvalidLength(input.0))?
} else {
return Err(Self::Error::InvalidLength(input.0));
};
let length = FieldLength::from_idx(input.1 as u8)
.map_err(|_| Self::Error::InvalidLength(input.0))?;
Ok(Self {
symbol: input.0,
length,
Expand Down
132 changes: 109 additions & 23 deletions components/datetime/src/fields/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::fields::FieldLength;
#[cfg(any(feature = "datagen", feature = "experimental"))]
use crate::neo_skeleton::FractionalSecondDigits;
use core::{cmp::Ordering, convert::TryFrom};
use displaydoc::Display;
use icu_provider::prelude::*;
Expand Down Expand Up @@ -55,10 +57,13 @@ pub enum FieldSymbol {
Hour(Hour),
/// Minute number within an hour.
Minute,
/// Seconds number within a minute, including fractional seconds, or milliseconds within a day.
/// Seconds integer within a minute or milliseconds within a day.
Second(Second),
/// Time zone as a name, a zone ID, or a ISO 8601 numerical offset.
TimeZone(TimeZone),
/// Seconds with fractional digits. If seconds are an integer,
/// [`FieldSymbol::Second`] is used.
DecimalSecond(DecimalSecond),
}

impl FieldSymbol {
Expand Down Expand Up @@ -110,6 +115,7 @@ impl FieldSymbol {
FieldSymbol::Minute => (8, 0),
FieldSymbol::Second(second) => (9, second.idx()),
FieldSymbol::TimeZone(tz) => (10, tz.idx()),
FieldSymbol::DecimalSecond(second) => (11, second.idx()),
};
let result = high << 4;
result | low
Expand All @@ -134,13 +140,14 @@ impl FieldSymbol {
8 if low == 0 => Self::Minute,
9 => Self::Second(Second::from_idx(low)?),
10 => Self::TimeZone(TimeZone::from_idx(low)?),
11 => Self::DecimalSecond(DecimalSecond::from_idx(low)?),
_ => return Err(SymbolError::InvalidIndex(idx)),
})
}

/// Returns the index associated with this FieldSymbol.
#[cfg(any(feature = "datagen", feature = "experimental"))] // only referenced in experimental code
fn discriminant_idx(&self) -> u8 {
fn idx_for_skeleton(&self) -> u8 {
match self {
FieldSymbol::Era => 0,
FieldSymbol::Year(_) => 1,
Expand All @@ -151,16 +158,37 @@ impl FieldSymbol {
FieldSymbol::DayPeriod(_) => 6,
FieldSymbol::Hour(_) => 7,
FieldSymbol::Minute => 8,
FieldSymbol::Second(_) => 9,
FieldSymbol::Second(_) | FieldSymbol::DecimalSecond(_) => 9,
FieldSymbol::TimeZone(_) => 10,
}
}

/// Compares this enum with other solely based on the enum variant,
/// ignoring the enum's data.
///
/// Second and DecimalSecond are considered equal.
#[cfg(any(feature = "datagen", feature = "experimental"))] // only referenced in experimental code
pub(crate) fn discriminant_cmp(&self, other: &Self) -> Ordering {
self.discriminant_idx().cmp(&other.discriminant_idx())
pub(crate) fn skeleton_cmp(&self, other: &Self) -> Ordering {
self.idx_for_skeleton().cmp(&other.idx_for_skeleton())
}

#[cfg(any(feature = "datagen", feature = "experimental"))]
pub(crate) fn from_fractional_second_digits(
fractional_second_digits: FractionalSecondDigits,
) -> Self {
use FractionalSecondDigits::*;
match fractional_second_digits {
F0 => FieldSymbol::Second(Second::Second),
F1 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF1),
F2 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF2),
F3 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF3),
F4 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF4),
F5 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF5),
F6 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF6),
F7 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF7),
F8 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF8),
F9 => FieldSymbol::DecimalSecond(DecimalSecond::SecondF9),
}
}

/// UTS 35 defines several 1 and 2 symbols to be the same as 3 symbols (abbreviated).
Expand Down Expand Up @@ -273,15 +301,23 @@ impl FieldSymbol {
Self::Hour(Hour::H24) => 21,
Self::Minute => 22,
Self::Second(Second::Second) => 23,
Self::Second(Second::FractionalSecond) => 24,
Self::Second(Second::Millisecond) => 25,
Self::TimeZone(TimeZone::LowerZ) => 26,
Self::TimeZone(TimeZone::UpperZ) => 27,
Self::TimeZone(TimeZone::UpperO) => 28,
Self::TimeZone(TimeZone::LowerV) => 29,
Self::TimeZone(TimeZone::UpperV) => 30,
Self::TimeZone(TimeZone::LowerX) => 31,
Self::TimeZone(TimeZone::UpperX) => 32,
Self::Second(Second::Millisecond) => 24,
Self::DecimalSecond(DecimalSecond::SecondF1) => 31,
Self::DecimalSecond(DecimalSecond::SecondF2) => 32,
Self::DecimalSecond(DecimalSecond::SecondF3) => 33,
Self::DecimalSecond(DecimalSecond::SecondF4) => 34,
Self::DecimalSecond(DecimalSecond::SecondF5) => 35,
Self::DecimalSecond(DecimalSecond::SecondF6) => 36,
Self::DecimalSecond(DecimalSecond::SecondF7) => 37,
Self::DecimalSecond(DecimalSecond::SecondF8) => 38,
Self::DecimalSecond(DecimalSecond::SecondF9) => 39,
Self::TimeZone(TimeZone::LowerZ) => 100,
Self::TimeZone(TimeZone::UpperZ) => 101,
Self::TimeZone(TimeZone::UpperO) => 102,
Self::TimeZone(TimeZone::LowerV) => 103,
Self::TimeZone(TimeZone::UpperV) => 104,
Self::TimeZone(TimeZone::LowerX) => 105,
Self::TimeZone(TimeZone::UpperX) => 106,
}
}
}
Expand Down Expand Up @@ -314,6 +350,7 @@ impl TryFrom<char> for FieldSymbol {
})
.or_else(|_| Second::try_from(ch).map(Self::Second))
.or_else(|_| TimeZone::try_from(ch).map(Self::TimeZone))
// Note: char-to-enum conversion for DecimalSecond is handled directly in the parser
}
}

Expand All @@ -331,6 +368,8 @@ impl From<FieldSymbol> for char {
FieldSymbol::Minute => 'm',
FieldSymbol::Second(second) => second.into(),
FieldSymbol::TimeZone(time_zone) => time_zone.into(),
// Note: This is only used for representing the integer portion
FieldSymbol::DecimalSecond(_) => 's',
}
}
}
Expand Down Expand Up @@ -509,10 +548,6 @@ impl LengthType for Month {
FieldLength::Wide => TextOrNumeric::Text,
FieldLength::Narrow => TextOrNumeric::Text,
FieldLength::Six => TextOrNumeric::Text,
FieldLength::Fixed(_) | FieldLength::TimeZoneFallbackOverride(_) => {
debug_assert!(false, "Invalid field length for month");
TextOrNumeric::Text
}
}
}
}
Expand Down Expand Up @@ -553,19 +588,18 @@ field_type!(
HourULE
);

// NOTE: 'S' FractionalSecond is represented via DecimalSecond,
// so it is not included in the Second enum.

field_type!(
/// An enum for the possible symbols of a second field in a date pattern.
Second; {
/// Field symbol for second (numeric).
's' => Second = 0,
/// Field symbol for fractional second (numeric).
///
/// Produces the number of digits specified by the field length.
'S' => FractionalSecond = 1,
/// Field symbol for milliseconds in day (numeric).
///
/// This field behaves exactly like a composite of all time-related fields, not including the zone fields.
'A' => Millisecond = 2,
'A' => Millisecond = 1,
};
Numeric;
SecondULE
Expand Down Expand Up @@ -697,3 +731,55 @@ impl LengthType for TimeZone {
}
}
}

/// A second field with fractional digits.
#[derive(
Debug, Eq, PartialEq, Ord, PartialOrd, Clone, Copy, yoke::Yokeable, zerofrom::ZeroFrom,
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize, databake::Bake),
databake(path = icu_datetime::fields),
)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[allow(clippy::enum_variant_names)]
#[repr(u8)]
#[zerovec::make_ule(DecimalSecondULE)]
#[zerovec::derive(Debug)]
#[allow(clippy::exhaustive_enums)] // used in data struct
pub enum DecimalSecond {
/// A second with 1 fractional digit: "1.0"
SecondF1 = 1,
/// A second with 2 fractional digits: "1.00"
SecondF2 = 2,
/// A second with 3 fractional digits: "1.000"
SecondF3 = 3,
/// A second with 4 fractional digits: "1.0000"
SecondF4 = 4,
/// A second with 5 fractional digits: "1.00000"
SecondF5 = 5,
/// A second with 6 fractional digits: "1.000000"
SecondF6 = 6,
/// A second with 7 fractional digits: "1.0000000"
SecondF7 = 7,
/// A second with 8 fractional digits: "1.00000000"
SecondF8 = 8,
/// A second with 9 fractional digits: "1.000000000"
SecondF9 = 9,
}

impl DecimalSecond {
#[inline]
pub(crate) fn idx(self) -> u8 {
self as u8
}
#[inline]
pub(crate) fn from_idx(idx: u8) -> Result<Self, SymbolError> {
Self::new_from_u8(idx).ok_or(SymbolError::InvalidIndex(idx))
}
}
impl From<DecimalSecond> for FieldSymbol {
fn from(input: DecimalSecond) -> Self {
Self::DecimalSecond(input)
}
}
Loading

0 comments on commit 12a5038

Please sign in to comment.