Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jun 28, 2022
2 parents 409ce0e + 7599633 commit e6236ff
Show file tree
Hide file tree
Showing 23 changed files with 409 additions and 83 deletions.
2 changes: 1 addition & 1 deletion CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ experimental/codepointtrie/ @echeran
experimental/collator/ @hsivonen @echeran
experimental/normalizer/ @hsivonen @echeran
experimental/provider_ppucd/ @echeran
experimental/segmenter/ @aethanyc @makotokato
experimental/segmenter/ @aethanyc @makotokato @sffc
ffi/capi/ @Manishearth
ffi/cpp/ @Manishearth
ffi/ecma402/ @filmil
Expand Down
1 change: 1 addition & 0 deletions components/icu/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
clippy::panic
)
)]
#![warn(missing_docs)]

pub mod calendar {
//! Contains the core types used by ICU4X for dealing
Expand Down
3 changes: 1 addition & 2 deletions components/list/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#![warn(missing_docs)]

//! [`icu_list`](crate) provides the [`ListFormatter`] which renders sequences of [`Writeable`](
//! writeable::Writeable)s as lists in a locale-sensitive way.
//!
Expand Down Expand Up @@ -60,6 +58,7 @@
clippy::exhaustive_enums
)
)]
#![warn(missing_docs)]

extern crate alloc;

Expand Down
10 changes: 7 additions & 3 deletions components/locid/src/extensions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ pub enum ExtensionType {
}

impl ExtensionType {
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
pub fn from_byte(key: u8) -> Result<Self, ParserError> {
pub(crate) fn from_byte(key: u8) -> Result<Self, ParserError> {
let key = key.to_ascii_lowercase();
match key {
b'u' => Ok(Self::Unicode),
Expand All @@ -88,12 +87,17 @@ impl ExtensionType {

/// A map of extensions associated with a given [`Locale`](crate::Locale).
#[derive(Debug, Default, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
#[non_exhaustive]
pub struct Extensions {
    /// A representation of the data for a Unicode extension, when present in the locale identifier.
    pub unicode: Unicode,
    /// A representation of the data for a transform extension, when present in the locale identifier.
    pub transform: Transform,
    /// A representation of the data for a private-use extension, when present in the locale identifier.
    pub private: Private,
    /// A sequence of any other extensions that are present in the locale identifier
    /// but do not belong to one of the formally [defined](https://unicode.org/reports/tr35/)
    /// kinds represented explicitly above as [`Unicode`], [`Transform`], and [`Private`].
    pub other: Vec<Other>,
}

Expand Down
2 changes: 1 addition & 1 deletion components/locid/src/extensions/other/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ const KEY_LENGTH: RangeInclusive<usize> = 2..=8;

impl Key {
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
pub fn valid_key(v: &[u8]) -> bool {
pub(crate) fn valid_key(v: &[u8]) -> bool {
KEY_LENGTH.contains(&v.len())
}

Expand Down
4 changes: 3 additions & 1 deletion components/locid/src/extensions/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,12 @@ use litemap::LiteMap;
/// [`RFC 6497`]: https://www.ietf.org/rfc/rfc6497.txt
/// [`Unicode Locale Identifier`]: https://unicode.org/reports/tr35/#Unicode_locale_identifier
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
pub struct Transform {
    /// The [`LanguageIdentifier`] specified with this locale extension, or `None` if not present.
    pub lang: Option<LanguageIdentifier>,
    /// The key-value pairs present in this locale extension, with each extension key subtag
    /// associated with its provided value subtag.
    pub fields: Fields,
}

Expand Down
13 changes: 6 additions & 7 deletions components/locid/src/extensions/transform/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,6 @@ use core::ops::RangeInclusive;
use core::str::FromStr;
use tinystr::TinyAsciiStr;

#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
pub struct Value(Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);

const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");

/// A value used in a list of [`Fields`](super::Fields).
///
/// The value has to be a sequence of one or more alphanumerical strings
Expand All @@ -35,6 +28,12 @@ const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");
/// assert_eq!(&value1.to_string(), "hybrid");
/// assert_eq!(&value2.to_string(), "hybrid-foobar");
/// ```
#[derive(Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
pub struct Value(Vec<TinyAsciiStr<{ *TYPE_LENGTH.end() }>>);

// Each subtag of a transform value must be 3 to 8 characters long.
const TYPE_LENGTH: RangeInclusive<usize> = 3..=8;
// The special tvalue "true" — presumably the implicit/default value that is
// treated specially during parsing/serialization; confirm against the impl below.
const TRUE_TVALUE: TinyAsciiStr<8> = tinystr::tinystr!(8, "true");

impl Value {
/// A constructor which takes a utf8 slice, parses it and
/// produces a well-formed [`Value`].
Expand Down
92 changes: 92 additions & 0 deletions components/locid/src/extensions/unicode/keywords.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::borrow::Borrow;
use core::cmp::Ordering;
use core::iter::FromIterator;
use litemap::LiteMap;

use super::Key;
use super::Value;
use crate::ordering::SubtagOrderingResult;

/// A list of [`Key`]-[`Value`] pairs representing functional information
/// about a locale's internationalization preferences.
Expand Down Expand Up @@ -270,6 +272,96 @@ impl Keywords {
self.0.retain(|k, _| predicate(k))
}

/// Compare this [`Keywords`] with BCP-47 bytes.
///
/// Produces the same result as first serializing this [`Keywords`] to its
/// BCP-47 string form and then byte-comparing that string against `other`.
///
/// The comparison is case-sensitive and forms a *total order*, so it is safe
/// to use for binary search; `self.to_string()` is the only argument for
/// which [`Ordering::Equal`] is returned.
///
/// # Examples
///
/// ```
/// use icu::locid::Locale;
/// use icu::locid::extensions::unicode::Keywords;
/// use std::cmp::Ordering;
///
/// let bcp47_strings: &[&str] = &[
///     "ca-hebrew",
///     "ca-japanese",
///     "ca-japanese-nu-latn",
///     "nu-latn",
/// ];
///
/// for ab in bcp47_strings.windows(2) {
///     let a = ab[0];
///     let b = ab[1];
///     assert!(a.cmp(b) == Ordering::Less);
///     let a_kwds = format!("und-u-{}", a).parse::<Locale>().unwrap().extensions.unicode.keywords;
///     assert_eq!(a, a_kwds.to_string());
///     assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
///     assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
/// }
/// ```
pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
    // Break `other` into its hyphen-delimited subtags, then resolve the
    // subtag-by-subtag comparison into a definite ordering.
    let other_subtags = other.split(|b| *b == b'-');
    self.strict_cmp_iter(other_subtags).end()
}

/// Compare this [`Keywords`] with an iterator of BCP-47 subtags.
///
/// Equality semantics are the same as for [`Keywords::strict_cmp`]; this is
/// the more modular building block, allowing multiple subtag iterators to be
/// chained together.
///
/// For an additional example, see [`SubtagOrderingResult`].
///
/// # Examples
///
/// ```
/// use icu::locid::Locale;
/// use icu::locid::extensions::unicode::Keywords;
/// use std::cmp::Ordering;
///
/// let subtags: &[&[u8]] = &[&*b"ca", &*b"buddhist"];
///
/// let kwds = "und-u-ca-buddhist".parse::<Locale>().unwrap().extensions.unicode.keywords;
/// assert_eq!(
///     Ordering::Equal,
///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
/// );
///
/// let kwds = "und".parse::<Locale>().unwrap().extensions.unicode.keywords;
/// assert_eq!(
///     Ordering::Less,
///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
/// );
///
/// let kwds = "und-u-nu-latn".parse::<Locale>().unwrap().extensions.unicode.keywords;
/// assert_eq!(
///     Ordering::Greater,
///     kwds.strict_cmp_iter(subtags.iter().copied()).end()
/// );
/// ```
pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
where
    I: Iterator<Item = &'l [u8]>,
{
    // Walk our own subtags in canonical order, pairing each one with the
    // next subtag drawn from the caller's iterator.
    let outcome = self.for_each_subtag_str(&mut |subtag| match subtags.next() {
        Some(expected) => match subtag.as_bytes().cmp(expected) {
            // Subtags agree: keep scanning.
            Ordering::Equal => Ok(()),
            // First mismatch decides the ordering; short-circuit with it.
            unequal => Err(unequal),
        },
        // The caller's iterator ran out first, so `self` sorts after it.
        None => Err(Ordering::Greater),
    });
    match outcome {
        // Every one of our subtags matched; hand the caller's remainder back
        // so further iterators can be chained.
        Ok(()) => SubtagOrderingResult::Subtags(subtags),
        // A decisive ordering was found along the way.
        Err(ordering) => SubtagOrderingResult::Ordering(ordering),
    }
}

pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
where
F: FnMut(&str) -> Result<(), E>,
Expand Down
4 changes: 3 additions & 1 deletion components/locid/src/extensions/unicode/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,12 @@ use litemap::LiteMap;
/// assert_eq!(loc.extensions.unicode.keywords.get(&key), Some(&value));
/// ```
#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
#[allow(missing_docs)] // TODO(#1028) - Add missing docs.
#[allow(clippy::exhaustive_structs)] // spec-backed stable datastructure
pub struct Unicode {
    /// The key-value pairs present in this locale extension, with each extension key subtag
    /// associated with its provided value subtag.
    pub keywords: Keywords,
    /// A canonically ordered sequence of single standalone subtags for this locale extension.
    pub attributes: Attributes,
}

Expand Down
80 changes: 60 additions & 20 deletions components/locid/src/langid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
use core::cmp::Ordering;
use core::str::FromStr;

use crate::ordering::SubtagOrderingResult;
use crate::parser::{
get_subtag_iterator, parse_language_identifier, parse_language_identifier_without_variants,
ParserError, ParserMode,
Expand Down Expand Up @@ -159,10 +160,10 @@ impl LanguageIdentifier {
Ok(lang_id.to_string())
}

/// Compare this `LanguageIdentifier` with BCP-47 bytes.
/// Compare this [`LanguageIdentifier`] with BCP-47 bytes.
///
/// The return value is equivalent to what would happen if you first converted this
/// `LanguageIdentifier` to a BCP-47 string and then performed a byte comparison.
/// [`LanguageIdentifier`] to a BCP-47 string and then performed a byte comparison.
///
/// This function is case-sensitive and results in a *total order*, so it is appropriate for
/// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
Expand All @@ -173,28 +174,69 @@ impl LanguageIdentifier {
/// use icu::locid::LanguageIdentifier;
/// use std::cmp::Ordering;
///
/// let bcp47_strings: &[&[u8]] = &[
/// b"pl-Latn-PL",
/// b"und",
/// b"und-Adlm",
/// b"und-GB",
/// b"und-ZA",
/// b"und-fonipa",
/// b"zh",
/// let bcp47_strings: &[&str] = &[
/// "pl-Latn-PL",
/// "und",
/// "und-Adlm",
/// "und-GB",
/// "und-ZA",
/// "und-fonipa",
/// "zh",
/// ];
///
/// for ab in bcp47_strings.windows(2) {
/// let a = ab[0];
/// let b = ab[1];
/// assert!(a.cmp(b) == Ordering::Less);
/// let a_langid = LanguageIdentifier::from_bytes(a).unwrap();
/// assert!(a_langid.strict_cmp(b) == Ordering::Less);
/// let a_langid = a.parse::<LanguageIdentifier>().unwrap();
/// assert_eq!(a, a_langid.to_string());
/// assert!(a_langid.strict_cmp(a.as_bytes()) == Ordering::Equal);
/// assert!(a_langid.strict_cmp(b.as_bytes()) == Ordering::Less);
/// }
/// ```
pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
let mut other_iter = other.split(|b| *b == b'-');
self.strict_cmp_iter(other.split(|b| *b == b'-')).end()
}

/// Compare this [`LanguageIdentifier`] with an iterator of BCP-47 subtags.
///
/// This function has the same equality semantics as [`LanguageIdentifier::strict_cmp`]. It is intended as
/// a more modular version that allows multiple subtag iterators to be chained together.
///
/// For an additional example, see [`SubtagOrderingResult`].
///
/// # Examples
///
/// ```
/// use icu::locid::LanguageIdentifier;
/// use std::cmp::Ordering;
///
/// let subtags: &[&[u8]] = &[&*b"ca", &*b"ES", &*b"valencia"];
///
/// let loc = "ca-ES-valencia".parse::<LanguageIdentifier>().unwrap();
/// assert_eq!(
/// Ordering::Equal,
/// loc.strict_cmp_iter(subtags.iter().copied()).end()
/// );
///
/// let loc = "ca-ES".parse::<LanguageIdentifier>().unwrap();
/// assert_eq!(
/// Ordering::Less,
/// loc.strict_cmp_iter(subtags.iter().copied()).end()
/// );
///
/// let loc = "ca-ZA".parse::<LanguageIdentifier>().unwrap();
/// assert_eq!(
/// Ordering::Greater,
/// loc.strict_cmp_iter(subtags.iter().copied()).end()
/// );
/// ```
pub fn strict_cmp_iter<'l, I>(&self, mut subtags: I) -> SubtagOrderingResult<I>
where
I: Iterator<Item = &'l [u8]>,
{
let r = self.for_each_subtag_str(&mut |subtag| {
if let Some(other) = other_iter.next() {
if let Some(other) = subtags.next() {
match subtag.as_bytes().cmp(other) {
Ordering::Equal => Ok(()),
not_equal => Err(not_equal),
Expand All @@ -203,14 +245,12 @@ impl LanguageIdentifier {
Err(Ordering::Greater)
}
});
if let Err(o) = r {
return o;
}
if other_iter.next().is_some() {
return Ordering::Less;
match r {
Ok(_) => SubtagOrderingResult::Subtags(subtags),
Err(o) => SubtagOrderingResult::Ordering(o),
}
Ordering::Equal
}

/// Compare this `LanguageIdentifier` with a potentially unnormalized BCP-47 string.
///
/// The return value is equivalent to what would happen if you first parsed the
Expand Down
Loading

0 comments on commit e6236ff

Please sign in to comment.