Skip to content

Commit

Permalink
Added POSIX modifiers support for BCP47 conversion
Browse files Browse the repository at this point in the history
1. Renamed function `parse_locale_code` to `posix_to_bcp47`.
2. Changed return type from `Option<String>` to `String`.
3. Changed locale cropping to trim at the first occurrence of either dot (`.`) or at sign (`@`), removing any modifiers.
4. Added tests for modifier cropping.
5. Added doc comment for `posix_to_bcp47`.
  • Loading branch information
pasabanov committed Sep 22, 2024
1 parent 13823e9 commit 43e7b36
Showing 1 changed file with 36 additions and 21 deletions.
57 changes: 36 additions & 21 deletions src/unix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,41 @@ fn _get(env: &impl EnvAccess) -> Option<String> {
.filter(|val| !val.is_empty())
.or_else(|| env.get(LANG))?;

parse_locale_code(&code)
Some(posix_to_bcp47(&code))
}

fn parse_locale_code(code: &str) -> Option<String> {
// Some locales are returned with the char encoding too, such as `en_US.UTF-8`
code.split_once('.')
.map(|(s, _)| s)
.or(Some(code))
.map(|s| s.replace('_', "-"))
/// Translates a POSIX locale identifier into a BCP47-style language tag.
///
/// Only the leading portion of `code` is kept: everything from the first `.`
/// (the character encoding) or the first `@` (the modifier), whichever comes
/// first, is discarded. In what remains, every underscore (`_`) is rewritten
/// as a hyphen (`-`). An input containing neither `.` nor `@` is kept whole.
///
/// # Examples
///
/// ```do-not-compile
/// // Encoding suffix is dropped.
/// assert_eq!(posix_to_bcp47("en_US.UTF-8"), "en-US");
///
/// // Modifier suffix is dropped.
/// assert_eq!(posix_to_bcp47("fr_FR@dict"), "fr-FR");
///
/// // Encoding and modifier together are both dropped.
/// assert_eq!(posix_to_bcp47("de_DE.UTF-8@euro"), "de-DE");
/// ```
///
/// # TODO
///
/// Implement POSIX to BCP47 modifier conversion.
fn posix_to_bcp47(code: &str) -> String {
    // Byte offset of the first encoding/modifier separator; the whole input
    // is kept when neither separator is present.
    let cut = code
        .find(|c: char| matches!(c, '.' | '@'))
        .unwrap_or(code.len());

    // `find` returns the offset at which the matched char starts, so slicing
    // there always lands on a char boundary.
    code[..cut].replace('_', "-")
}

#[cfg(test)]
mod tests {
use super::{parse_locale_code, EnvAccess, _get, LANG, LC_ALL, LC_CTYPE};
use super::{posix_to_bcp47, EnvAccess, _get, LANG, LC_ALL, LC_CTYPE};
use std::{
collections::HashMap,
ffi::{OsStr, OsString},
Expand All @@ -64,21 +85,15 @@ mod tests {

#[test]
fn parse_identifier() {
let identifier = "fr_FR.UTF-8";
assert_eq!(parse_locale_code(identifier).as_deref(), Some(PARSE_LOCALE));
assert_eq!(posix_to_bcp47("fr_FR.UTF-8"), "fr-FR");
assert_eq!(posix_to_bcp47("fr_FR@euro"), "fr-FR");
assert_eq!(posix_to_bcp47("fr_FR.UTF-8@euro"), "fr-FR");
}

#[test]
fn parse_non_suffixed_identifier() {
assert_eq!(
parse_locale_code(PARSE_LOCALE).as_deref(),
Some(PARSE_LOCALE)
);

assert_eq!(
parse_locale_code(LANG_PARSE_LOCALE).as_deref(),
Some(PARSE_LOCALE)
);
assert_eq!(posix_to_bcp47(PARSE_LOCALE), PARSE_LOCALE);
assert_eq!(posix_to_bcp47(LANG_PARSE_LOCALE), PARSE_LOCALE);
}

#[test]
Expand Down Expand Up @@ -111,14 +126,14 @@ mod tests {

let set_code = _get(&env).unwrap();
assert_eq!(set_code, PARSE_LOCALE);
assert_eq!(parse_locale_code(&set_code).as_deref(), Some(PARSE_LOCALE));
assert_eq!(posix_to_bcp47(&set_code), PARSE_LOCALE);

// Ensure the 2nd will be skipped when empty as well.
env.insert(LC_CTYPE.into(), String::new());
env.insert(LANG.into(), PARSE_LOCALE.to_owned());

let set_code = _get(&env).unwrap();
assert_eq!(set_code, PARSE_LOCALE);
assert_eq!(parse_locale_code(&set_code).as_deref(), Some(PARSE_LOCALE));
assert_eq!(posix_to_bcp47(&set_code), PARSE_LOCALE);
}
}

0 comments on commit 43e7b36

Please sign in to comment.