diff --git a/components/cldr-json-data-provider/Cargo.toml b/components/cldr-json-data-provider/Cargo.toml index d71c4bcc876..8d2a8de176f 100644 --- a/components/cldr-json-data-provider/Cargo.toml +++ b/components/cldr-json-data-provider/Cargo.toml @@ -23,3 +23,17 @@ serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" serde-tuple-vec-map = "1.0" tinystr = "0.3" + +# Dependencies for the download feature +urlencoding = { version = "1.1", optional = true } +reqwest = { version = "0.10.8", features = ["blocking"], optional = true } +unzip = { version = "0.1.0", optional = true } +dirs = { version = "3.0", optional = true } +log = { version = "0.4", optional = true } + +[dev-dependencies] +mktemp = "0.4" + +[features] +# Automatically download CLDR data from a host +download = ["urlencoding", "reqwest", "unzip", "dirs", "log"] diff --git a/components/cldr-json-data-provider/src/cldr_paths.rs b/components/cldr-json-data-provider/src/cldr_paths.rs index 68d7d2a25ca..298418c97b8 100644 --- a/components/cldr-json-data-provider/src/cldr_paths.rs +++ b/components/cldr-json-data-provider/src/cldr_paths.rs @@ -1,21 +1,50 @@ -use crate::error::MissingSourceError; +use crate::error::{Error, MissingSourceError}; use std::default::Default; use std::path::PathBuf; -/// Struct containing filesystem paths to the CLDR JSON resource directories. +/// Trait returning filesystem paths to CLDR JSON resource directories. /// The fields should be Ok if present. They default to Err when not present. +pub trait CldrPaths { + fn cldr_core(&self) -> Result; + fn cldr_dates(&self) -> Result; +} + +/// Implementation of CldrPaths for data directories already downloaded. 
+/// +/// # Example +/// +/// ``` +/// use icu_cldr_json_data_provider::CldrPathsLocal; +/// use icu_cldr_json_data_provider::CldrJsonDataProvider; +/// use std::path::PathBuf; +/// +/// let mut paths = CldrPathsLocal::default(); +/// paths.cldr_core = Ok(PathBuf::from("/path/to/cldr-core")); +/// // fill in other paths as necessary +/// +/// let data_provider = CldrJsonDataProvider::new(&paths); +/// ``` #[non_exhaustive] #[derive(Debug, PartialEq)] -pub struct CldrPaths { +pub struct CldrPathsLocal { /// Path to checkout of cldr-core: /// https://github.com/unicode-cldr/cldr-core pub cldr_core: Result, pub cldr_dates: Result, } -impl Default for CldrPaths { - fn default() -> CldrPaths { - CldrPaths { +impl CldrPaths for CldrPathsLocal { + fn cldr_core(&self) -> Result { + self.cldr_core.clone().map_err(|e| e.into()) + } + fn cldr_dates(&self) -> Result { + self.cldr_dates.clone().map_err(|e| e.into()) + } +} + +impl Default for CldrPathsLocal { + fn default() -> CldrPathsLocal { + CldrPathsLocal { cldr_core: Err(MissingSourceError { src: "cldr-core" }), cldr_dates: Err(MissingSourceError { src: "cldr-dates" }), } diff --git a/components/cldr-json-data-provider/src/download/cldr_paths_download.rs b/components/cldr-json-data-provider/src/download/cldr_paths_download.rs new file mode 100644 index 00000000000..7cdd552f53a --- /dev/null +++ b/components/cldr-json-data-provider/src/download/cldr_paths_download.rs @@ -0,0 +1,106 @@ +use super::error::DownloadError; +use super::io_util; +use crate::error::Error; +use crate::CldrPaths; +use std::path::PathBuf; + +/// Implementation of CldrPaths that downloads CLDR data directories on demand. 
+/// The download artifacts are saved in the user's cache directory; see +/// https://docs.rs/dirs/3.0.0/dirs/fn.cache_dir.html +/// +/// # Example +/// +/// ``` +/// use icu_cldr_json_data_provider::download::CldrPathsDownload; +/// use icu_cldr_json_data_provider::CldrJsonDataProvider; +/// use std::path::PathBuf; +/// +/// let paths = CldrPathsDownload::try_from_github_tag("36.0.0") +/// .expect("Cache directory not found"); +/// +/// let data_provider = CldrJsonDataProvider::new(&paths); +/// +/// fn demo<'d>(data_provider: &'d CldrJsonDataProvider<'d, 'd>) { +/// use std::borrow::Cow; +/// use icu_data_provider::prelude::*; +/// use icu_data_provider::structs::plurals::PluralRuleStringsV1; +/// +/// let data: Cow = data_provider +/// .load(&DataRequest { +/// data_entry: DataEntry { +/// langid: "uk".parse().unwrap(), +/// variant: None, +/// }, +/// data_key: icu_data_key!(plurals: ordinal@1), +/// }) +/// .unwrap() +/// .take_payload() +/// .unwrap(); +/// assert_eq!(data.few, Some(Cow::Borrowed("n % 10 = 3 and n % 100 != 13"))); +/// } +/// +/// // Calling demo(&data_provider) will cause the data to actually get downloaded. +/// //demo(&data_provider); +/// ``` +pub struct CldrPathsDownload { + /// Directory where downloaded files are stored. + pub cache_dir: PathBuf, + + pub cldr_core: CldrZipFileInfo, + pub cldr_dates: CldrZipFileInfo, +} + +// TODO(#297): Implement this async. +impl CldrPaths for CldrPathsDownload { + fn cldr_core(&self) -> Result { + self.cldr_core.download_and_unzip(&self) + } + fn cldr_dates(&self) -> Result { + self.cldr_dates.download_and_unzip(&self) + } +} + +impl CldrPathsDownload { + /// Creates a CldrPathsDownload that downloads files to the system cache directory + /// as determined by dirs::cache_dir(). 
+ /// + /// github_tag should be a tag in the CLDR JSON repositories, such as "36.0.0": + /// https://github.com/unicode-cldr/cldr-core/tags + pub fn try_from_github_tag(github_tag: &str) -> Result { + Ok(Self { + cache_dir: dirs::cache_dir() + .ok_or(DownloadError::NoCacheDir)? + .join("icu4x") + .join("cldr"), + cldr_core: CldrZipFileInfo { + url: format!( + "https://github.com/unicode-cldr/cldr-core/archive/{}.zip", + github_tag + ), + top_dir: format!("cldr-core-{}", github_tag), + }, + cldr_dates: CldrZipFileInfo { + url: format!( + "https://github.com/unicode-cldr/cldr-dates-modern/archive/{}.zip", + github_tag + ), + top_dir: format!("cldr-dates-modern-{}", github_tag), + }, + }) + } +} + +pub struct CldrZipFileInfo { + /// The URL to the remote zip file + pub url: String, + /// The directory name in the unpacked zip file + pub top_dir: String, +} + +impl CldrZipFileInfo { + fn download_and_unzip(&self, parent: &CldrPathsDownload) -> Result { + io_util::download_and_unzip(&self.url, &parent.cache_dir) + .map(|p| p.join(&self.top_dir)) + .map_err(|e| e.into()) + } +} diff --git a/components/cldr-json-data-provider/src/download/error.rs b/components/cldr-json-data-provider/src/download/error.rs new file mode 100644 index 00000000000..3a3bd60e7fe --- /dev/null +++ b/components/cldr-json-data-provider/src/download/error.rs @@ -0,0 +1,46 @@ +use std::error; +use std::fmt; +use std::io; +use std::path::PathBuf; + +#[derive(Debug)] +pub enum DownloadError { + Io(io::Error, PathBuf), + Reqwest(reqwest::Error), + HttpStatus(reqwest::StatusCode, String), + NoCacheDir, +} + +impl From for DownloadError { + /// Note: Prefer adding the path to Error::Io instead of using this conversion. 
+ fn from(err: io::Error) -> Self { + Self::Io(err, PathBuf::new()) + } +} + +impl From for DownloadError { + fn from(err: reqwest::Error) -> Self { + Self::Reqwest(err) + } +} + +impl fmt::Display for DownloadError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Io(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()), + Self::Reqwest(err) => err.fmt(f), + Self::HttpStatus(status, url) => write!(f, "HTTP request failed: {}: {}", status, url), + Self::NoCacheDir => write!(f, "dirs::cache_dir() returned None"), + } + } +} + +impl error::Error for DownloadError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Io(err, _) => Some(err), + Self::Reqwest(err) => Some(err), + _ => None, + } + } +} diff --git a/components/cldr-json-data-provider/src/download/io_util.rs b/components/cldr-json-data-provider/src/download/io_util.rs new file mode 100644 index 00000000000..dc9414b8d07 --- /dev/null +++ b/components/cldr-json-data-provider/src/download/io_util.rs @@ -0,0 +1,110 @@ +use super::error::DownloadError; +use std::fs::{self, File}; +use std::path::{Path, PathBuf}; +use std::time::Instant; +use unzip::Unzipper; + +macro_rules! map_io_err { + ($path_ref:ident) => { + |err| DownloadError::Io(err, $path_ref.to_owned()) + }; +} + +#[cfg(test)] +fn assert_files_eq(expected_file_path: &Path, actual_file_path: &Path) { + use std::io::Read; + let mut expected_buf = Vec::new(); + File::open(expected_file_path) + .unwrap() + .read_to_end(&mut expected_buf) + .unwrap(); + let mut actual_buf = Vec::new(); + File::open(&actual_file_path) + .unwrap() + .read_to_end(&mut actual_buf) + .unwrap(); + assert_eq!(expected_buf, actual_buf); +} + +// Synchronously download url and save it to destination. +// TODO(#297): Implement this async. 
+fn download_sync(url: &str, destination: &Path) -> Result<(), DownloadError> { + log::info!("Downloading: {}", url); + let start = Instant::now(); + let mut response = reqwest::blocking::get(url)?; + if !response.status().is_success() { + return Err(DownloadError::HttpStatus( + response.status(), + url.to_string(), + )); + } + log::info!("Status: {}", response.status()); + let mut file = File::create(destination).map_err(map_io_err!(destination))?; + response.copy_to(&mut file)?; + log::info!("Finished in {:.2} seconds", start.elapsed().as_secs_f64()); + Ok(()) +} + +#[test] +fn test_download_sync() -> Result<(), DownloadError> { + let temp_file = mktemp::Temp::new_file()?; + download_sync( + "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", + &temp_file, + )?; + assert_files_eq(&PathBuf::from("./tests/testdata/dummy.pdf"), &temp_file); + Ok(()) +} + +/// Synchronously unpack a zip file into a destination directory. +// TODO(#297): Implement this async. +fn unzip_sync(zip_path: &Path, dir_path: &Path) -> Result<(), DownloadError> { + let reader = File::open(zip_path).map_err(map_io_err!(zip_path))?; + log::info!("Unzipping..."); + let start = Instant::now(); + Unzipper::new(reader, dir_path) + .unzip() + .map_err(map_io_err!(dir_path))?; + log::info!("Unzipped in {:.2} seconds", start.elapsed().as_secs_f64()); + Ok(()) +} + +#[test] +fn test_unzip_sync() -> Result<(), DownloadError> { + let temp_dir = mktemp::Temp::new_dir()?; + unzip_sync(&PathBuf::from("./tests/testdata/dummy.zip"), &temp_dir)?; + assert_files_eq( + &PathBuf::from("./tests/testdata/dummy.pdf"), + &temp_dir.to_path_buf().join("dummy.pdf"), + ); + Ok(()) +} + +/// Downloads and unpacks a zip file, returning the path to the unpacked directory. +/// +/// `cache_dir` is a directory where both the zip file and the unpacked directory will be +/// saved. If the zip file has already been downloaded, it will not be downloaded again. 
+pub fn download_and_unzip(zip_file_url: &str, cache_dir: &Path) -> Result { + fs::create_dir_all(cache_dir).map_err(map_io_err!(cache_dir))?; + + let zip_dir = cache_dir.to_path_buf().join("zips"); + fs::create_dir_all(&zip_dir).map_err(map_io_err!(zip_dir))?; + + let data_dir = cache_dir.to_path_buf().join("data"); + fs::create_dir_all(&data_dir).map_err(map_io_err!(data_dir))?; + + let basename = urlencoding::encode(zip_file_url); + let mut zip_path = zip_dir.join(&basename); + zip_path.set_extension("zip"); + let dir_path = data_dir.join(&basename); + + if !zip_path.exists() { + download_sync(zip_file_url, &zip_path)?; + } + + if !dir_path.exists() { + unzip_sync(&zip_path, &dir_path)?; + } + + Ok(dir_path) +} diff --git a/components/cldr-json-data-provider/src/download/mod.rs b/components/cldr-json-data-provider/src/download/mod.rs new file mode 100644 index 00000000000..22c83e8a225 --- /dev/null +++ b/components/cldr-json-data-provider/src/download/mod.rs @@ -0,0 +1,6 @@ +mod cldr_paths_download; +mod error; +mod io_util; + +pub use cldr_paths_download::CldrPathsDownload; +pub use error::DownloadError; diff --git a/components/cldr-json-data-provider/src/error.rs b/components/cldr-json-data-provider/src/error.rs index 51a20668a08..4764800c1fa 100644 --- a/components/cldr-json-data-provider/src/error.rs +++ b/components/cldr-json-data-provider/src/error.rs @@ -1,12 +1,17 @@ use std::error; use std::fmt; +#[cfg(feature = "download")] +use crate::download::DownloadError; + #[non_exhaustive] #[derive(Debug)] pub enum Error { JsonError(serde_json::error::Error), IoError(std::io::Error, std::path::PathBuf), MissingSource(MissingSourceError), + #[cfg(feature = "download")] + Download(DownloadError), PoisonError, } @@ -33,12 +38,24 @@ impl From for Error { } } +#[cfg(feature = "download")] +impl From for Error { + fn from(err: DownloadError) -> Error { + match err { + DownloadError::Io(err, path) => Error::IoError(err, path), + _ => Error::Download(err), + } + } +} + 
impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Error::JsonError(err) => write!(f, "{}", err), Error::IoError(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()), Error::MissingSource(err) => err.fmt(f), + #[cfg(feature = "download")] + Error::Download(err) => err.fmt(f), Error::PoisonError => write!(f, "poisoned lock on CLDR provider"), } } @@ -49,6 +66,8 @@ impl error::Error for Error { match self { Error::JsonError(err) => Some(err), Error::IoError(err, _) => Some(err), + #[cfg(feature = "download")] + Error::Download(err) => Some(err), _ => None, } } diff --git a/components/cldr-json-data-provider/src/lib.rs b/components/cldr-json-data-provider/src/lib.rs index e967c7bc251..c53e77fbf27 100644 --- a/components/cldr-json-data-provider/src/lib.rs +++ b/components/cldr-json-data-provider/src/lib.rs @@ -1,4 +1,18 @@ -// #![feature(type_alias_impl_trait)] +//! `icu-cldr-json-data-provider` is one of the [`ICU4X`] components. +//! +//! It contains implementations of the [`DataProvider`] interface based on the JSON files +//! shipped by CLDR. You create a CldrPaths and then pass it into CldrJsonDataProvider. +//! +//! This crate contains two implementations of CldrPaths: +//! +//! - `CldrPathsLocal` points to local copies of the CLDR JSON repositories. +//! - `CldrPathsDownload` downloads and caches the CLDR JSON repositories. Requires the +//! "download" feature. +//! +//! **Important:** This data provider implementation is not optimized for production use. +//! It is much more efficient if you use [`FsDataProvider`] instead. +//! +//! 
[`ICU4X`]: https://github.com/unicode-org/icu4x mod cldr_langid; mod cldr_paths; @@ -8,6 +22,10 @@ mod support; pub mod transform; +#[cfg(feature = "download")] +pub mod download; + pub use cldr_paths::CldrPaths; +pub use cldr_paths::CldrPathsLocal; pub use error::Error as CldrError; pub use transform::CldrJsonDataProvider; diff --git a/components/cldr-json-data-provider/src/support.rs b/components/cldr-json-data-provider/src/support.rs index f047251925c..180bc1d9e99 100644 --- a/components/cldr-json-data-provider/src/support.rs +++ b/components/cldr-json-data-provider/src/support.rs @@ -28,14 +28,14 @@ fn map_poison(_err: E) -> DataError { /// A lazy-initialized CLDR JSON data provider. impl<'b, 'd, T> LazyCldrProvider where - T: DataProvider<'d> + DataKeySupport + DataEntryCollection + TryFrom<&'b CldrPaths>, - >::Error: 'static + std::error::Error, + T: DataProvider<'d> + DataKeySupport + DataEntryCollection + TryFrom<&'b dyn CldrPaths>, + >::Error: 'static + std::error::Error, { /// Call T::load, initializing T if necessary. 
pub fn try_load( &self, req: &DataRequest, - cldr_paths: &'b CldrPaths, + cldr_paths: &'b dyn CldrPaths, ) -> Result>, DataError> { if T::supports_key(&req.data_key).is_err() { return Ok(None); @@ -57,7 +57,7 @@ where pub fn try_iter( &self, data_key: &DataKey, - cldr_paths: &'b CldrPaths, + cldr_paths: &'b dyn CldrPaths, ) -> Result>>, DataError> { if T::supports_key(data_key).is_err() { return Ok(None); diff --git a/components/cldr-json-data-provider/src/transform/dates.rs b/components/cldr-json-data-provider/src/transform/dates.rs index 17e01c1f9e5..74c78954f8c 100644 --- a/components/cldr-json-data-provider/src/transform/dates.rs +++ b/components/cldr-json-data-provider/src/transform/dates.rs @@ -16,12 +16,12 @@ pub struct DatesProvider<'d> { _phantom: PhantomData<&'d ()>, // placeholder for when we need the lifetime param } -impl TryFrom<&CldrPaths> for DatesProvider<'_> { +impl TryFrom<&dyn CldrPaths> for DatesProvider<'_> { type Error = Error; - fn try_from(cldr_paths: &CldrPaths) -> Result { + fn try_from(cldr_paths: &dyn CldrPaths) -> Result { let mut data = vec![]; - let path = cldr_paths.cldr_dates.clone()?.join("main"); + let path = cldr_paths.cldr_dates()?.join("main"); let locale_dirs = get_subdirectories(&path)?; diff --git a/components/cldr-json-data-provider/src/transform/mod.rs b/components/cldr-json-data-provider/src/transform/mod.rs index 39d0cbc6f95..3114a01234a 100644 --- a/components/cldr-json-data-provider/src/transform/mod.rs +++ b/components/cldr-json-data-provider/src/transform/mod.rs @@ -10,13 +10,13 @@ use icu_data_provider::iter::DataEntryCollection; use icu_data_provider::prelude::*; pub struct CldrJsonDataProvider<'a, 'd> { - pub cldr_paths: &'a CldrPaths, + pub cldr_paths: &'a dyn CldrPaths, plurals: LazyCldrProvider>, dates: LazyCldrProvider>, } impl<'a, 'd> CldrJsonDataProvider<'a, 'd> { - pub fn new(cldr_paths: &'a CldrPaths) -> Self { + pub fn new(cldr_paths: &'a dyn CldrPaths) -> Self { CldrJsonDataProvider { cldr_paths, 
plurals: Default::default(), @@ -27,10 +27,10 @@ impl<'a, 'd> CldrJsonDataProvider<'a, 'd> { impl<'a, 'd> DataProvider<'d> for CldrJsonDataProvider<'a, 'd> { fn load(&self, req: &DataRequest) -> Result, DataError> { - if let Some(result) = self.plurals.try_load(req, &self.cldr_paths)? { + if let Some(result) = self.plurals.try_load(req, self.cldr_paths)? { return Ok(result); } - if let Some(result) = self.dates.try_load(req, &self.cldr_paths)? { + if let Some(result) = self.dates.try_load(req, self.cldr_paths)? { return Ok(result); } Err(DataError::UnsupportedDataKey(req.data_key)) @@ -42,10 +42,10 @@ impl<'a, 'd> DataEntryCollection for CldrJsonDataProvider<'a, 'd> { &self, data_key: &DataKey, ) -> Result>, DataError> { - if let Some(resp) = self.plurals.try_iter(data_key, &self.cldr_paths)? { + if let Some(resp) = self.plurals.try_iter(data_key, self.cldr_paths)? { return Ok(resp); } - if let Some(resp) = self.dates.try_iter(data_key, &self.cldr_paths)? { + if let Some(resp) = self.dates.try_iter(data_key, self.cldr_paths)? { return Ok(resp); } Err(DataError::UnsupportedDataKey(*data_key)) diff --git a/components/cldr-json-data-provider/src/transform/plurals.rs b/components/cldr-json-data-provider/src/transform/plurals.rs index 49b3102f150..9c0ec76e379 100644 --- a/components/cldr-json-data-provider/src/transform/plurals.rs +++ b/components/cldr-json-data-provider/src/transform/plurals.rs @@ -18,13 +18,12 @@ pub struct PluralsProvider<'d> { _phantom: PhantomData<&'d ()>, // placeholder for when we need the lifetime param } -impl TryFrom<&CldrPaths> for PluralsProvider<'_> { +impl TryFrom<&dyn CldrPaths> for PluralsProvider<'_> { type Error = Error; - fn try_from(cldr_paths: &CldrPaths) -> Result { + fn try_from(cldr_paths: &dyn CldrPaths) -> Result { let cardinal_rules = { let path = cldr_paths - .cldr_core - .clone()? + .cldr_core()? 
.join("supplemental") .join("plurals.json"); let data: cldr_json::Resource = serde_json::from_reader(open_reader(path)?)?; @@ -32,8 +31,7 @@ impl TryFrom<&CldrPaths> for PluralsProvider<'_> { }; let ordinal_rules = { let path = cldr_paths - .cldr_core - .clone()? + .cldr_core()? .join("supplemental") .join("ordinals.json"); let data: cldr_json::Resource = serde_json::from_reader(open_reader(path)?)?; diff --git a/components/cldr-json-data-provider/tests/testdata/dummy.pdf b/components/cldr-json-data-provider/tests/testdata/dummy.pdf new file mode 100644 index 00000000000..774c2ea70c5 Binary files /dev/null and b/components/cldr-json-data-provider/tests/testdata/dummy.pdf differ diff --git a/components/cldr-json-data-provider/tests/testdata/dummy.zip b/components/cldr-json-data-provider/tests/testdata/dummy.zip new file mode 100644 index 00000000000..25c4f53e540 Binary files /dev/null and b/components/cldr-json-data-provider/tests/testdata/dummy.zip differ diff --git a/components/data-provider/src/iter.rs b/components/data-provider/src/iter.rs index fbffa858e4b..dbcd01071ce 100644 --- a/components/data-provider/src/iter.rs +++ b/components/data-provider/src/iter.rs @@ -19,11 +19,16 @@ pub trait IterableDataProvider<'d>: DataProvider<'d> + DataEntryCollection { /// Trait for objects capable of persisting serialized data hunks. pub trait DataExporter { + /// Save `obj` corresponding to `req`. fn put( &mut self, req: &DataRequest, obj: &dyn erased_serde::Serialize, ) -> Result<(), Box>; + + /// Whether to load and dump data for the given entry. This function enables the + /// DataExporter to filter out certain data entries. + fn includes(&self, data_entry: &DataEntry) -> bool; } impl<'d, T> IterableDataProvider<'d> for T @@ -32,9 +37,12 @@ where { fn export_key(&self, data_key: &DataKey, sink: &mut dyn DataExporter) -> Result<(), Error> { for data_entry in self.iter_for_key(data_key)? 
{ + if !sink.includes(&data_entry) { + continue; + } let req = DataRequest { data_key: *data_key, - data_entry: data_entry.clone(), + data_entry, }; let response = self.load(&req)?; let payload = response.borrow_as_serialize(); diff --git a/components/datetime/tests/fixtures/data/icu4x/manifest.json b/components/datetime/tests/fixtures/data/icu4x/manifest.json index a68a792a8e3..c0f2b714d99 100644 --- a/components/datetime/tests/fixtures/data/icu4x/manifest.json +++ b/components/datetime/tests/fixtures/data/icu4x/manifest.json @@ -1,4 +1,5 @@ { "aliasing": "NoAliases", + "locales": "IncludeAll", "syntax": "Json" } diff --git a/components/fs-data-provider/Cargo.toml b/components/fs-data-provider/Cargo.toml index 3a15a380534..b1957dcb62b 100644 --- a/components/fs-data-provider/Cargo.toml +++ b/components/fs-data-provider/Cargo.toml @@ -16,20 +16,22 @@ include = [ [dependencies] icu-data-provider = { path = "../data-provider" } -icu-locale = { path = "../locale" } +icu-locale = { path = "../locale", features = ["serde"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" # Dependencies for the export module erased-serde = { version = "0.3", optional = true } +log = { version = "0.4", optional = true } # Dependencies for the binary clap = { version = "2.33", optional = true } -icu-cldr-json-data-provider = { path = "../cldr-json-data-provider", optional = true } +icu-cldr-json-data-provider = { path = "../cldr-json-data-provider", features = ["download"], optional = true } +simple_logger = { version = "1.10", optional = true } [features] -export = ["erased-serde", "icu-data-provider/invariant"] -export-bin = ["export", "clap", "icu-cldr-json-data-provider"] +export = ["erased-serde", "icu-data-provider/invariant", "log"] +export-bin = ["export", "clap", "icu-cldr-json-data-provider", "simple_logger"] [[bin]] name = "icu4x-cldr-export" diff --git a/components/fs-data-provider/src/bin/icu4x-cldr-export.rs 
b/components/fs-data-provider/src/bin/icu4x-cldr-export.rs index 7f5c3ae8b4f..20fb4dc7179 100644 --- a/components/fs-data-provider/src/bin/icu4x-cldr-export.rs +++ b/components/fs-data-provider/src/bin/icu4x-cldr-export.rs @@ -1,21 +1,28 @@ +use crate::manifest::LocalesOption; use clap::{App, Arg, ArgGroup}; +use icu_cldr_json_data_provider::download::CldrPathsDownload; use icu_cldr_json_data_provider::CldrJsonDataProvider; use icu_cldr_json_data_provider::CldrPaths; +use icu_cldr_json_data_provider::CldrPathsLocal; use icu_data_provider::icu_data_key; use icu_data_provider::iter::IterableDataProvider; use icu_fs_data_provider::export::fs_exporter; use icu_fs_data_provider::export::serializers; use icu_fs_data_provider::export::FilesystemExporter; use icu_fs_data_provider::manifest; +use icu_locale::LanguageIdentifier; +use simple_logger::SimpleLogger; use std::ffi::OsStr; use std::fmt; use std::path::PathBuf; +use std::str::FromStr; -// #[derive(Debug)] enum Error { Unsupported(&'static str), Export(icu_fs_data_provider::FsDataError), DataProvider(icu_data_provider::DataError), + LocaleParser(icu_locale::ParserError, String), + Setup(Box), } impl fmt::Display for Error { @@ -24,6 +31,8 @@ impl fmt::Display for Error { Error::Unsupported(message) => write!(f, "Unsupported: {}", message), Error::Export(error) => write!(f, "{}", error), Error::DataProvider(error) => write!(f, "{}", error), + Error::LocaleParser(error, s) => write!(f, "{}: {}", error, s), + Error::Setup(error) => write!(f, "{}", error), } } } @@ -46,6 +55,12 @@ impl From for Error { } } +impl From for Error { + fn from(err: icu_cldr_json_data_provider::download::DownloadError) -> Error { + Error::Setup(Box::from(err)) + } +} + fn main() -> Result<(), Error> { let matches = App::new("ICU4X Data Exporter") .version("0.0.1") @@ -55,7 +70,8 @@ fn main() -> Result<(), Error> { Arg::with_name("VERBOSE") .short("v") .long("verbose") - .help("Enable verbose logging."), + .multiple(true) + .help("Sets the 
level of verbosity (-v or -vv)"), ) .arg( Arg::with_name("DRY_RUN") @@ -78,19 +94,27 @@ fn main() -> Result<(), Error> { .help("Delete the output directory before writing data."), ) .arg( - Arg::with_name("STYLE") - .long("style") - .takes_value(true) - .possible_value("compact") - .possible_value("pretty") - .help("JSON style when printing files."), + Arg::with_name("PRETTY") + .short("p") + .long("pretty") + .help("Whether to pretty-print the output JSON files."), + ) + .arg( + Arg::with_name("CLDR_TAG") + .long("cldr-tag") + .value_name("TAG") + .help( + "Download CLDR JSON data from this GitHub tag: \n\ + https://github.com/unicode-cldr/cldr-core/tags", + ) + .takes_value(true), ) .arg( Arg::with_name("CLDR_CORE") .long("cldr-core") .value_name("PATH") .help( - "Path to cldr-core JSON: \ + "Path to cldr-core. Ignored if '--cldr-tag' is present. \n\ https://github.com/unicode-cldr/cldr-core", ) .takes_value(true), @@ -100,18 +124,21 @@ fn main() -> Result<(), Error> { .long("cldr-dates") .value_name("PATH") .help( - "Path to cldr-dates JSON: \ - https://github.com/unicode-cldr/cldr-dates", + "Path to cldr-dates. Ignored if '--cldr-tag' is present. \n\ + https://github.com/unicode-cldr/cldr-dates-modern", ) .takes_value(true), ) .arg( - Arg::with_name("KEY") + Arg::with_name("KEYS") .short("k") .long("keys") .multiple(true) .takes_value(true) - .help("Include this data key in the output. Also see --key-file."), + .help( + "Include this data key in the output. Accepts multiple arguments. \ + Also see --key-file.", + ), ) .arg( Arg::with_name("KEY_FILE") @@ -130,12 +157,23 @@ fn main() -> Result<(), Error> { .help("Include all keys known to ICU4X."), ) .group( - ArgGroup::with_name("KEYS") - .arg("KEY") + ArgGroup::with_name("KEY_MODE") + .arg("KEYS") .arg("KEY_FILE") .arg("ALL_KEYS") .required(true), ) + .arg( + Arg::with_name("LOCALES") + .short("l") + .long("locales") + .multiple(true) + .takes_value(true) + .help( + "Include this locale in the output. 
Accepts multiple arguments. \ + Omit this option to include all locales.", + ), + ) .arg( Arg::with_name("OUTPUT") .short("o") @@ -149,6 +187,19 @@ fn main() -> Result<(), Error> { ) .get_matches(); + match matches.occurrences_of("VERBOSE") { + 0 => SimpleLogger::from_env().init().unwrap(), + 1 => SimpleLogger::new() + .with_level(log::LevelFilter::Info) + .init() + .unwrap(), + 2 => SimpleLogger::new() + .with_level(log::LevelFilter::Trace) + .init() + .unwrap(), + _ => return Err(Error::Unsupported("Only -v and -vv are supported")), + } + if !matches.is_present("ALL_KEYS") { return Err(Error::Unsupported( "Lists of keys are not yet supported (see #192)", @@ -172,25 +223,24 @@ fn main() -> Result<(), Error> { .unwrap_or_else(|| OsStr::new("/tmp/icu4x_json")), ); - let mut cldr_paths = CldrPaths::default(); - - if let Some(path) = matches.value_of("CLDR_CORE") { - cldr_paths.cldr_core = Ok(path.into()); - } - - if let Some(path) = matches.value_of("CLDR_DATES") { - cldr_paths.cldr_dates = Ok(path.into()); - } + let cldr_paths: Box = if let Some(tag) = matches.value_of("CLDR_TAG") { + Box::new(CldrPathsDownload::try_from_github_tag(tag)?) 
+ } else { + let mut cldr_paths_local = CldrPathsLocal::default(); + if let Some(path) = matches.value_of("CLDR_CORE") { + cldr_paths_local.cldr_core = Ok(path.into()); + } + if let Some(path) = matches.value_of("CLDR_DATES") { + cldr_paths_local.cldr_dates = Ok(path.into()); + } + Box::new(cldr_paths_local) + }; - let provider = CldrJsonDataProvider::new(&cldr_paths); + let provider = CldrJsonDataProvider::new(cldr_paths.as_ref()); let mut options = serializers::JsonSerializerOptions::default(); - if let Some(value) = matches.value_of("STYLE") { - options.style = match value { - "compact" => serializers::StyleOption::Compact, - "pretty" => serializers::StyleOption::Pretty, - _ => unreachable!(), - }; + if matches.is_present("PRETTY") { + options.style = serializers::StyleOption::Pretty; } let json_serializer = Box::new(serializers::JsonSerializer::new(&options)); @@ -206,8 +256,15 @@ fn main() -> Result<(), Error> { if matches.is_present("OVERWRITE") { options.overwrite = fs_exporter::OverwriteOption::RemoveAndReplace } - options.verbose = matches.is_present("VERBOSE"); - let mut exporter = FilesystemExporter::try_new(json_serializer, &options)?; + if let Some(locale_strs) = matches.values_of("LOCALES") { + let locales_vec = locale_strs + .map(|s| { + LanguageIdentifier::from_str(s).map_err(|e| Error::LocaleParser(e, s.to_string())) + }) + .collect::, Error>>()?; + options.locales = LocalesOption::IncludeList(locales_vec.into_boxed_slice()); + } + let mut exporter = FilesystemExporter::try_new(json_serializer, options)?; for key in keys.iter() { let result = provider.export_key(key, &mut exporter); diff --git a/components/fs-data-provider/src/export/fs_exporter.rs b/components/fs-data-provider/src/export/fs_exporter.rs index 77bcecff25a..f49d0a815e0 100644 --- a/components/fs-data-provider/src/export/fs_exporter.rs +++ b/components/fs-data-provider/src/export/fs_exporter.rs @@ -2,6 +2,7 @@ use super::aliasing::{self, AliasCollection}; use 
super::serializers::Serializer; use crate::error::Error; use crate::manifest::AliasOption; +use crate::manifest::LocalesOption; use crate::manifest::Manifest; use crate::manifest::SyntaxOption; use crate::manifest::MANIFEST_FILE; @@ -29,21 +30,21 @@ pub enum OverwriteOption { pub struct ExporterOptions { /// Directory in the filesystem to write output. pub root: PathBuf, + /// Strategy for including locales. + pub locales: LocalesOption, /// Strategy for de-duplicating locale data. pub aliasing: AliasOption, /// Option for initializing the output directory. pub overwrite: OverwriteOption, - /// Whether to print progress to stdout. - pub verbose: bool, } impl Default for ExporterOptions { fn default() -> Self { Self { root: PathBuf::from("icu4x_data"), + locales: LocalesOption::IncludeAll, aliasing: AliasOption::NoAliases, overwrite: OverwriteOption::CheckEmpty, - verbose: false, } } } @@ -54,7 +55,6 @@ pub struct FilesystemExporter { root: PathBuf, manifest: Manifest, alias_collection: Option>>, - verbose: bool, serializer: Box, } @@ -75,42 +75,47 @@ impl DataExporter for FilesystemExporter { let mut path_buf = self.root.clone(); path_buf.extend(req.data_key.get_components().iter()); path_buf.extend(req.data_entry.get_components().iter()); - if self.verbose { - println!("Initializing: {}", path_buf.to_string_lossy()); - } + log::trace!("Initializing: {}", path_buf.to_string_lossy()); self.write_to_path(path_buf, obj) } + + fn includes(&self, data_entry: &DataEntry) -> bool { + match self.manifest.locales { + LocalesOption::IncludeAll => true, + LocalesOption::IncludeList(ref list) => list.contains(&data_entry.langid), + } + } } impl FilesystemExporter { pub fn try_new( serializer: Box, - options: &ExporterOptions, + options: ExporterOptions, ) -> Result { let result = FilesystemExporter { - root: options.root.to_path_buf(), + root: options.root, manifest: Manifest { aliasing: options.aliasing, + locales: options.locales, syntax: SyntaxOption::Json, }, 
alias_collection: None, - verbose: options.verbose, serializer, }; match options.overwrite { OverwriteOption::CheckEmpty => { - if options.root.exists() { - fs::remove_dir(&options.root)?; + if result.root.exists() { + fs::remove_dir(&result.root)?; } } OverwriteOption::RemoveAndReplace => { - if options.root.exists() { - fs::remove_dir_all(&options.root)?; + if result.root.exists() { + fs::remove_dir_all(&result.root)?; } } }; - fs::create_dir_all(&options.root)?; + fs::create_dir_all(&result.root)?; let mut manifest_path = result.root.to_path_buf(); manifest_path.push(MANIFEST_FILE); diff --git a/components/fs-data-provider/src/export/mod.rs b/components/fs-data-provider/src/export/mod.rs index c521d48fbb5..5f64d6ef449 100644 --- a/components/fs-data-provider/src/export/mod.rs +++ b/components/fs-data-provider/src/export/mod.rs @@ -23,7 +23,7 @@ //! let json_serializer = Box::new(serializers::JsonSerializer::new(&options)); //! let mut options = fs_exporter::ExporterOptions::default(); //! options.root = DEMO_PATH.clone(); -//! let mut exporter = fs_exporter::FilesystemExporter::try_new(json_serializer, &options) +//! let mut exporter = fs_exporter::FilesystemExporter::try_new(json_serializer, options) //! .expect("Should successfully initialize data output directory"); //! //! // Export a key diff --git a/components/fs-data-provider/src/manifest.rs b/components/fs-data-provider/src/manifest.rs index 3d8fb571073..8cf954ed59c 100644 --- a/components/fs-data-provider/src/manifest.rs +++ b/components/fs-data-provider/src/manifest.rs @@ -1,3 +1,4 @@ +use icu_locale::LanguageIdentifier; use serde::{Deserialize, Serialize}; /// File name of the manifest. The manifest always uses JSON, even if the serializer isn't JSON. @@ -13,6 +14,15 @@ pub enum AliasOption { // TODO: Alias based on a field in the JSON file } +#[non_exhaustive] +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum LocalesOption { + /// Include all available locales. 
+ IncludeAll, + /// Include only those locales that have an exact match in the given list. + IncludeList(Box<[LanguageIdentifier]>), +} + #[non_exhaustive] #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum SyntaxOption { @@ -36,6 +46,10 @@ impl SyntaxOption { #[non_exhaustive] #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub(crate) struct Manifest { + /// Strategy for de-duplicating locale data. pub aliasing: AliasOption, + /// Configuration for including locales in this data provider. + pub locales: LocalesOption, + /// Which data serialization file format is used. pub syntax: SyntaxOption, } diff --git a/components/fs-data-provider/tests/testdata/json_plurals_37/manifest.json b/components/fs-data-provider/tests/testdata/json_plurals_37/manifest.json index a68a792a8e3..c0f2b714d99 100644 --- a/components/fs-data-provider/tests/testdata/json_plurals_37/manifest.json +++ b/components/fs-data-provider/tests/testdata/json_plurals_37/manifest.json @@ -1,4 +1,5 @@ { "aliasing": "NoAliases", + "locales": "IncludeAll", "syntax": "Json" } diff --git a/components/pluralrules/tests/data/json_plurals_37/manifest.json b/components/pluralrules/tests/data/json_plurals_37/manifest.json index a68a792a8e3..c0f2b714d99 100644 --- a/components/pluralrules/tests/data/json_plurals_37/manifest.json +++ b/components/pluralrules/tests/data/json_plurals_37/manifest.json @@ -1,4 +1,5 @@ { "aliasing": "NoAliases", + "locales": "IncludeAll", "syntax": "Json" }