unicode-org · sffc · Oct 6, 2020 · Oct 3, 2020 · Oct 5, 2020 · Oct 5, 2020
diff --git a/components/cldr-json-data-provider/Cargo.toml b/components/cldr-json-data-provider/Cargo.toml
@@ -23,3 +23,17 @@ serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 serde-tuple-vec-map = "1.0"
 tinystr = "0.3"
+
+# Dependencies for the download feature
+urlencoding = { version = "1.1", optional = true }
+reqwest = { version = "0.10.8", features = ["blocking"], optional = true }
+unzip = { version = "0.1.0", optional = true }
+dirs = { version = "3.0", optional = true }
+log = { version = "0.4", optional = true }
+
+[dev-dependencies]
+mktemp = "0.4"
+
+[features]
+# Automatically download CLDR data from a host
+download = ["urlencoding", "reqwest", "unzip", "dirs", "log"]
diff --git a/components/cldr-json-data-provider/src/cldr_paths.rs b/components/cldr-json-data-provider/src/cldr_paths.rs
@@ -1,21 +1,50 @@
-use crate::error::MissingSourceError;
+use crate::error::{Error, MissingSourceError};
 use std::default::Default;
 use std::path::PathBuf;
 
-/// Struct containing filesystem paths to the CLDR JSON resource directories.
+/// Trait returning filesystem paths to CLDR JSON resource directories.
 /// The fields should be Ok if present. They default to Err when not present.
+pub trait CldrPaths {
+    fn cldr_core(&self) -> Result<PathBuf, Error>;
+    fn cldr_dates(&self) -> Result<PathBuf, Error>;
+}
+
+/// Implementation of CldrPaths for data directories already downloaded.
+///
+/// # Example
+///
+/// ```
+/// use icu_cldr_json_data_provider::CldrPathsLocal;
+/// use icu_cldr_json_data_provider::CldrJsonDataProvider;
+/// use std::path::PathBuf;
+///
+/// let mut paths = CldrPathsLocal::default();
+/// paths.cldr_core = Ok(PathBuf::from("/path/to/cldr-core"));
+/// // fill in other paths as necessary
+///
+/// let data_provider = CldrJsonDataProvider::new(&paths);
+/// ```
 #[non_exhaustive]
 #[derive(Debug, PartialEq)]
-pub struct CldrPaths {
+pub struct CldrPathsLocal {
     /// Path to checkout of cldr-core:
     /// https://github.com/unicode-cldr/cldr-core
     pub cldr_core: Result<PathBuf, MissingSourceError>,
     pub cldr_dates: Result<PathBuf, MissingSourceError>,
 }
 
-impl Default for CldrPaths {
-    fn default() -> CldrPaths {
-        CldrPaths {
+impl CldrPaths for CldrPathsLocal {
+    fn cldr_core(&self) -> Result<PathBuf, Error> {
+        self.cldr_core.clone().map_err(|e| e.into())
+    }
+    fn cldr_dates(&self) -> Result<PathBuf, Error> {
+        self.cldr_dates.clone().map_err(|e| e.into())
+    }
+}
+
+impl Default for CldrPathsLocal {
+    fn default() -> CldrPathsLocal {
+        CldrPathsLocal {
             cldr_core: Err(MissingSourceError { src: "cldr-core" }),
             cldr_dates: Err(MissingSourceError { src: "cldr-dates" }),
         }

diff --git a/components/cldr-json-data-provider/src/download/cldr_paths_download.rs b/components/cldr-json-data-provider/src/download/cldr_paths_download.rs
@@ -0,0 +1,106 @@
+use super::error::DownloadError;
+use super::io_util;
+use crate::error::Error;
+use crate::CldrPaths;
+use std::path::PathBuf;
+
+/// Implementation of CldrPaths that downloads CLDR data directories on demand.
+/// The download artifacts are saved in the user's cache directory; see
+/// https://docs.rs/dirs/3.0.0/dirs/fn.cache_dir.html
+///
+/// # Example
+///
+/// ```
+/// use icu_cldr_json_data_provider::download::CldrPathsDownload;
+/// use icu_cldr_json_data_provider::CldrJsonDataProvider;
+/// use std::path::PathBuf;
+///
+/// let paths = CldrPathsDownload::try_from_github_tag("36.0.0")
+///     .expect("Cache directory not found");
+///
+/// let data_provider = CldrJsonDataProvider::new(&paths);
+///
+/// fn demo<'d>(data_provider: &'d CldrJsonDataProvider<'d, 'd>) {
+///     use std::borrow::Cow;
+///     use icu_data_provider::prelude::*;
+///     use icu_data_provider::structs::plurals::PluralRuleStringsV1;
+///
+///     let data: Cow<PluralRuleStringsV1> = data_provider
+///         .load(&DataRequest {
+///             data_entry: DataEntry {
+///                 langid: "uk".parse().unwrap(),
+///                 variant: None,
+///             },
+///             data_key: icu_data_key!(plurals: ordinal@1),
+///         })
+///         .unwrap()
+///         .take_payload()
+///         .unwrap();
+///     assert_eq!(data.few, Some(Cow::Borrowed("n % 10 = 3 and n % 100 != 13")));
+/// }
+///
+/// // Calling demo(&data_provider) will cause the data to actually get downloaded.
+/// //demo(&data_provider);
+/// ```
+pub struct CldrPathsDownload {
+    /// Directory where downloaded files are stored.
+    pub cache_dir: PathBuf,
+
+    pub cldr_core: CldrZipFileInfo,
+    pub cldr_dates: CldrZipFileInfo,
+}
+
+// TODO(#297): Implement this async.
+impl CldrPaths for CldrPathsDownload {
+    fn cldr_core(&self) -> Result<PathBuf, Error> {
+        self.cldr_core.download_and_unzip(&self)
+    }
+    fn cldr_dates(&self) -> Result<PathBuf, Error> {
+        self.cldr_dates.download_and_unzip(&self)
+    }
+}
+
+impl CldrPathsDownload {
+    /// Creates a CldrPathsDownload that downloads files to the system cache directory
+    /// as determined by dirs::cache_dir().
+    ///
+    /// github_tag should be a tag in the CLDR JSON repositories, such as "36.0.0":
+    /// https://github.com/unicode-cldr/cldr-core/tags
+    pub fn try_from_github_tag(github_tag: &str) -> Result<Self, DownloadError> {
+        Ok(Self {
+            cache_dir: dirs::cache_dir()
+                .ok_or(DownloadError::NoCacheDir)?
+                .join("icu4x")
+                .join("cldr"),
+            cldr_core: CldrZipFileInfo {
+                url: format!(
+                    "https://github.com/unicode-cldr/cldr-core/archive/{}.zip",
+                    github_tag
+                ),
+                top_dir: format!("cldr-core-{}", github_tag),
+            },
+            cldr_dates: CldrZipFileInfo {
+                url: format!(
+                    "https://github.com/unicode-cldr/cldr-dates-modern/archive/{}.zip",
+                    github_tag
+                ),
+                top_dir: format!("cldr-dates-modern-{}", github_tag),
+            },
+        })
+    }
+}
+
+pub struct CldrZipFileInfo {
+    /// The URL to the remote zip file
+    pub url: String,
+    /// The directory name in the unpacked zip file
+    pub top_dir: String,
+}
+
+impl CldrZipFileInfo {
+    fn download_and_unzip(&self, parent: &CldrPathsDownload) -> Result<PathBuf, Error> {
+        io_util::download_and_unzip(&self.url, &parent.cache_dir)
+            .map(|p| p.join(&self.top_dir))
+            .map_err(|e| e.into())
+    }
+}
diff --git a/components/cldr-json-data-provider/src/download/error.rs b/components/cldr-json-data-provider/src/download/error.rs
@@ -0,0 +1,46 @@
+use std::error;
+use std::fmt;
+use std::io;
+use std::path::PathBuf;
+
+#[derive(Debug)]
+pub enum DownloadError {
+    Io(io::Error, PathBuf),
+    Reqwest(reqwest::Error),
+    HttpStatus(reqwest::StatusCode, String),
+    NoCacheDir,
+}
+
+impl From<io::Error> for DownloadError {
+    /// Note: Prefer constructing DownloadError::Io with the path; this conversion leaves it empty.
+    fn from(err: io::Error) -> Self {
+        Self::Io(err, PathBuf::new())
+    }
+}
+
+impl From<reqwest::Error> for DownloadError {
+    fn from(err: reqwest::Error) -> Self {
+        Self::Reqwest(err)
+    }
+}
+
+impl fmt::Display for DownloadError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Self::Io(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()),
+            Self::Reqwest(err) => err.fmt(f),
+            Self::HttpStatus(status, url) => write!(f, "HTTP request failed: {}: {}", status, url),
+            Self::NoCacheDir => write!(f, "dirs::cache_dir() returned None"),
+        }
+    }
+}
+
+impl error::Error for DownloadError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match self {
+            Self::Io(err, _) => Some(err),
+            Self::Reqwest(err) => Some(err),
+            _ => None,
+        }
+    }
+}
diff --git a/components/cldr-json-data-provider/src/download/io_util.rs b/components/cldr-json-data-provider/src/download/io_util.rs
@@ -0,0 +1,110 @@
+use super::error::DownloadError;
+use std::fs::{self, File};
+use std::path::{Path, PathBuf};
+use std::time::Instant;
+use unzip::Unzipper;
+
+macro_rules! map_io_err {
+    ($path_ref:ident) => {
+        |err| DownloadError::Io(err, $path_ref.to_owned())
+    };
+}
+
+#[cfg(test)]
+fn assert_files_eq(expected_file_path: &Path, actual_file_path: &Path) {
+    use std::io::Read;
+    let mut expected_buf = Vec::new();
+    File::open(expected_file_path)
+        .unwrap()
+        .read_to_end(&mut expected_buf)
+        .unwrap();
+    let mut actual_buf = Vec::new();
+    File::open(&actual_file_path)
+        .unwrap()
+        .read_to_end(&mut actual_buf)
+        .unwrap();
+    assert_eq!(expected_buf, actual_buf);
+}
+
+/// Synchronously download `url` into the file `destination`; a non-success status is an error.
+// TODO(#297): Implement this async.
+fn download_sync(url: &str, destination: &Path) -> Result<(), DownloadError> {
+    log::info!("Downloading: {}", url);
+    let start = Instant::now();
+    let mut response = reqwest::blocking::get(url)?;
+    if !response.status().is_success() {
+        return Err(DownloadError::HttpStatus(
+            response.status(),
+            url.to_string(),
+        ));
+    }
+    log::info!("Status: {}", response.status());
+    let mut file = File::create(destination).map_err(map_io_err!(destination))?;
+    response.copy_to(&mut file)?;
+    log::info!("Finished in {:.2} seconds", start.elapsed().as_secs_f64());
+    Ok(())
+}
+
+#[test]
+fn test_download_sync() -> Result<(), DownloadError> {
+    let temp_file = mktemp::Temp::new_file()?;
+    download_sync(
+        "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf",
+        &temp_file,
+    )?;
+    assert_files_eq(&PathBuf::from("./tests/testdata/dummy.pdf"), &temp_file);
+    Ok(())
+}
+
+/// Synchronously unpack a zip file into a destination directory.
+// TODO(#297): Implement this async.
+fn unzip_sync(zip_path: &Path, dir_path: &Path) -> Result<(), DownloadError> {
+    let reader = File::open(zip_path).map_err(map_io_err!(zip_path))?;
+    log::info!("Unzipping...");
+    let start = Instant::now();
+    Unzipper::new(reader, dir_path)
+        .unzip()
+        .map_err(map_io_err!(dir_path))?;
+    log::info!("Unzipped in {:.2} seconds", start.elapsed().as_secs_f64());
+    Ok(())
+}
+
+#[test]
+fn test_unzip_sync() -> Result<(), DownloadError> {
+    let temp_dir = mktemp::Temp::new_dir()?;
+    unzip_sync(&PathBuf::from("./tests/testdata/dummy.zip"), &temp_dir)?;
+    assert_files_eq(
+        &PathBuf::from("./tests/testdata/dummy.pdf"),
+        &temp_dir.to_path_buf().join("dummy.pdf"),
+    );
+    Ok(())
+}
+
+/// Downloads and unpacks a zip file, returning the path to the unpacked directory.
+///
+/// `cache_dir` is a directory where both the zip file and the unpacked directory will be
+/// saved. A zip file or unpacked directory that already exists there is reused as-is.
+pub fn download_and_unzip(zip_file_url: &str, cache_dir: &Path) -> Result<PathBuf, DownloadError> {
+    fs::create_dir_all(cache_dir).map_err(map_io_err!(cache_dir))?;
+
+    let zip_dir = cache_dir.to_path_buf().join("zips");
+    fs::create_dir_all(&zip_dir).map_err(map_io_err!(zip_dir))?;
+
+    let data_dir = cache_dir.to_path_buf().join("data");
+    fs::create_dir_all(&data_dir).map_err(map_io_err!(data_dir))?;
+
+    let basename = urlencoding::encode(zip_file_url);
+    let mut zip_path = zip_dir.join(&basename);
+    zip_path.set_extension("zip");
+    let dir_path = data_dir.join(&basename);
+
+    if !zip_path.exists() {
+        download_sync(zip_file_url, &zip_path)?;
+    }
+
+    if !dir_path.exists() {
+        unzip_sync(&zip_path, &dir_path)?;
+    }
+
+    Ok(dir_path)
+}
diff --git a/components/cldr-json-data-provider/src/download/mod.rs b/components/cldr-json-data-provider/src/download/mod.rs
@@ -0,0 +1,6 @@
+mod cldr_paths_download;
+mod error;
+mod io_util;
+
+pub use cldr_paths_download::CldrPathsDownload;
+pub use error::DownloadError;
diff --git a/components/cldr-json-data-provider/src/error.rs b/components/cldr-json-data-provider/src/error.rs
@@ -1,12 +1,17 @@
 use std::error;
 use std::fmt;
 
+#[cfg(feature = "download")]
+use crate::download::DownloadError;
+
 #[non_exhaustive]
 #[derive(Debug)]
 pub enum Error {
     JsonError(serde_json::error::Error),
     IoError(std::io::Error, std::path::PathBuf),
     MissingSource(MissingSourceError),
+    #[cfg(feature = "download")]
+    Download(DownloadError),
     PoisonError,
 }
 
@@ -33,12 +38,24 @@ impl From<MissingSourceError> for Error {
     }
 }
 
+#[cfg(feature = "download")]
+impl From<DownloadError> for Error {
+    fn from(err: DownloadError) -> Error {
+        match err {
+            DownloadError::Io(err, path) => Error::IoError(err, path),
+            _ => Error::Download(err),
+        }
+    }
+}
+
 impl fmt::Display for Error {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             Error::JsonError(err) => write!(f, "{}", err),
             Error::IoError(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()),
             Error::MissingSource(err) => err.fmt(f),
+            #[cfg(feature = "download")]
+            Error::Download(err) => err.fmt(f),
             Error::PoisonError => write!(f, "poisoned lock on CLDR provider"),
         }
     }
@@ -49,6 +66,8 @@ impl error::Error for Error {
         match self {
             Error::JsonError(err) => Some(err),
             Error::IoError(err, _) => Some(err),
+            #[cfg(feature = "download")]
+            Error::Download(err) => Some(err),
             _ => None,
         }
     }