-
Notifications
You must be signed in to change notification settings - Fork 176
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding download and locale filtering options #299
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
use super::error::DownloadError; | ||
use super::io_util; | ||
use crate::error::Error; | ||
use crate::CldrPaths; | ||
use std::path::PathBuf; | ||
|
||
/// Implementation of CldrPaths that downloads CLDR data directories on demand. | ||
/// The download artifacts are saved in the user's cache directory; see | ||
/// https://docs.rs/dirs/3.0.0/dirs/fn.cache_dir.html | ||
/// | ||
/// # Example | ||
/// | ||
/// ``` | ||
/// use icu_cldr_json_data_provider::download::CldrPathsDownload; | ||
/// use icu_cldr_json_data_provider::CldrJsonDataProvider; | ||
/// use std::path::PathBuf; | ||
/// | ||
/// let paths = CldrPathsDownload::try_from_github_tag("36.0.0") | ||
/// .expect("Cache directory not found"); | ||
/// | ||
/// let data_provider = CldrJsonDataProvider::new(&paths); | ||
/// | ||
/// fn demo<'d>(data_provider: &'d CldrJsonDataProvider<'d, 'd>) { | ||
/// use std::borrow::Cow; | ||
/// use icu_data_provider::prelude::*; | ||
/// use icu_data_provider::structs::plurals::PluralRuleStringsV1; | ||
/// | ||
/// let data: Cow<PluralRuleStringsV1> = data_provider | ||
/// .load(&DataRequest { | ||
/// data_entry: DataEntry { | ||
/// langid: "uk".parse().unwrap(), | ||
/// variant: None, | ||
/// }, | ||
/// data_key: icu_data_key!(plurals: ordinal@1), | ||
/// }) | ||
/// .unwrap() | ||
/// .take_payload() | ||
/// .unwrap(); | ||
/// assert_eq!(data.few, Some(Cow::Borrowed("n % 10 = 3 and n % 100 != 13"))); | ||
/// } | ||
/// | ||
/// // Calling demo(&data_provider) will cause the data to actually get downloaded. | ||
/// //demo(&data_provider); | ||
/// ``` | ||
pub struct CldrPathsDownload { | ||
/// Directory where downloaded files are stored. | ||
pub cache_dir: PathBuf, | ||
|
||
/// Remote zip archive that backs `CldrPaths::cldr_core`. | ||
pub cldr_core: CldrZipFileInfo, | ||
/// Remote zip archive that backs `CldrPaths::cldr_dates`. | ||
pub cldr_dates: CldrZipFileInfo, | ||
} | ||
|
||
// TODO(#297): Implement this async. | ||
// Each accessor hands off to the matching `CldrZipFileInfo`, passing this struct so | ||
// the helper can read `cache_dir`. | ||
impl CldrPaths for CldrPathsDownload { | ||
/// Returns the directory holding the unpacked cldr-core archive. | ||
fn cldr_core(&self) -> Result<PathBuf, Error> { | ||
self.cldr_core.download_and_unzip(&self) | ||
} | ||
/// Returns the directory holding the unpacked cldr-dates archive. | ||
fn cldr_dates(&self) -> Result<PathBuf, Error> { | ||
self.cldr_dates.download_and_unzip(&self) | ||
} | ||
} | ||
|
||
impl CldrPathsDownload { | ||
/// Creates a CldrPathsDownload that downloads files to the system cache directory | ||
/// as determined by dirs::cache_dir(). | ||
/// | ||
/// github_tag should be a tag in the CLDR JSON repositories, such as "36.0.0": | ||
/// https://github.com/unicode-cldr/cldr-core/tags | ||
/// | ||
/// This only builds the struct (URLs and directory names); nothing is downloaded | ||
/// by this function itself. | ||
/// | ||
/// # Errors | ||
/// | ||
/// Returns `DownloadError::NoCacheDir` if `dirs::cache_dir()` returns `None`. | ||
pub fn try_from_github_tag(github_tag: &str) -> Result<Self, DownloadError> { | ||
Ok(Self { | ||
// Files are kept under `<system cache dir>/icu4x/cldr`. | ||
cache_dir: dirs::cache_dir() | ||
.ok_or(DownloadError::NoCacheDir)? | ||
.join("icu4x") | ||
.join("cldr"), | ||
cldr_core: CldrZipFileInfo { | ||
url: format!( | ||
"https://github.com/unicode-cldr/cldr-core/archive/{}.zip", | ||
github_tag | ||
), | ||
// NOTE(review): presumably the top-level folder inside the tag archive; confirm. | ||
top_dir: format!("cldr-core-{}", github_tag), | ||
}, | ||
cldr_dates: CldrZipFileInfo { | ||
url: format!( | ||
"https://github.com/unicode-cldr/cldr-dates-modern/archive/{}.zip", | ||
github_tag | ||
), | ||
// NOTE(review): presumably the top-level folder inside the tag archive; confirm. | ||
top_dir: format!("cldr-dates-modern-{}", github_tag), | ||
}, | ||
}) | ||
} | ||
} | ||
|
||
/// Describes one remote zip archive and the directory of interest inside it. | ||
pub struct CldrZipFileInfo { | ||
/// The URL to the remote zip file | ||
pub url: String, | ||
/// The directory name in the unpacked zip file | ||
pub top_dir: String, | ||
} | ||
|
||
impl CldrZipFileInfo { | ||
/// Calls `io_util::download_and_unzip` for `self.url` using the parent's `cache_dir`, | ||
/// then appends `self.top_dir` to the directory it returns. | ||
/// | ||
/// The `DownloadError` from the helper is converted into the crate `Error` via `into()`. | ||
fn download_and_unzip(&self, parent: &CldrPathsDownload) -> Result<PathBuf, Error> { | ||
io_util::download_and_unzip(&self.url, &parent.cache_dir) | ||
.map(|p| p.join(&self.top_dir)) | ||
.map_err(|e| e.into()) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use std::error; | ||
use std::fmt; | ||
use std::io; | ||
use std::path::PathBuf; | ||
|
||
/// Errors produced while downloading or unpacking remote archives. | ||
#[derive(Debug)] | ||
pub enum DownloadError { | ||
/// An I/O error together with the path it relates to. | ||
Io(io::Error, PathBuf), | ||
/// An error reported by `reqwest`. | ||
Reqwest(reqwest::Error), | ||
/// A status code paired with a string; `Display` prints the string as the request URL. | ||
HttpStatus(reqwest::StatusCode, String), | ||
/// `dirs::cache_dir()` returned `None`. | ||
NoCacheDir, | ||
} | ||
|
||
impl From<io::Error> for DownloadError { | ||
/// Note: Prefer constructing `DownloadError::Io` with the real path instead of using | ||
/// this conversion, which records an empty path. | ||
fn from(err: io::Error) -> Self { | ||
Self::Io(err, PathBuf::new()) | ||
} | ||
} | ||
|
||
impl From<reqwest::Error> for DownloadError { | ||
/// Wraps a `reqwest::Error` in `DownloadError::Reqwest` so `?` can be used on reqwest calls. | ||
fn from(err: reqwest::Error) -> Self { | ||
Self::Reqwest(err) | ||
} | ||
} | ||
|
||
impl fmt::Display for DownloadError { | ||
/// Formats a human-readable message for each variant. | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
// Printed as "<error>: <path>"; the path is converted lossily. | ||
Self::Io(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()), | ||
// Delegates to the wrapped error's own formatting. | ||
Self::Reqwest(err) => err.fmt(f), | ||
Self::HttpStatus(status, url) => write!(f, "HTTP request failed: {}: {}", status, url), | ||
Self::NoCacheDir => write!(f, "dirs::cache_dir() returned None"), | ||
} | ||
} | ||
} | ||
|
||
impl error::Error for DownloadError { | ||
/// Exposes the wrapped error for the `Io` and `Reqwest` variants; other variants | ||
/// have no underlying source. | ||
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { | ||
match self { | ||
Self::Io(err, _) => Some(err), | ||
Self::Reqwest(err) => Some(err), | ||
_ => None, | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
use super::error::DownloadError; | ||
use std::fs::{self, File}; | ||
use std::path::{Path, PathBuf}; | ||
use std::time::Instant; | ||
use unzip::Unzipper; | ||
|
||
// Expands to a closure that wraps an error in `DownloadError::Io`, attaching an owned | ||
// copy of the named path variable. Used below as the argument to `.map_err(...)`. | ||
macro_rules! map_io_err { | ||
($path_ref:ident) => { | ||
|err| DownloadError::Io(err, $path_ref.to_owned()) | ||
}; | ||
} | ||
|
||
/// Test helper: reads both files fully into memory and asserts their bytes are equal. | ||
/// Panics (via `unwrap`) if either file cannot be opened or read. | ||
#[cfg(test)] | ||
fn assert_files_eq(expected_file_path: &Path, actual_file_path: &Path) { | ||
use std::io::Read; | ||
let mut expected_buf = Vec::new(); | ||
File::open(expected_file_path) | ||
.unwrap() | ||
.read_to_end(&mut expected_buf) | ||
.unwrap(); | ||
let mut actual_buf = Vec::new(); | ||
File::open(&actual_file_path) | ||
.unwrap() | ||
.read_to_end(&mut actual_buf) | ||
.unwrap(); | ||
assert_eq!(expected_buf, actual_buf); | ||
} | ||
|
||
// Synchronously download url and save it to destination. | ||
// | ||
// Returns DownloadError::HttpStatus when the response status is not a success, and | ||
// DownloadError::Io (with the destination path) when the file cannot be created. | ||
// | ||
// NOTE(review): the destination file is created before the body is copied, so if | ||
// copy_to fails part-way the file stays on disk with partial content. | ||
// TODO(#297): Implement this async. | ||
fn download_sync(url: &str, destination: &Path) -> Result<(), DownloadError> { | ||
log::info!("Downloading: {}", url); | ||
let start = Instant::now(); | ||
let mut response = reqwest::blocking::get(url)?; | ||
// Reject non-success responses before touching the filesystem. | ||
if !response.status().is_success() { | ||
return Err(DownloadError::HttpStatus( | ||
response.status(), | ||
url.to_string(), | ||
)); | ||
} | ||
log::info!("Status: {}", response.status()); | ||
let mut file = File::create(destination).map_err(map_io_err!(destination))?; | ||
// Stream the response body into the file. | ||
response.copy_to(&mut file)?; | ||
log::info!("Finished in {:.2} seconds", start.elapsed().as_secs_f64()); | ||
Ok(()) | ||
} | ||
|
||
// Downloads a sample PDF into a temp file and compares it with the checked-in copy. | ||
// Note: this test fetches a remote URL, so it needs network access to pass. | ||
#[test] | ||
fn test_download_sync() -> Result<(), DownloadError> { | ||
let temp_file = mktemp::Temp::new_file()?; | ||
download_sync( | ||
"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf", | ||
&temp_file, | ||
)?; | ||
assert_files_eq(&PathBuf::from("./tests/testdata/dummy.pdf"), &temp_file); | ||
Ok(()) | ||
} | ||
|
||
/// Synchronously unpack a zip file into a destination directory. | ||
/// | ||
/// Failure to open `zip_path` is reported as `DownloadError::Io` with `zip_path`; | ||
/// a failure while unpacking is reported as `DownloadError::Io` with `dir_path`. | ||
// TODO(#297): Implement this async. | ||
fn unzip_sync(zip_path: &Path, dir_path: &Path) -> Result<(), DownloadError> { | ||
let reader = File::open(zip_path).map_err(map_io_err!(zip_path))?; | ||
log::info!("Unzipping..."); | ||
let start = Instant::now(); | ||
Unzipper::new(reader, dir_path) | ||
.unzip() | ||
.map_err(map_io_err!(dir_path))?; | ||
log::info!("Unzipped in {:.2} seconds", start.elapsed().as_secs_f64()); | ||
Ok(()) | ||
} | ||
|
||
// Unpacks the checked-in dummy.zip into a temp directory and checks that the | ||
// extracted dummy.pdf matches the checked-in dummy.pdf. | ||
#[test] | ||
fn test_unzip_sync() -> Result<(), DownloadError> { | ||
let temp_dir = mktemp::Temp::new_dir()?; | ||
unzip_sync(&PathBuf::from("./tests/testdata/dummy.zip"), &temp_dir)?; | ||
assert_files_eq( | ||
&PathBuf::from("./tests/testdata/dummy.pdf"), | ||
&temp_dir.to_path_buf().join("dummy.pdf"), | ||
); | ||
Ok(()) | ||
} | ||
|
||
/// Downloads and unpacks a zip file, returning the path to the unpacked directory. | ||
/// | ||
/// `cache_dir` is a directory where both the zip file and the unpacked directory will be | ||
/// saved. If the zip file has already been downloaded, it will not be downloaded again. | ||
/// Likewise, unpacking is skipped when the target directory already exists. | ||
/// | ||
/// Zip files go under `<cache_dir>/zips` and unpacked data under `<cache_dir>/data`; | ||
/// both names are derived from the URL-encoded `zip_file_url`. | ||
/// | ||
/// NOTE(review): the "already present" checks only test for existence, so a partial | ||
/// zip or partially unpacked directory left by an earlier failed run would be reused. | ||
pub fn download_and_unzip(zip_file_url: &str, cache_dir: &Path) -> Result<PathBuf, DownloadError> { | ||
fs::create_dir_all(cache_dir).map_err(map_io_err!(cache_dir))?; | ||
|
||
let zip_dir = cache_dir.to_path_buf().join("zips"); | ||
fs::create_dir_all(&zip_dir).map_err(map_io_err!(zip_dir))?; | ||
|
||
let data_dir = cache_dir.to_path_buf().join("data"); | ||
fs::create_dir_all(&data_dir).map_err(map_io_err!(data_dir))?; | ||
|
||
// URL-encode the whole URL so it can be used as a single path component. | ||
let basename = urlencoding::encode(zip_file_url); | ||
let mut zip_path = zip_dir.join(&basename); | ||
// NOTE(review): set_extension replaces an existing extension rather than appending; | ||
// harmless for URLs ending in ".zip", but verify for other URLs. | ||
zip_path.set_extension("zip"); | ||
let dir_path = data_dir.join(&basename); | ||
|
||
if !zip_path.exists() { | ||
download_sync(zip_file_url, &zip_path)?; | ||
} | ||
|
||
if !dir_path.exists() { | ||
unzip_sync(&zip_path, &dir_path)?; | ||
} | ||
|
||
Ok(dir_path) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
mod cldr_paths_download; | ||
mod error; | ||
mod io_util; | ||
|
||
pub use cldr_paths_download::CldrPathsDownload; | ||
pub use error::DownloadError; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,17 @@ | ||
use std::error; | ||
use std::fmt; | ||
|
||
#[cfg(feature = "download")] | ||
use crate::download::DownloadError; | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps, but I still need to put the Display and std::error::Error implementations in this file, so it seems odd that From would be in a separate place. It's easy to put it in this file, and I protect it with |
||
/// Top-level error type for this crate. | ||
#[non_exhaustive] | ||
#[derive(Debug)] | ||
pub enum Error { | ||
/// An error from `serde_json`. | ||
JsonError(serde_json::error::Error), | ||
/// An I/O error together with the path it relates to. | ||
IoError(std::io::Error, std::path::PathBuf), | ||
/// Wraps a `MissingSourceError`. | ||
MissingSource(MissingSourceError), | ||
/// A download failure; only available with the "download" feature. | ||
#[cfg(feature = "download")] | ||
Download(DownloadError), | ||
/// A lock on the CLDR provider was poisoned. | ||
PoisonError, | ||
} | ||
|
||
|
@@ -33,12 +38,24 @@ impl From<MissingSourceError> for Error { | |
} | ||
} | ||
|
||
#[cfg(feature = "download")] | ||
impl From<DownloadError> for Error { | ||
/// Converts a `DownloadError` into the crate-level `Error`. | ||
/// | ||
/// I/O failures are flattened into `Error::IoError` (keeping the path); every other | ||
/// variant is wrapped unchanged in `Error::Download`. | ||
fn from(err: DownloadError) -> Error { | ||
match err { | ||
DownloadError::Io(err, path) => Error::IoError(err, path), | ||
_ => Error::Download(err), | ||
} | ||
} | ||
} | ||
|
||
impl fmt::Display for Error { | ||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
match self { | ||
Error::JsonError(err) => write!(f, "{}", err), | ||
Error::IoError(err, path) => write!(f, "{}: {}", err, path.to_string_lossy()), | ||
Error::MissingSource(err) => err.fmt(f), | ||
#[cfg(feature = "download")] | ||
Error::Download(err) => err.fmt(f), | ||
Error::PoisonError => write!(f, "poisoned lock on CLDR provider"), | ||
} | ||
} | ||
|
@@ -49,6 +66,8 @@ impl error::Error for Error { | |
match self { | ||
Error::JsonError(err) => Some(err), | ||
Error::IoError(err, _) => Some(err), | ||
#[cfg(feature = "download")] | ||
Error::Download(err) => Some(err), | ||
_ => None, | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need to return `PathBuf`? Usually in such cases we use `Path` (just like we'd return `&str`, not `String`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you either have an owned `PathBuf` or a borrowed `&Path`. If I return `&Path`, then someone has to own it.
I figured it was fine to return `PathBuf` because the only call site immediately takes it and starts appending more pieces to it, so if we returned `&Path`, we'd need to call `.to_path_buf()` anyway.