From 46d094553667075a69b2da5cac85db71662cca62 Mon Sep 17 00:00:00 2001 From: Nicholas Yang Date: Mon, 17 Jul 2023 11:07:45 -0400 Subject: [PATCH] feat(turborepo): FS Cache (#5473) ### Description Implements the FS cache on top of CacheItem. ~~This is stacked on top of #5065~~ ### Testing Instructions Uses the same round-trip tests of HTTP cache. --------- Co-authored-by: --global Co-authored-by: Chris Olszewski --- Cargo.lock | 1 + crates/turborepo-cache/Cargo.toml | 1 + .../src/cache_archive/create.rs | 2 +- crates/turborepo-cache/src/fs.rs | 217 ++++++++++++++++++ crates/turborepo-cache/src/http.rs | 62 ++--- crates/turborepo-cache/src/lib.rs | 10 + crates/turborepo-cache/src/test_cases.rs | 78 +++++++ .../src/absolute_system_path.rs | 4 + 8 files changed, 331 insertions(+), 44 deletions(-) create mode 100644 crates/turborepo-cache/src/fs.rs create mode 100644 crates/turborepo-cache/src/test_cases.rs diff --git a/Cargo.lock b/Cargo.lock index 5cbbea5f09347..faeae024b7d04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9594,6 +9594,7 @@ dependencies = [ "camino", "chrono", "dunce", + "futures", "hex", "hmac", "lazy_static", diff --git a/crates/turborepo-cache/Cargo.toml b/crates/turborepo-cache/Cargo.toml index e1159baeb23bc..e932cc6402dcc 100644 --- a/crates/turborepo-cache/Cargo.toml +++ b/crates/turborepo-cache/Cargo.toml @@ -12,6 +12,7 @@ rustls-tls = ["turborepo-api-client/rustls-tls"] [dev-dependencies] anyhow = { workspace = true, features = ["backtrace"] } +futures = { workspace = true } libc = "0.2.146" port_scanner = { workspace = true } tempfile = { workspace = true } diff --git a/crates/turborepo-cache/src/cache_archive/create.rs b/crates/turborepo-cache/src/cache_archive/create.rs index 1ea5139255c2b..dac5aa731e38e 100644 --- a/crates/turborepo-cache/src/cache_archive/create.rs +++ b/crates/turborepo-cache/src/cache_archive/create.rs @@ -46,7 +46,7 @@ impl<'a> CacheWriter<'a> { // Makes a new CacheArchive at the specified path // Wires up the chain of writers: // tar::Builder -> zstd::Encoder (optional) -> BufWriter -> File - fn create(path: &AbsoluteSystemPath) -> Result { + pub fn create(path: &AbsoluteSystemPath) -> Result { let mut options = OpenOptions::new(); options.write(true).create(true).truncate(true); diff --git a/crates/turborepo-cache/src/fs.rs b/crates/turborepo-cache/src/fs.rs new file mode 100644 index 0000000000000..d554d2e143393 --- /dev/null +++ b/crates/turborepo-cache/src/fs.rs @@ -0,0 +1,217 @@ +use std::{backtrace::Backtrace, fs::OpenOptions}; + +use serde::{Deserialize, Serialize}; +use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, AnchoredSystemPathBuf}; + +use crate::{ + cache_archive::{CacheReader, CacheWriter}, + CacheError, CacheResponse, CacheSource, +}; + +struct FSCache { + cache_directory: AbsoluteSystemPathBuf, +} + +#[derive(Debug, Deserialize, Serialize)] +struct CacheMetadata { + hash: String, + duration: u32, +} + +impl CacheMetadata { + fn read(path: &AbsoluteSystemPath) -> Result { + serde_json::from_str(&path.read_to_string()?) + .map_err(|e| CacheError::InvalidMetadata(e, Backtrace::capture())) + } +} + +impl FSCache { + fn resolve_cache_dir( + repo_root: &AbsoluteSystemPath, + override_dir: Option<&str>, + ) -> AbsoluteSystemPathBuf { + if let Some(override_dir) = override_dir { + AbsoluteSystemPathBuf::from_unknown(repo_root, override_dir) + } else { + repo_root.join_components(&["node_modules", ".cache", "turbo"]) + } + } + + pub fn new( + override_dir: Option<&str>, + repo_root: &AbsoluteSystemPath, + ) -> Result { + let cache_directory = Self::resolve_cache_dir(repo_root, override_dir); + cache_directory.create_dir_all()?; + + Ok(FSCache { cache_directory }) + } + + pub fn fetch( + &self, + anchor: &AbsoluteSystemPath, + hash: &str, + ) -> Result<(CacheResponse, Vec), CacheError> { + let uncompressed_cache_path = self + .cache_directory + .join_component(&format!("{}.tar", hash)); + let compressed_cache_path = self + .cache_directory + .join_component(&format!("{}.tar.zst", hash)); + + let cache_path = if uncompressed_cache_path.exists() { + uncompressed_cache_path + } else if compressed_cache_path.exists() { + compressed_cache_path + } else { + return Err(CacheError::CacheMiss); + }; + + let mut cache_reader = CacheReader::open(&cache_path)?; + + let restored_files = cache_reader.restore(anchor)?; + + let meta = CacheMetadata::read( + &self + .cache_directory + .join_component(&format!("{}-meta.json", hash)), + )?; + + Ok(( + CacheResponse { + time_saved: meta.duration, + source: CacheSource::Local, + }, + restored_files, + )) + } + + fn exists(&self, hash: &str) -> Result { + let uncompressed_cache_path = self + .cache_directory + .join_component(&format!("{}.tar", hash)); + let compressed_cache_path = self + .cache_directory + .join_component(&format!("{}.tar.zst", hash)); + + if !uncompressed_cache_path.exists() && !compressed_cache_path.exists() { + return Err(CacheError::CacheMiss); + } + + let duration = CacheMetadata::read( + &self + .cache_directory + .join_component(&format!("{}-meta.json", hash)), + ) + .map(|meta| meta.duration) + .unwrap_or(0); + + Ok(CacheResponse { + time_saved: duration, + source: CacheSource::Local, + }) + } + + fn put( + &self, + anchor: &AbsoluteSystemPath, + hash: &str, + duration: u32, + files: Vec, + ) -> Result<(), CacheError> { + let cache_path = self + .cache_directory + .join_component(&format!("{}.tar.zst", hash)); + + let mut cache_item = CacheWriter::create(&cache_path)?; + + for file in files { + cache_item.add_file(anchor, &file)?; + } + + let metadata_path = self + .cache_directory + .join_component(&format!("{}-meta.json", hash)); + + let meta = CacheMetadata { + hash: hash.to_string(), + duration, + }; + + let mut metadata_options = OpenOptions::new(); + metadata_options.create(true).write(true); + + let metadata_file = metadata_path.open_with_options(metadata_options)?; + + serde_json::to_writer(metadata_file, &meta) + .map_err(|e| CacheError::InvalidMetadata(e, Backtrace::capture()))?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::assert_matches::assert_matches; + + use anyhow::Result; + use futures::future::try_join_all; + use tempfile::tempdir; + + use super::*; + use crate::test_cases::{get_test_cases, TestCase}; + + #[tokio::test] + async fn test_fs_cache() -> Result<()> { + try_join_all(get_test_cases().into_iter().map(round_trip_test)).await?; + + Ok(()) + } + + async fn round_trip_test(test_case: TestCase) -> Result<()> { + let repo_root = tempdir()?; + let repo_root_path = AbsoluteSystemPath::from_std_path(repo_root.path())?; + test_case.initialize(repo_root_path)?; + + let cache = FSCache::new(None, &repo_root_path)?; + + let expected_miss = cache + .exists(&test_case.hash) + .expect_err("Expected cache miss"); + assert_matches!(expected_miss, CacheError::CacheMiss); + + cache.put( + repo_root_path, + &test_case.hash, + test_case.duration, + test_case.files.iter().map(|f| f.path.clone()).collect(), + )?; + + let expected_hit = cache.exists(&test_case.hash)?; + assert_eq!( + expected_hit, + CacheResponse { + time_saved: test_case.duration, + source: CacheSource::Local + } + ); + + let (status, files) = cache.fetch(&repo_root_path, &test_case.hash)?; + assert_eq!( + status, + CacheResponse { + time_saved: test_case.duration, + source: CacheSource::Local + } + ); + + assert_eq!(files.len(), test_case.files.len()); + for (expected, actual) in test_case.files.iter().zip(files.iter()) { + assert_eq!(&expected.path, actual); + let actual_file = repo_root_path.resolve(actual); + assert_eq!(expected.contents, actual_file.read_to_string()?); + } + + Ok(()) + } +} diff --git a/crates/turborepo-cache/src/http.rs b/crates/turborepo-cache/src/http.rs index 332a92f2a6178..d514dd8e654ac 100644 --- a/crates/turborepo-cache/src/http.rs +++ b/crates/turborepo-cache/src/http.rs @@ -172,62 +172,38 @@ impl HttpCache { #[cfg(test)] mod test { use anyhow::Result; + use futures::future::try_join_all; use tempfile::tempdir; - use test_case::test_case; - use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf}; + use turbopath::AbsoluteSystemPathBuf; use turborepo_api_client::APIClient; use vercel_api_mock::start_test_server; - use crate::{http::HttpCache, CacheSource}; + use crate::{ + http::HttpCache, + test_cases::{get_test_cases, TestCase}, + CacheSource, + }; - struct TestFile { - path: AnchoredSystemPathBuf, - contents: &'static str, + #[tokio::test] + async fn test_http_cache() -> Result<()> { + try_join_all(get_test_cases().into_iter().map(round_trip_test)).await?; + + Ok(()) } - #[test_case(vec![ - TestFile { - path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), - contents: "hello world" - } - ], 58, "Faces Places")] - #[test_case(vec![ - TestFile { - path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), - contents: "Days of Heaven" - }, - TestFile { - path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(), - contents: "Badlands" - } - ], 1284, "Cleo from 5 to 7")] - #[test_case(vec![ - TestFile { - path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), - contents: "Days of Heaven" - }, - TestFile { - path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(), - contents: "Badlands" - }, - TestFile { - path: AnchoredSystemPathBuf::from_raw("src/main.js").unwrap(), - contents: "Tree of Life" - } - ], 12845, "The Gleaners and I")] - #[tokio::test] - async fn test_round_trip(files: Vec, duration: u32, hash: &str) -> Result<()> { + async fn round_trip_test(test_case: TestCase) -> Result<()> { let port = port_scanner::request_open_port().unwrap(); let handle = tokio::spawn(start_test_server(port)); let repo_root = tempdir()?; let repo_root_path = AbsoluteSystemPathBuf::try_from(repo_root.path())?; + test_case.initialize(&repo_root_path)?; - for file in &files { - let file_path = repo_root_path.resolve(&file.path); - std::fs::create_dir_all(file_path.parent().unwrap())?; - std::fs::write(file_path, file.contents)?; - } + let TestCase { + hash, + files, + duration, + } = test_case; let api_client = APIClient::new(&format!("http://localhost:{}", port), 200, "2.0.0", true)?; diff --git a/crates/turborepo-cache/src/lib.rs b/crates/turborepo-cache/src/lib.rs index c02861bd7ac4f..8c768cc4c2b59 100644 --- a/crates/turborepo-cache/src/lib.rs +++ b/crates/turborepo-cache/src/lib.rs @@ -1,10 +1,14 @@ #![feature(error_generic_member_access)] #![feature(provide_any)] +#![feature(assert_matches)] #![deny(clippy::all)] pub mod cache_archive; +pub mod fs; pub mod http; pub mod signature_authentication; +#[cfg(test)] +mod test_cases; use std::{backtrace, backtrace::Backtrace}; @@ -51,6 +55,12 @@ pub enum CacheError { WindowsUnsafeName(String, #[backtrace] Backtrace), #[error("tar attempts to write outside of directory: {0}")] LinkOutsideOfDirectory(String, #[backtrace] Backtrace), + #[error("Invalid cache metadata file")] + InvalidMetadata(serde_json::Error, #[backtrace] Backtrace), + #[error("Failed to write cache metadata file")] + MetadataWriteFailure(serde_json::Error, #[backtrace] Backtrace), + #[error("Cache miss")] + CacheMiss, } impl From for CacheError { diff --git a/crates/turborepo-cache/src/test_cases.rs b/crates/turborepo-cache/src/test_cases.rs new file mode 100644 index 0000000000000..fbd99b30c362a --- /dev/null +++ b/crates/turborepo-cache/src/test_cases.rs @@ -0,0 +1,78 @@ +use anyhow::Result; +use turbopath::{AbsoluteSystemPath, AnchoredSystemPathBuf}; + +pub(crate) struct TestFile { + pub path: AnchoredSystemPathBuf, + pub contents: &'static str, +} + +impl TestFile { + pub fn create(&self, repo_root: &AbsoluteSystemPath) -> Result<()> { + let file_path = repo_root.resolve(&self.path); + std::fs::create_dir_all(file_path.parent().unwrap())?; + std::fs::write(file_path, &self.contents)?; + + Ok(()) + } +} + +pub(crate) struct TestCase { + pub files: Vec, + pub duration: u32, + pub hash: &'static str, +} + +impl TestCase { + pub fn initialize(&self, repo_root: &AbsoluteSystemPath) -> Result<()> { + for file in &self.files { + file.create(repo_root)?; + } + + Ok(()) + } +} + +pub(crate) fn get_test_cases() -> Vec { + vec![ + TestCase { + files: vec![TestFile { + path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), + contents: "hello world", + }], + duration: 58, + hash: "Faces Places", + }, + TestCase { + files: vec![ + TestFile { + path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), + contents: "Days of Heaven", + }, + TestFile { + path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(), + contents: "Badlands", + }, + ], + duration: 1284, + hash: "Cleo from 5 to 7", + }, + TestCase { + files: vec![ + TestFile { + path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(), + contents: "Days of Heaven", + }, + TestFile { + path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(), + contents: "Badlands", + }, + TestFile { + path: AnchoredSystemPathBuf::from_raw("src/main.js").unwrap(), + contents: "Tree of Life", + }, + ], + duration: 12845, + hash: "The Gleaners and I", + }, + ] +} diff --git a/crates/turborepo-paths/src/absolute_system_path.rs b/crates/turborepo-paths/src/absolute_system_path.rs index 38e1bec9b59ac..fece920f2b746 100644 --- a/crates/turborepo-paths/src/absolute_system_path.rs +++ b/crates/turborepo-paths/src/absolute_system_path.rs @@ -299,6 +299,10 @@ impl AbsoluteSystemPath { open_options.open(&self.0) } + pub fn read_to_string(&self) -> Result { + fs::read_to_string(&self.0) + } + #[cfg(unix)] pub fn set_mode(&self, mode: u32) -> Result<(), io::Error> { use std::{fs::Permissions, os::unix::fs::PermissionsExt};