diff --git a/mountpoint-s3/src/data_cache.rs b/mountpoint-s3/src/data_cache.rs index 1ff408d88..b4c574deb 100644 --- a/mountpoint-s3/src/data_cache.rs +++ b/mountpoint-s3/src/data_cache.rs @@ -11,7 +11,7 @@ use mountpoint_s3_client::types::ETag; use thiserror::Error; pub use crate::checksums::ChecksummedBytes; -pub use crate::data_cache::disk_data_cache::{CacheLimit, DiskDataCache}; +pub use crate::data_cache::disk_data_cache::{CacheLimit, DiskDataCache, DiskDataCacheConfig}; pub use crate::data_cache::in_memory_data_cache::InMemoryDataCache; /// Struct representing a key for accessing an entry in a [DataCache]. diff --git a/mountpoint-s3/src/data_cache/disk_data_cache.rs b/mountpoint-s3/src/data_cache/disk_data_cache.rs index e7d37a7f2..fa6e247ab 100644 --- a/mountpoint-s3/src/data_cache/disk_data_cache.rs +++ b/mountpoint-s3/src/data_cache/disk_data_cache.rs @@ -26,13 +26,30 @@ const HASHED_DIR_SPLIT_INDEX: usize = 2; /// On-disk implementation of [DataCache]. pub struct DiskDataCache { - block_size: u64, cache_directory: PathBuf, - limit: CacheLimit, + config: DiskDataCacheConfig, /// Tracks blocks usage. `None` when no cache limit was set. usage: Option>>, } +/// Configuration for a [DiskDataCache]. +#[derive(Debug)] +pub struct DiskDataCacheConfig { + /// Size of data blocks. + pub block_size: u64, + /// How to limit the cache size. + pub limit: CacheLimit, +} + +impl Default for DiskDataCacheConfig { + fn default() -> Self { + Self { + block_size: 1024 * 1024, // 1 MiB block size + limit: CacheLimit::AvailableSpace { min_ratio: 0.05 }, // Preserve 5% available space + } + } +} + /// Limit the cache size. #[derive(Debug)] pub enum CacheLimit { @@ -186,15 +203,14 @@ impl DiskBlock { impl DiskDataCache { /// Create a new instance of an [DiskDataCache] with the specified configuration. - pub fn new(cache_directory: PathBuf, block_size: u64, limit: CacheLimit) -> Self { - let usage = match limit { + pub fn new(cache_directory: PathBuf, config: DiskDataCacheConfig) -> Self { + let usage = match config.limit { CacheLimit::Unbounded => None, CacheLimit::TotalSize { .. } | CacheLimit::AvailableSpace { .. } => Some(Mutex::new(UsageInfo::new())), }; DiskDataCache { - block_size, cache_directory, - limit, + config, usage, } } @@ -267,7 +283,7 @@ impl DiskDataCache { } fn is_limit_exceeded(&self, size: usize) -> bool { - match self.limit { + match self.config.limit { CacheLimit::Unbounded => false, CacheLimit::TotalSize { max_size } => size > max_size, CacheLimit::AvailableSpace { min_ratio } => { @@ -324,7 +340,7 @@ impl DataCache for DiskDataCache { block_idx: BlockIndex, block_offset: u64, ) -> DataCacheResult> { - if block_offset != block_idx * self.block_size { + if block_offset != block_idx * self.config.block_size { return Err(DataCacheError::InvalidBlockOffset); } let block_key = DiskBlockKey::new(cache_key, block_idx); @@ -358,7 +374,7 @@ impl DataCache for DiskDataCache { block_offset: u64, bytes: ChecksummedBytes, ) -> DataCacheResult<()> { - if block_offset != block_idx * self.block_size { + if block_offset != block_idx * self.config.block_size { return Err(DataCacheError::InvalidBlockOffset); } @@ -381,7 +397,7 @@ impl DataCache for DiskDataCache { } fn block_size(&self) -> u64 { - self.block_size + self.config.block_size } } @@ -518,7 +534,13 @@ mod tests { #[test] fn get_path_for_block_key() { let cache_dir = PathBuf::from("mountpoint-cache/"); - let data_cache = DiskDataCache::new(cache_dir, 1024, CacheLimit::Unbounded); + let data_cache = DiskDataCache::new( + cache_dir, + DiskDataCacheConfig { + block_size: 1024, + limit: CacheLimit::Unbounded, + }, + ); let s3_key = "a".repeat(266); @@ -550,7 +572,13 @@ mod tests { let block_size = 8 * 1024 * 1024; let cache_directory = tempfile::tempdir().unwrap(); - let cache = DiskDataCache::new(cache_directory.into_path(), block_size, CacheLimit::Unbounded); + let cache = DiskDataCache::new( + cache_directory.into_path(), + DiskDataCacheConfig { + block_size, + limit: CacheLimit::Unbounded, + }, + ); let cache_key_1 = CacheKey { s3_key: "a".into(), etag: ETag::for_tests(), @@ -623,7 +651,13 @@ mod tests { let slice = data.slice(1..5); let cache_directory = tempfile::tempdir().unwrap(); - let cache = DiskDataCache::new(cache_directory.into_path(), 8 * 1024 * 1024, CacheLimit::Unbounded); + let cache = DiskDataCache::new( + cache_directory.into_path(), + DiskDataCacheConfig { + block_size: 8 * 1024 * 1024, + limit: CacheLimit::Unbounded, + }, + ); let cache_key = CacheKey { s3_key: "a".into(), etag: ETag::for_tests(), @@ -704,8 +738,10 @@ mod tests { let cache_directory = tempfile::tempdir().unwrap(); let cache = DiskDataCache::new( cache_directory.into_path(), - BLOCK_SIZE as u64, - CacheLimit::TotalSize { max_size: CACHE_LIMIT }, + DiskDataCacheConfig { + block_size: BLOCK_SIZE as u64, + limit: CacheLimit::TotalSize { max_size: CACHE_LIMIT }, + }, ); // Put all of large_object diff --git a/mountpoint-s3/src/main.rs b/mountpoint-s3/src/main.rs index 8ca39d536..985b1cc2c 100644 --- a/mountpoint-s3/src/main.rs +++ b/mountpoint-s3/src/main.rs @@ -223,68 +223,33 @@ struct CliArgs { #[cfg(feature = "caching")] #[clap( long, - help = "Enable caching of file and directory metadata for a configurable time-to-live (TTL)", + help = "Enable caching of object metadata and content to the given directory", help_heading = CACHING_OPTIONS_HEADER, + value_name = "DIRECTORY", )] - pub enable_metadata_caching: bool, + pub cache: Option, - // TODO: What is a sensible default? Should TTL even be configurable? #[cfg(feature = "caching")] #[clap( long, - help = "Override time-to-live (TTL) for cached metadata entries in seconds", + help = "Time-to-live (TTL) for cached metadata in seconds [default: 1s]", value_name = "SECONDS", value_parser = parse_duration_seconds, help_heading = CACHING_OPTIONS_HEADER, - requires = "enable_metadata_caching", - )] - pub metadata_cache_ttl: Option, - - // TODO: Temporary for testing. Review before exposing outside "caching" feature. - #[cfg(feature = "caching")] - #[clap( - long, - help = "Enable caching of object data in a directory", - help_heading = CACHING_OPTIONS_HEADER, - value_name = "DIRECTORY", + requires = "cache", )] - pub data_caching_directory: Option, + pub metadata_ttl: Option, - // TODO: Temporary for testing. Review before exposing outside "caching" feature. #[cfg(feature = "caching")] #[clap( long, - help = "Block size for the data cache", - default_value = "1048576", + help = "Maximum size of the cache directory in MiB [default: preserve 5% of available space]", + value_name = "MiB", value_parser = value_parser!(u64).range(1..), help_heading = CACHING_OPTIONS_HEADER, - requires = "data_caching_directory", + requires = "cache", )] - pub data_cache_block_size: u64, - - // TODO: Temporary for testing. Review before exposing outside "caching" feature. - #[cfg(feature = "caching")] - #[clap( - long, - help = "Maximum size for the data cache in MB", - value_parser = value_parser!(u64).range(1..), - help_heading = CACHING_OPTIONS_HEADER, - requires = "data_caching_directory", - conflicts_with = "data_cache_free_space", - )] - pub data_cache_size_limit: Option, - - // TODO: Temporary for testing. Review before exposing outside "caching" feature. - #[cfg(feature = "caching")] - #[clap( - long, - help = "Minimum available space to maintain (%)", - value_parser = value_parser!(u64).range(0..100), - help_heading = CACHING_OPTIONS_HEADER, - requires = "data_caching_directory", - conflicts_with = "data_cache_size_limit", - )] - pub data_cache_free_space: Option, + pub max_cache_size: Option, #[clap( long, @@ -541,10 +506,9 @@ fn mount(args: CliArgs) -> anyhow::Result { } #[cfg(feature = "caching")] { - // TODO review this when we finalize the available configs - if args.enable_metadata_caching && args.data_caching_directory.is_some() { + if args.cache.is_some() { user_agent.value("mp-cache"); - if let Some(ttl) = args.metadata_cache_ttl { + if let Some(ttl) = args.metadata_ttl { user_agent.key_value("mp-cache-ttl", &ttl.as_secs().to_string()); } } @@ -597,34 +561,25 @@ fn mount(args: CliArgs) -> anyhow::Result { #[cfg(feature = "caching")] { - use mountpoint_s3::data_cache::{CacheLimit, DiskDataCache}; + use mountpoint_s3::data_cache::{CacheLimit, DiskDataCache, DiskDataCacheConfig}; use mountpoint_s3::fs::CacheConfig; use mountpoint_s3::prefetch::caching_prefetch; - if args.enable_metadata_caching { - // TODO: Review default for TTL - let metadata_cache_ttl = args.metadata_cache_ttl.unwrap_or(Duration::from_secs(3600)); + if let Some(path) = args.cache { + let metadata_cache_ttl = args.metadata_ttl.unwrap_or(Duration::from_secs(1)); filesystem_config.cache_config = CacheConfig { serve_lookup_from_cache: true, dir_ttl: metadata_cache_ttl, file_ttl: metadata_cache_ttl, }; - } - - if let Some(path) = args.data_caching_directory { - let limit = { - if let Some(max_size_in_mb) = args.data_cache_size_limit { - let max_size = (max_size_in_mb * 1024 * 1024) as usize; - CacheLimit::TotalSize { max_size } - } else if let Some(per_cent) = args.data_cache_free_space { - let min_ratio = (per_cent as f64) * 0.01; - CacheLimit::AvailableSpace { min_ratio } - } else { - CacheLimit::Unbounded - } - }; - let cache = DiskDataCache::new(path, args.data_cache_block_size, limit); + let mut cache_config = DiskDataCacheConfig::default(); + if let Some(max_size_in_mib) = args.max_cache_size { + cache_config.limit = CacheLimit::TotalSize { + max_size: (max_size_in_mib * 1024 * 1024) as usize, + }; + } + let cache = DiskDataCache::new(path, cache_config); let prefetcher = caching_prefetch(cache, runtime, prefetcher_config); return create_filesystem( client,