Skip to content

Commit

Permalink
Simplify and rename cache configuration flags (#612)
Browse files Browse the repository at this point in the history
* Simplify and rename cache configuration flags

Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>

* Add details to the cache flag help

Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>

* Shorten help text

Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>

* Use MiB and change default to 5% of available space

Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>

---------

Signed-off-by: Alessandro Passaro <alexpax@amazon.co.uk>
  • Loading branch information
passaro authored Nov 17, 2023
1 parent b23eaca commit 7d38be7
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 83 deletions.
2 changes: 1 addition & 1 deletion mountpoint-s3/src/data_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use mountpoint_s3_client::types::ETag;
use thiserror::Error;

pub use crate::checksums::ChecksummedBytes;
pub use crate::data_cache::disk_data_cache::{CacheLimit, DiskDataCache};
pub use crate::data_cache::disk_data_cache::{CacheLimit, DiskDataCache, DiskDataCacheConfig};
pub use crate::data_cache::in_memory_data_cache::InMemoryDataCache;

/// Struct representing a key for accessing an entry in a [DataCache].
Expand Down
66 changes: 51 additions & 15 deletions mountpoint-s3/src/data_cache/disk_data_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,30 @@ const HASHED_DIR_SPLIT_INDEX: usize = 2;

/// On-disk implementation of [DataCache].
pub struct DiskDataCache {
block_size: u64,
cache_directory: PathBuf,
limit: CacheLimit,
config: DiskDataCacheConfig,
/// Tracks blocks usage. `None` when no cache limit was set.
usage: Option<Mutex<UsageInfo<DiskBlockKey>>>,
}

/// Configuration for a [DiskDataCache].
///
/// Obtain sensible defaults via [Default]: 1 MiB blocks and a limit that
/// preserves 5% of available disk space.
#[derive(Debug)]
pub struct DiskDataCacheConfig {
    /// Size of data blocks in bytes (default: 1 MiB).
    pub block_size: u64,
    /// Policy bounding the on-disk cache size (see [CacheLimit]):
    /// unbounded, a fixed total size, or a minimum free-space ratio.
    pub limit: CacheLimit,
}

impl Default for DiskDataCacheConfig {
fn default() -> Self {
Self {
block_size: 1024 * 1024, // 1 MiB block size
limit: CacheLimit::AvailableSpace { min_ratio: 0.05 }, // Preserve 5% available space
}
}
}

/// Limit the cache size.
#[derive(Debug)]
pub enum CacheLimit {
Expand Down Expand Up @@ -186,15 +203,14 @@ impl DiskBlock {

impl DiskDataCache {
/// Create a new instance of an [DiskDataCache] with the specified configuration.
pub fn new(cache_directory: PathBuf, block_size: u64, limit: CacheLimit) -> Self {
let usage = match limit {
pub fn new(cache_directory: PathBuf, config: DiskDataCacheConfig) -> Self {
let usage = match config.limit {
CacheLimit::Unbounded => None,
CacheLimit::TotalSize { .. } | CacheLimit::AvailableSpace { .. } => Some(Mutex::new(UsageInfo::new())),
};
DiskDataCache {
block_size,
cache_directory,
limit,
config,
usage,
}
}
Expand Down Expand Up @@ -267,7 +283,7 @@ impl DiskDataCache {
}

fn is_limit_exceeded(&self, size: usize) -> bool {
match self.limit {
match self.config.limit {
CacheLimit::Unbounded => false,
CacheLimit::TotalSize { max_size } => size > max_size,
CacheLimit::AvailableSpace { min_ratio } => {
Expand Down Expand Up @@ -324,7 +340,7 @@ impl DataCache for DiskDataCache {
block_idx: BlockIndex,
block_offset: u64,
) -> DataCacheResult<Option<ChecksummedBytes>> {
if block_offset != block_idx * self.block_size {
if block_offset != block_idx * self.config.block_size {
return Err(DataCacheError::InvalidBlockOffset);
}
let block_key = DiskBlockKey::new(cache_key, block_idx);
Expand Down Expand Up @@ -358,7 +374,7 @@ impl DataCache for DiskDataCache {
block_offset: u64,
bytes: ChecksummedBytes,
) -> DataCacheResult<()> {
if block_offset != block_idx * self.block_size {
if block_offset != block_idx * self.config.block_size {
return Err(DataCacheError::InvalidBlockOffset);
}

Expand All @@ -381,7 +397,7 @@ impl DataCache for DiskDataCache {
}

fn block_size(&self) -> u64 {
self.block_size
self.config.block_size
}
}

Expand Down Expand Up @@ -518,7 +534,13 @@ mod tests {
#[test]
fn get_path_for_block_key() {
let cache_dir = PathBuf::from("mountpoint-cache/");
let data_cache = DiskDataCache::new(cache_dir, 1024, CacheLimit::Unbounded);
let data_cache = DiskDataCache::new(
cache_dir,
DiskDataCacheConfig {
block_size: 1024,
limit: CacheLimit::Unbounded,
},
);

let s3_key = "a".repeat(266);

Expand Down Expand Up @@ -550,7 +572,13 @@ mod tests {

let block_size = 8 * 1024 * 1024;
let cache_directory = tempfile::tempdir().unwrap();
let cache = DiskDataCache::new(cache_directory.into_path(), block_size, CacheLimit::Unbounded);
let cache = DiskDataCache::new(
cache_directory.into_path(),
DiskDataCacheConfig {
block_size,
limit: CacheLimit::Unbounded,
},
);
let cache_key_1 = CacheKey {
s3_key: "a".into(),
etag: ETag::for_tests(),
Expand Down Expand Up @@ -623,7 +651,13 @@ mod tests {
let slice = data.slice(1..5);

let cache_directory = tempfile::tempdir().unwrap();
let cache = DiskDataCache::new(cache_directory.into_path(), 8 * 1024 * 1024, CacheLimit::Unbounded);
let cache = DiskDataCache::new(
cache_directory.into_path(),
DiskDataCacheConfig {
block_size: 8 * 1024 * 1024,
limit: CacheLimit::Unbounded,
},
);
let cache_key = CacheKey {
s3_key: "a".into(),
etag: ETag::for_tests(),
Expand Down Expand Up @@ -704,8 +738,10 @@ mod tests {
let cache_directory = tempfile::tempdir().unwrap();
let cache = DiskDataCache::new(
cache_directory.into_path(),
BLOCK_SIZE as u64,
CacheLimit::TotalSize { max_size: CACHE_LIMIT },
DiskDataCacheConfig {
block_size: BLOCK_SIZE as u64,
limit: CacheLimit::TotalSize { max_size: CACHE_LIMIT },
},
);

// Put all of large_object
Expand Down
89 changes: 22 additions & 67 deletions mountpoint-s3/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,68 +223,33 @@ struct CliArgs {
#[cfg(feature = "caching")]
#[clap(
long,
help = "Enable caching of file and directory metadata for a configurable time-to-live (TTL)",
help = "Enable caching of object metadata and content to the given directory",
help_heading = CACHING_OPTIONS_HEADER,
value_name = "DIRECTORY",
)]
pub enable_metadata_caching: bool,
pub cache: Option<PathBuf>,

// TODO: What is a sensible default? Should TTL even be configurable?
#[cfg(feature = "caching")]
#[clap(
long,
help = "Override time-to-live (TTL) for cached metadata entries in seconds",
help = "Time-to-live (TTL) for cached metadata in seconds [default: 1s]",
value_name = "SECONDS",
value_parser = parse_duration_seconds,
help_heading = CACHING_OPTIONS_HEADER,
requires = "enable_metadata_caching",
)]
pub metadata_cache_ttl: Option<Duration>,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Enable caching of object data in a directory",
help_heading = CACHING_OPTIONS_HEADER,
value_name = "DIRECTORY",
requires = "cache",
)]
pub data_caching_directory: Option<PathBuf>,
pub metadata_ttl: Option<Duration>,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Block size for the data cache",
default_value = "1048576",
help = "Maximum size of the cache directory in MiB [default: preserve 5% of available space]",
value_name = "MiB",
value_parser = value_parser!(u64).range(1..),
help_heading = CACHING_OPTIONS_HEADER,
requires = "data_caching_directory",
requires = "cache",
)]
pub data_cache_block_size: u64,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Maximum size for the data cache in MB",
value_parser = value_parser!(u64).range(1..),
help_heading = CACHING_OPTIONS_HEADER,
requires = "data_caching_directory",
conflicts_with = "data_cache_free_space",
)]
pub data_cache_size_limit: Option<u64>,

// TODO: Temporary for testing. Review before exposing outside "caching" feature.
#[cfg(feature = "caching")]
#[clap(
long,
help = "Minimum available space to maintain (%)",
value_parser = value_parser!(u64).range(0..100),
help_heading = CACHING_OPTIONS_HEADER,
requires = "data_caching_directory",
conflicts_with = "data_cache_size_limit",
)]
pub data_cache_free_space: Option<u64>,
pub max_cache_size: Option<u64>,

#[clap(
long,
Expand Down Expand Up @@ -541,10 +506,9 @@ fn mount(args: CliArgs) -> anyhow::Result<FuseSession> {
}
#[cfg(feature = "caching")]
{
// TODO review this when we finalize the available configs
if args.enable_metadata_caching && args.data_caching_directory.is_some() {
if args.cache.is_some() {
user_agent.value("mp-cache");
if let Some(ttl) = args.metadata_cache_ttl {
if let Some(ttl) = args.metadata_ttl {
user_agent.key_value("mp-cache-ttl", &ttl.as_secs().to_string());
}
}
Expand Down Expand Up @@ -597,34 +561,25 @@ fn mount(args: CliArgs) -> anyhow::Result<FuseSession> {

#[cfg(feature = "caching")]
{
use mountpoint_s3::data_cache::{CacheLimit, DiskDataCache};
use mountpoint_s3::data_cache::{CacheLimit, DiskDataCache, DiskDataCacheConfig};
use mountpoint_s3::fs::CacheConfig;
use mountpoint_s3::prefetch::caching_prefetch;

if args.enable_metadata_caching {
// TODO: Review default for TTL
let metadata_cache_ttl = args.metadata_cache_ttl.unwrap_or(Duration::from_secs(3600));
if let Some(path) = args.cache {
let metadata_cache_ttl = args.metadata_ttl.unwrap_or(Duration::from_secs(1));
filesystem_config.cache_config = CacheConfig {
serve_lookup_from_cache: true,
dir_ttl: metadata_cache_ttl,
file_ttl: metadata_cache_ttl,
};
}

if let Some(path) = args.data_caching_directory {
let limit = {
if let Some(max_size_in_mb) = args.data_cache_size_limit {
let max_size = (max_size_in_mb * 1024 * 1024) as usize;
CacheLimit::TotalSize { max_size }
} else if let Some(per_cent) = args.data_cache_free_space {
let min_ratio = (per_cent as f64) * 0.01;
CacheLimit::AvailableSpace { min_ratio }
} else {
CacheLimit::Unbounded
}
};

let cache = DiskDataCache::new(path, args.data_cache_block_size, limit);
let mut cache_config = DiskDataCacheConfig::default();
if let Some(max_size_in_mib) = args.max_cache_size {
cache_config.limit = CacheLimit::TotalSize {
max_size: (max_size_in_mib * 1024 * 1024) as usize,
};
}
let cache = DiskDataCache::new(path, cache_config);
let prefetcher = caching_prefetch(cache, runtime, prefetcher_config);
return create_filesystem(
client,
Expand Down

0 comments on commit 7d38be7

Please sign in to comment.