From d83dccc8c7bdf1c56366431ec5b17ad7a9b155f5 Mon Sep 17 00:00:00 2001 From: Tpt Date: Tue, 13 Sep 2022 21:32:46 +0200 Subject: [PATCH] Makes compression threshold explicitly fixed at runtime --- src/btree/mod.rs | 18 +++++++++++++----- src/column.rs | 21 ++++++++++++++------- src/options.rs | 11 +++++++---- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/src/btree/mod.rs b/src/btree/mod.rs index 081dd135..a3a5077d 100644 --- a/src/btree/mod.rs +++ b/src/btree/mod.rs @@ -8,7 +8,7 @@ use crate::{ error::{Error, Result}, index::Address, log::{LogAction, LogQuery, LogReader, LogWriter}, - options::Options, + options::{Metadata, Options, DEFAULT_COMPRESSION_THRESHOLD}, table::{ key::{TableKey, TableKeyQuery}, Entry as ValueTableEntry, Value, ValueTable, @@ -122,7 +122,8 @@ impl BTreeTable { pub fn open( id: ColId, values: Vec, - metadata: &crate::options::Metadata, + options: &Options, + metadata: &Metadata, ) -> Result { let size_tier = HEADER_ADDRESS.size_tier() as usize; if !values[size_tier].is_init() { @@ -131,12 +132,19 @@ impl BTreeTable { entry.write_header(&btree_header); values[size_tier].init_with_entry(&*entry.encoded.inner_mut())?; } - let options = &metadata.columns[id as usize]; + let col_options = &metadata.columns[id as usize]; Ok(BTreeTable { id, tables: RwLock::new(values), - ref_counted: options.ref_counted, - compression: Compress::new(options.compression, options.compression_threshold), + ref_counted: col_options.ref_counted, + compression: Compress::new( + col_options.compression, + options + .compression_threshold + .get(&id) + .copied() + .unwrap_or(DEFAULT_COMPRESSION_THRESHOLD), + ), }) } diff --git a/src/column.rs b/src/column.rs index 5000cf78..4ce41186 100644 --- a/src/column.rs +++ b/src/column.rs @@ -9,7 +9,7 @@ use crate::{ error::{Error, Result}, index::{Address, IndexTable, PlanOutcome, TableId as IndexTableId}, log::{Log, LogAction, LogOverlays, LogQuery, LogReader, LogWriter}, - options::{ColumnOptions, Metadata, Options}, + options::{ColumnOptions, Metadata, Options, DEFAULT_COMPRESSION_THRESHOLD}, stats::{ColumnStatSummary, ColumnStats}, table::{ key::{TableKey, TableKeyQuery}, @@ -271,7 +271,7 @@ impl Column { .collect::>()?; if column_options.btree_index { - Ok(Column::Tree(BTreeTable::open(col, value, metadata)?)) + Ok(Column::Tree(BTreeTable::open(col, value, options, metadata)?)) } else { Ok(Column::Hash(HashColumn::open(col, value, options, metadata)?)) } @@ -300,20 +300,27 @@ impl HashColumn { let (index, reindexing, stats) = Self::open_index(&options.path, col)?; let collect_stats = options.stats; let path = &options.path; - let options = &metadata.columns[col as usize]; + let col_options = &metadata.columns[col as usize]; let db_version = metadata.version; Ok(HashColumn { col, tables: RwLock::new(Tables { index, value }), reindex: RwLock::new(Reindex { queue: reindexing, progress: AtomicU64::new(0) }), path: path.into(), - preimage: options.preimage, - uniform_keys: options.uniform, - ref_counted: options.ref_counted, + preimage: col_options.preimage, + uniform_keys: col_options.uniform, + ref_counted: col_options.ref_counted, collect_stats, salt: metadata.salt, stats, - compression: Compress::new(options.compression, options.compression_threshold), + compression: Compress::new( + col_options.compression, + options + .compression_threshold + .get(&col) + .copied() + .unwrap_or(DEFAULT_COMPRESSION_THRESHOLD), + ), db_version, }) } diff --git a/src/options.rs b/src/options.rs index bde9a308..4112084d 100644 --- a/src/options.rs +++ b/src/options.rs @@ -13,6 +13,8 @@ pub const CURRENT_VERSION: u32 = 7; // TODO on last supported 5, remove MULTIHEAD_V4 and MULTIPART_V4 const LAST_SUPPORTED_VERSION: u32 = 4; +pub const DEFAULT_COMPRESSION_THRESHOLD: u32 = 4096; + /// Database configuration. #[derive(Clone, Debug)] pub struct Options { @@ -31,6 +33,10 @@ pub struct Options { /// Override salt value. If `None` is specified salt is loaded from metadata /// or randomly generated when creating a new database. pub salt: Option, + /// Minimal value size threshold to attempt compressing a value per column. + /// + /// Optional. A sensible default is used if nothing is set for a given column. + pub compression_threshold: HashMap, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -50,8 +56,6 @@ pub struct ColumnOptions { pub ref_counted: bool, /// Compression to use for this column. pub compression: CompressionType, - /// Minimal value size threshold to attempt compressing a value. - pub compression_threshold: u32, /// Column is using a btree indexing. pub btree_index: bool, } @@ -106,7 +110,6 @@ impl ColumnOptions { uniform, ref_counted, compression: compression.into(), - compression_threshold: ColumnOptions::default().compression_threshold, btree_index, }) } @@ -119,7 +122,6 @@ impl Default for ColumnOptions { uniform: false, ref_counted: false, compression: CompressionType::NoCompression, - compression_threshold: 4096, btree_index: false, } } @@ -134,6 +136,7 @@ impl Options { stats: true, salt: None, columns: (0..num_columns).map(|_| Default::default()).collect(), + compression_threshold: HashMap::new(), } }