From 4291baa047166964e7eb22e5a77a00f2289ad1bd Mon Sep 17 00:00:00 2001 From: Benjamin Nguyen Date: Tue, 24 Oct 2023 22:23:57 -0700 Subject: [PATCH] checkpoint --- Cargo.lock | 150 +++---------- Cargo.toml | 3 +- src/cli/mod.rs | 41 ---- src/disk/mod.rs | 310 ++++++++++++++++++++++++++ src/disk/prefix.rs | 135 +++++++++++ src/error.rs | 55 ++++- src/file/inode.rs | 51 +++++ src/file/mod.rs | 86 ++++++- src/logging.rs | 44 ++++ src/main.rs | 51 ++++- src/tree/mod.rs | 140 +++++++++++- src/tree/traversal/mod.rs | 2 - src/tree/traversal/multi_threaded.rs | 1 - src/tree/traversal/single_threaded.rs | 9 - src/user/enums.rs | 36 +++ src/user/mod.rs | 85 +++++++ 16 files changed, 999 insertions(+), 200 deletions(-) delete mode 100644 src/cli/mod.rs create mode 100644 src/disk/mod.rs create mode 100644 src/disk/prefix.rs create mode 100644 src/file/inode.rs create mode 100644 src/logging.rs delete mode 100644 src/tree/traversal/mod.rs delete mode 100644 src/tree/traversal/multi_threaded.rs delete mode 100644 src/tree/traversal/single_threaded.rs create mode 100644 src/user/enums.rs create mode 100644 src/user/mod.rs diff --git a/Cargo.lock b/Cargo.lock index ccc6de6..22688c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -76,9 +76,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.12.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" [[package]] name = "cc" @@ -99,11 +99,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e3c5919066adf22df73762e50cffcde3a758f2a848b113b586d1f86728b673b" dependencies = [ "iana-time-zone", - "js-sys", "num-integer", "num-traits", - "time", - "wasm-bindgen", "winapi", ] @@ -153,16 +150,6 @@ dependencies = [ "os_str_bytes", ] -[[package]] -name = "codespan-reporting" -version = "0.11.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" -dependencies = [ - "termcolor", - "unicode-width", -] - [[package]] name = "config" version = "0.13.3" @@ -218,50 +205,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "cxx" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61f1b6389c3fe1c316bf8a4dccc90a38208354b330925bce1f74a6c4756eb93" -dependencies = [ - "cc", - "cxxbridge-flags", - "cxxbridge-macro", - "link-cplusplus", -] - -[[package]] -name = "cxx-build" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12cee708e8962df2aeb38f594aae5d827c022b6460ac71a7a3e2c3c2aae5a07b" -dependencies = [ - "cc", - "codespan-reporting", - "once_cell", - "proc-macro2", - "quote", - "scratch", - "syn 2.0.12", -] - -[[package]] -name = "cxxbridge-flags" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7944172ae7e4068c533afbb984114a56c46e9ccddda550499caa222902c7f7bb" - -[[package]] -name = "cxxbridge-macro" -version = "1.0.94" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2345488264226bf682893e25de0769f3360aac9957980ec49361b083ddaa5bc5" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.12", -] - [[package]] name = "dirs" version = "5.0.0" @@ -301,6 +244,7 @@ dependencies = [ "indextree", "indoc", "libc", + "log", "lscolors", "once_cell", "regex", @@ -375,7 +319,7 @@ checksum = "c85e1d9ab2eadba7e5040d4e09cbd6d072b76a557ad64e797c2cb9d4da21d7e4" dependencies = [ "cfg-if", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi", ] [[package]] @@ -405,9 +349,9 @@ checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] name = "iana-time-zone" -version = "0.1.56" +version = "0.1.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0722cd7114b7de04316e7ea5456a0bbb20e4adb46fd27a3697adb812cff0f37c" +checksum = "2fad5b825842d2b38bd206f3e81d6957625fd7f0a361e345c30e01a0ae2dd613" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -419,12 +363,11 @@ dependencies = [ [[package]] name = "iana-time-zone-haiku" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0703ae284fc167426161c2e3f1da3ea71d94b21bedbcc9494e92b28e334e3dca" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ - "cxx", - "cxx-build", + "cc", ] [[package]] @@ -490,9 +433,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -509,15 +452,6 @@ version = "0.2.141" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" -[[package]] -name = "link-cplusplus" -version = "1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecd207c9c713c34f95a097a5b029ac2ce6010530c7b49d7fea24d977dede04f5" -dependencies = [ - "cc", -] - [[package]] name = "linux-raw-sys" version = "0.1.4" @@ -542,12 +476,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.17" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lscolors" @@ -579,7 +510,7 @@ checksum = "5b9d9a46eff5b4ff64b45a9e316a6d1e0bc719ef429cbec4dc630684212bfdf9" dependencies = [ "libc", "log", - "wasi 
0.11.0+wasi-snapshot-preview1", + "wasi", "windows-sys 0.45.0", ] @@ -803,12 +734,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" -[[package]] -name = "scratch" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1792db035ce95be60c3f8853017b3999209281c24e2ba5bc8e59bf97a0c590c1" - [[package]] name = "serde" version = "1.0.156" @@ -956,17 +881,6 @@ dependencies = [ "once_cell", ] -[[package]] -name = "time" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" -dependencies = [ - "libc", - "wasi 0.10.0+wasi-snapshot-preview1", - "winapi", -] - [[package]] name = "toml" version = "0.5.11" @@ -982,12 +896,6 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" -[[package]] -name = "unicode-width" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" - [[package]] name = "utf8parse" version = "0.2.1" @@ -1031,12 +939,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" - [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1045,9 +947,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = 
"7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1055,24 +957,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.12", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1080,22 +982,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.12", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "winapi" diff --git a/Cargo.toml b/Cargo.toml index 74fe2b7..d5b903c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ path = "src/main.rs" [dependencies] ansi_term = "0.12.1" anyhow = "1.0.75" -chrono = "0.4.24" +chrono = { version = 
"0.4.24", default-features = false, features = ["clock", "std"] } clap = { version = "4.1.1", features = ["derive"] } clap_complete = "4.1.1" config = { version = "0.13.3", default-features = false, features = ["toml"] } @@ -37,6 +37,7 @@ errno = "0.3.1" filesize = "0.2.0" ignore = "0.4.2" indextree = "4.6.0" +log = { version = "0.4.20", features = ["std"] } lscolors = { version = "0.13.0", features = ["ansi_term"] } once_cell = "1.17.0" regex = "1.7.3" diff --git a/src/cli/mod.rs b/src/cli/mod.rs deleted file mode 100644 index 8cfb896..0000000 --- a/src/cli/mod.rs +++ /dev/null @@ -1,41 +0,0 @@ -use crate::error::{self, ErrorReport, Result, WithContext}; -use clap::Parser; -use std::{env, path::PathBuf}; - -#[derive(Parser, Debug)] -#[command(name = "erdtree")] -#[command(author = "Benjamin Nguyen. ")] -#[command(version = "4.0.0")] -#[command( - about = "erdtree (erd) is a cross-platform, multi-threaded, and general purpose filesystem and disk usage utility.", - long_about = None, -)] -pub struct Args { - /// Directory to traverse; defaults to current working directory - dir: Option, -} - -impl Args { - pub fn init() -> Result { - let mut clargs = Self::parse(); - clargs.set_dir()?; - Ok(clargs) - } - - fn set_dir(&mut self) -> Result<()> { - let current_dir = env::current_dir().into_report(error::Category::System)?; - - Err(std::io::Error::new( - std::io::ErrorKind::Other, - "This is the underlying error", - )) - .into_report(error::Category::User) - .context("Oh my god...") - .context("Look at her butt") - .context("omg..")?; - - //.into_report_ctx(error::Category::Internal, "hmmmm")?; - - Ok(()) - } -} diff --git a/src/disk/mod.rs b/src/disk/mod.rs new file mode 100644 index 0000000..84b4974 --- /dev/null +++ b/src/disk/mod.rs @@ -0,0 +1,310 @@ +use crate::{error::prelude::*, user::enums::BytePresentation}; +use ignore::DirEntry; +use std::{ + fmt::{self, Display}, + fs::Metadata, + ops::AddAssign, +}; + +/// Binary and SI prefixes. 
+pub mod prefix; + +/// https://doc.rust-lang.org/std/os/unix/fs/trait.MetadataExt.html#tymethod.blocks +#[cfg(unix)] +const BLOCK_SIZE: u64 = 512; + +/// Different metrics for reporting file size. +#[derive(Debug)] +pub enum Usage { + /// Apparent size in bytes rather than actual disk usage. + Logical { + value: u64, + presentation: BytePresentation, + }, + + /// The amount of bytes used to store the relevant file on disk. + Physical { + value: u64, + presentation: BytePresentation, + }, + + /// The amount of blocks used to store the relevant file on disk. + Blocks(u64), + + /// The total amount of words in a file + WordCount(u64), + + /// The total amount of lines in a file + LineCount(u64), +} + +impl Usage { + /// Gets the actual bytes stored on disk for a particular file. Directory sizes must be + /// recursively computed so they will be initialized to a size of 0. + #[cfg(unix)] + pub fn init_physical(metadata: &Metadata, presentation: BytePresentation) -> Self { + use std::os::unix::fs::MetadataExt; + + let value = metadata + .is_dir() + .then_some(0) + .unwrap_or_else(|| metadata.blocks() * BLOCK_SIZE); + + Self::Physical { + value, + presentation, + } + } + + /// Gets the actual bytes stored on disk for a particular file. Directory sizes must be + /// recursively computed so they will be initialized to a size of 0. + #[cfg(windows)] + pub fn init_physical(metadata: &Metadata, presentation: BytePresentation) -> Self { + use std::os::windows::fs::MetadataExt; + + let value = metadata + .is_dir() + .then_some(0) + .unwrap_or_else(|| metadata.file_size()); + + Self::Physical { + value, + presentation, + } + } + + #[cfg(not(any(windows, unix)))] + pub fn init_physical(metadata: &Metadata, presentation: BytePresentation) -> Self { + Self::init_logical(metadata, presentation) + } + + /// Gets the apparent file size rather than disk usage. 
Refer to `--apparent-size` in the man + /// pages of `du`: https://man7.org/linux/man-pages/man1/du.1.html + pub fn init_logical(metadata: &Metadata, presentation: BytePresentation) -> Self { + let value = metadata.is_dir().then_some(0).unwrap_or(metadata.len()); + + Self::Logical { + value, + presentation, + } + } + + /// Gets the word count. Words are delimited by a whitespace or a sequence of whitespaces. + /// Directories are initialized to 0. The `follow` argument determines whether or not to query the + /// symlink target, otherwise the symlink will have a word count of 0. + pub fn init_word_count(data: &DirEntry, metadata: &Metadata, follow: bool) -> Result { + if metadata.is_dir() || (metadata.is_symlink() && !follow) { + return Ok(Self::WordCount(0)); + } + + let word_count = std::fs::read_to_string(data.path()) + .into_report(ErrorCategory::Internal) + .map(|data| data.split_whitespace().count())?; + + u64::try_from(word_count) + .into_report(ErrorCategory::Internal) + .map(Self::WordCount) + } + + /// Gets the line count. Lines are delimited by the new-line ASCII char. Directories are + /// initialized to 0. The `follow` argument determines whether or not to query the symlink + /// target, otherwise the symlink will have a count of 0. + pub fn init_line_count(data: &DirEntry, metadata: &Metadata, follow: bool) -> Result { + if metadata.is_dir() || (metadata.is_symlink() && !follow) { + return Ok(Self::LineCount(0)); + } + + let line_count = std::fs::read_to_string(data.path()) + .into_report(ErrorCategory::Internal) + .map(|data| data.lines().count())?; + + u64::try_from(line_count) + .into_report(ErrorCategory::Internal) + .map(Self::LineCount) + } + + /// Gets the underlying numeric value representing the disk usage + pub fn value(&self) -> u64 { + match self { + Self::WordCount(count) => *count, + Self::LineCount(count) => *count, + Self::Logical { value, .. } => *value, + Self::Physical { value, ..
} => *value, + + #[cfg(unix)] + Self::Blocks(blocks) => *blocks, + } + } + + /// Gets the actual amount of blocks allocated to a particular file. Directories are + /// initialized to 0. + #[cfg(unix)] + pub fn init_blocks(metadata: &Metadata) -> Self { + use std::os::unix::fs::MetadataExt; + let value = metadata.is_dir().then_some(0).unwrap_or_else(|| metadata.blocks()); + Self::Blocks(value) + } +} + +impl Display for Usage { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + macro_rules! byte_display { + ($p:expr, $v:expr) => { + match $p { + BytePresentation::Raw => write!(f, "{} B", $v), + BytePresentation::Binary => { + let prefix = prefix::Binary::from($v); + + if matches!(prefix, prefix::Binary::Base) { + write!(f, "{} {:>3}", $v, "B") + } else { + let bytes = ($v as f64) / prefix.base_value(); + write!(f, "{bytes:.1} {prefix}B") + } + }, + BytePresentation::StandardInternational => { + let prefix = prefix::Si::from($v); + + if matches!(prefix, prefix::Si::Base) { + write!(f, "{} {:>2}", $v, "B") + } else { + let bytes = ($v as f64) / prefix.base_value(); + write!(f, "{bytes:.1} {prefix}B") + } + }, + } + }; + } + + match self { + Self::WordCount(count) => ::fmt(count, f), + Self::LineCount(count) => ::fmt(count, f), + Self::Logical { + value, + presentation, + } => byte_display!(presentation, *value), + Self::Physical { + value, + presentation, + } => byte_display!(presentation, *value), + + #[cfg(unix)] + Self::Blocks(blocks) => ::fmt(blocks, f), + } + } +} + +impl AddAssign for Usage { + fn add_assign(&mut self, rhs: u64) { + match self { + Self::WordCount(count) => *count += rhs, + Self::LineCount(count) => *count += rhs, + Self::Logical { value, .. } => *value += rhs, + Self::Physical { value, ..
} => *value += rhs, + + #[cfg(unix)] + Self::Blocks(blocks) => *blocks += rhs, + } + } +} + +#[test] +fn test_bytes_display() { + let size = Usage::Physical { + value: 998, + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("998 B"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(10), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 KiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(20), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 MiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(30), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 GiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(40), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 TiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(50), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 PiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 2_u64.pow(30), + presentation: BytePresentation::Binary, + }; + + assert_eq!(String::from("1.0 GiB"), format!("{size}"),); + + let size = Usage::Physical { + value: 10_u64.pow(3), + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("1.0 KB"), format!("{size}"),); + + let size = Usage::Physical { + value: 10_u64.pow(6), + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("1.0 MB"), format!("{size}"),); + + let size = Usage::Physical { + value: 10_u64.pow(9), + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("1.0 GB"), format!("{size}"),); + + let size = Usage::Physical { + value: 10_u64.pow(12), + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("1.0 TB"), format!("{size}"),); + + let size 
= Usage::Physical { + value: 10_u64.pow(15), + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("1.0 PB"), format!("{size}"),); + + let size = Usage::Physical { + value: 998, + presentation: BytePresentation::StandardInternational, + }; + + assert_eq!(String::from("998 B"), format!("{size}"),); + + let size = Usage::Physical { + value: 1000, + presentation: BytePresentation::Raw, + }; + + assert_eq!(String::from("1000 B"), format!("{size}"),); +} diff --git a/src/disk/prefix.rs b/src/disk/prefix.rs new file mode 100644 index 0000000..448c420 --- /dev/null +++ b/src/disk/prefix.rs @@ -0,0 +1,135 @@ +use std::{ + convert::From, + fmt::{self, Display}, +}; + +/// https://en.wikipedia.org/wiki/Binary_prefix +#[derive(Debug, PartialEq)] +pub enum Binary { + Base, + Kibi, + Mebi, + Gibi, + Tebi, + Pebi, +} + +/// https://en.wikipedia.org/wiki/International_System_of_Units +#[derive(Debug, PartialEq)] +pub enum Si { + Base, + Kilo, + Mega, + Giga, + Tera, + Peta, +} + +impl Binary { + pub fn base_value(&self) -> f64 { + match self { + Self::Base => 1., + Self::Kibi => 2_u64.pow(10) as f64, + Self::Mebi => 2_u64.pow(20) as f64, + Self::Gibi => 2_u64.pow(30) as f64, + Self::Tebi => 2_u64.pow(40) as f64, + Self::Pebi => 2_u64.pow(50) as f64, + } + } +} + +impl Si { + pub fn base_value(&self) -> f64 { + match self { + Self::Base => 1., + Self::Kilo => 10_u64.pow(3) as f64, + Self::Mega => 10_u64.pow(6) as f64, + Self::Giga => 10_u64.pow(9) as f64, + Self::Tera => 10_u64.pow(12) as f64, + Self::Peta => 10_u64.pow(15) as f64, + } + } +} + +impl From for Binary { + fn from(bytes: u64) -> Self { + let bytes = (bytes as f64).log2(); + + if bytes < 10. { + Self::Base + } else if bytes < 20. { + Self::Kibi + } else if bytes < 30. { + Self::Mebi + } else if bytes < 40. { + Self::Gibi + } else if bytes < 50. 
{ + Self::Tebi + } else { + Self::Pebi + } + } +} + +impl From for Si { + fn from(bytes: u64) -> Self { + let bytes = (bytes as f64).log10(); + + if bytes < 3. { + Self::Base + } else if bytes < 6. { + Self::Kilo + } else if bytes < 9. { + Self::Mega + } else if bytes < 12. { + Self::Giga + } else if bytes < 15. { + Self::Tera + } else { + Self::Peta + } + } +} + +impl Display for Binary { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Base => write!(f, ""), + Self::Kibi => write!(f, "Ki"), + Self::Mebi => write!(f, "Mi"), + Self::Gibi => write!(f, "Gi"), + Self::Tebi => write!(f, "Ti"), + Self::Pebi => write!(f, "Pi"), + } + } +} + +impl Display for Si { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Base => write!(f, ""), + Self::Kilo => write!(f, "K"), + Self::Mega => write!(f, "M"), + Self::Giga => write!(f, "G"), + Self::Tera => write!(f, "T"), + Self::Peta => write!(f, "P"), + } + } +} + +#[test] +fn test_prefixes() { + assert_eq!(Binary::from(100), Binary::Base); + assert_eq!(Binary::from(2_u64.pow(10)), Binary::Kibi); + assert_eq!(Binary::from(2_u64.pow(20)), Binary::Mebi); + assert_eq!(Binary::from(2_u64.pow(30)), Binary::Gibi); + assert_eq!(Binary::from(2_u64.pow(40)), Binary::Tebi); + assert_eq!(Binary::from(2_u64.pow(50)), Binary::Pebi); + + assert_eq!(Si::from(100), Si::Base); + assert_eq!(Si::from(10_u64.pow(3)), Si::Kilo); + assert_eq!(Si::from(10_u64.pow(6)), Si::Mega); + assert_eq!(Si::from(10_u64.pow(9)), Si::Giga); + assert_eq!(Si::from(10_u64.pow(12)), Si::Tera); + assert_eq!(Si::from(10_u64.pow(15)), Si::Peta); +} diff --git a/src/error.rs b/src/error.rs index f3c5634..0f1e65e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,6 +1,20 @@ use ansi_term::{Color, Style}; use std::{convert::From, error::Error as StdError, fmt, result::Result as StdResult}; +/// Meant to be a convenient wild-card import to allow access to the [`crate::error`] module's facilities +/// 
for error handling. +pub mod prelude { + pub use super::{Error, ErrorCategory, ErrorReport, Result, WithContext}; + + macro_rules! error_source { + () => { + format!("{}:{}", file!(), line!()) + }; + } + + pub(crate) use error_source; +} + /// General result type to be used through the application. pub type Result = std::result::Result; @@ -9,24 +23,36 @@ pub type Result = std::result::Result; #[derive(Debug)] pub struct Error { source: anyhow::Error, - category: Category, + category: ErrorCategory, help_text: Option, } -/// Category of errors with which to generate a report. +/// Categories of errors with which to generate a report. #[derive(Debug)] -pub enum Category { - /// Errors due to logical errors within the application. +pub enum ErrorCategory { + /// Errors due to logical errors within the application. When creating an [`Error`] via + /// [`ErrorReport`], an `Internal` error will come with a default help text to guide the user + /// to the GitHub issues page to file a new issue. Be careful when overriding help texts under + /// these circumstances. Internal, + /// User-specific errors to be used in relation to command-line arguments and configs. User, + /// Errors related to environment such as a missing `$HOME` environment variable. System, + + /// Errors that are meant to be recoverable.
+ Warning, } impl Error { - pub fn new(category: Category, source: anyhow::Error, help_text: Option) -> Self { - Self { source, category, help_text } + pub fn new(category: ErrorCategory, source: anyhow::Error, help_text: Option) -> Self { + Self { + source, + category, + help_text, + } } fn internal_error_help_message() -> String { @@ -46,26 +72,31 @@ impl fmt::Display for Error { let help = Color::Cyan.bold().paint("help"); if let Some(ref help_txt) = self.help_text { - writeln!(f, "{icon} {prefix}: {:?}\n\n{help}: {help_txt}", self.source) + writeln!( + f, + "{icon} {prefix}: {:?}\n\n{help}: {help_txt}", + self.source + ) } else { writeln!(f, "{} {:?}", icon, self.source) } } } -impl fmt::Display for Category { +impl fmt::Display for ErrorCategory { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Internal => write!(f, "Internal error"), Self::User => write!(f, "Error"), Self::System => write!(f, "System error"), + Self::Warning => write!(f, ""), } } } /// Convenience trait to generate a [`Result`] from any type that implements [`std::error::Error`]. pub trait ErrorReport { - fn into_report(self, category: Category) -> Result; + fn into_report(self, category: ErrorCategory) -> Result; } /// Allows the chaining of contexts to [`Error`]'s underlying [`anyhow::Error`]. 
@@ -81,9 +112,11 @@ where } impl ErrorReport for StdResult { - fn into_report(self, category: Category) -> Result { + fn into_report(self, category: ErrorCategory) -> Result { self.map_err(|e| { - let help_text = matches!(category, Category::Internal).then(Error::internal_error_help_message); + let help_text = matches!(category, ErrorCategory::Internal) + .then(Error::internal_error_help_message); + let anyhow_err = anyhow::Error::from(e); Error::new(category, anyhow_err, help_text) }) diff --git a/src/file/inode.rs b/src/file/inode.rs new file mode 100644 index 0000000..bb7078f --- /dev/null +++ b/src/file/inode.rs @@ -0,0 +1,51 @@ +use std::{convert::TryFrom, fs::Metadata}; + +/// Represents a file's underlying inode. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Inode { + pub ino: u64, + pub dev: u64, + pub nlink: u64, +} + +impl Inode { + /// Initializer for an inode given all the properties that make it unique. + pub fn new(ino: u64, dev: u64, nlink: u64) -> Self { + Self { ino, dev, nlink } + } +} + +#[derive(Debug, thiserror::Error)] +#[error("Insufficient information to compute inode")] +pub struct INodeError; + +impl TryFrom<&Metadata> for Inode { + type Error = INodeError; + + #[cfg(unix)] + fn try_from(md: &Metadata) -> Result { + use std::os::unix::fs::MetadataExt; + + Ok(Self::new(md.ino(), md.dev(), md.nlink())) + } + + #[cfg(windows)] + fn try_from(md: &Metadata) -> Result { + use std::os::windows::fs::MetadataExt; + + if let (Some(dev), Some(ino), Some(nlink)) = ( + md.volume_serial_number(), + md.file_index(), + md.number_of_links(), + ) { + return Ok(Self::new(ino, dev.into(), nlink.into())); + } + + Err(Self::Error {}) + } + + #[cfg(not(any(unix, windows)))] + fn try_from(md: &Metadata) -> Result { + Err(Self::Error {}) + } +} diff --git a/src/file/mod.rs b/src/file/mod.rs index 5a582bd..d4261a7 100644 --- a/src/file/mod.rs +++ b/src/file/mod.rs @@ -1,7 +1,89 @@ +use crate::{ + disk, + error::prelude::*, + user::{enums::Metric, 
Context}, +}; use ignore::DirEntry; -use std::fs::Metadata; +use std::{ + fs::{self, Metadata}, + ops::Deref, +}; +/// Concerned with querying information about a file's underlying inode. +pub mod inode; +use inode::Inode; + +/// Erdtree's wrapper around [`DirEntry`], its metadata ([`Metadata`]). Also contains disk usage +/// information of files. Directories will always be initialized to have a size of zero as they +/// must be recursively computed. +#[derive(Debug)] pub struct File { - inner: DirEntry, + data: DirEntry, metadata: Metadata, + size: disk::Usage, +} + +impl File { + /// Plain Jane constructor for [`File`]. + pub fn new(data: DirEntry, metadata: Metadata, size: disk::Usage) -> Self { + Self { + data, + metadata, + size, + } + } + + /// Initializes [`File`] from the given [`DirEntry`] and [`Context`]. + pub fn init( + data: DirEntry, + Context { + metric, + byte_units, + follow, + .. + }: &Context, + ) -> Result { + let path = data.path(); + + let metadata = if *follow { + fs::metadata(path).into_report(ErrorCategory::System)? + } else { + fs::symlink_metadata(path).into_report(ErrorCategory::System)? + }; + + let size = match metric { + Metric::Physical => disk::Usage::init_physical(&metadata, *byte_units), + Metric::Logical => disk::Usage::init_logical(&metadata, *byte_units), + Metric::Word => disk::Usage::init_word_count(&data, &metadata, *follow)?, + Metric::Line => disk::Usage::init_line_count(&data, &metadata, *follow)?, + + #[cfg(unix)] + Metric::Blocks => disk::Usage::init_blocks(&metadata), + }; + + Ok(Self::new(data, metadata, size)) + } + + /// Attempts to query the [`File`]'s underlying inode which is represented by [`Inode`]. + pub fn inode(&self) -> Result { + Inode::try_from(&self.metadata).into_report(ErrorCategory::Internal) + } + + /// Gets a mutable reference to the `size` field. + pub fn size_mut(&mut self) -> &mut disk::Usage { + &mut self.size + } + + /// Gets an immutable reference to the `size` field.
+ pub fn size(&self) -> &disk::Usage { + &self.size + } +} + +impl Deref for File { + type Target = DirEntry; + + fn deref(&self) -> &Self::Target { + &self.data + } } diff --git a/src/logging.rs b/src/logging.rs new file mode 100644 index 0000000..b8944e8 --- /dev/null +++ b/src/logging.rs @@ -0,0 +1,44 @@ +use crate::error::prelude::*; +use chrono::Utc; +use log::{LevelFilter, Log, Metadata, Record}; +use std::fmt::Write; + +pub static mut BUFFER: String = String::new(); + +pub struct LoggityLog; + +impl LoggityLog { + pub fn new() -> Self { + LoggityLog {} + } + + pub fn init() -> Result<&'static LoggityLog> { + let logger = Box::new(LoggityLog::new()); + let leak: &'static LoggityLog = Box::leak(logger); + log::set_logger(leak).into_report(ErrorCategory::Internal)?; + log::set_max_level(LevelFilter::Info); + Ok(leak) + } +} + +impl Log for LoggityLog { + fn enabled(&self, _metadata: &Metadata<'_>) -> bool { + true + } + + fn log(&self, record: &Record<'_>) { + unsafe { + let _ = writeln!( + BUFFER, + "[{}] {} {}", + Utc::now().to_rfc3339(), + record.level(), + record.args() + ); + } + } + + fn flush(&self) { + unsafe { println!("{BUFFER}") } + } +} diff --git a/src/main.rs b/src/main.rs index 597aafa..c7e8c51 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,22 +1,65 @@ #![cfg_attr(windows, feature(windows_by_handle))] - +#![allow( + clippy::cast_precision_loss, + clippy::struct_excessive_bools, + clippy::wildcard_imports, + clippy::obfuscated_if_else +)] +use log::Log; use std::process::ExitCode; -mod cli; +/// Defines the command-line interface and the context used throughout Erdtree. +mod user; + +/// Concerned with disk usage calculation and presentation. +mod disk; + +/// Error handling and reporting utilities to be used throughout the Erdtree. mod error; + +/// Erdtree's representation of a file. mod file; + +/// Concerned with logging throughout the application. +mod logging; + +/// Virtual file-tree data structure and relevant operations. 
mod tree; +use tree::FileTree; fn main() -> ExitCode { if let Err(e) = run() { eprintln!("{e}"); return ExitCode::FAILURE; } - ExitCode::SUCCESS } fn run() -> error::Result<()> { - let clargs = cli::Args::init()?; + let ctx = user::Context::init()?; + + let logger = ctx + .verbose + .then(logging::LoggityLog::init) + .transpose()?; + + let file_tree = if ctx.threads > 1 { + FileTree::init(&ctx)? + } else { + FileTree::init(&ctx)? + }; + + let Some(indextree::NodeEdge::Start(id)) = file_tree.traverse().next() else { + panic!("womp"); + }; + + let root = file_tree[id].get(); + + println!("{root:?}"); + + if let Some(logger) = logger { + logger.flush(); + } + Ok(()) } diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 2045791..804d84a 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1,16 +1,146 @@ -use crate::{error::Result, file::File}; - -use indextree::{Arena, NodeId}; - -mod traversal; +use crate::{error::prelude::*, file::File, user::Context}; +use ignore::{Walk, WalkBuilder}; +use indextree::{Arena, NodeId, Traverse}; +use std::{collections::HashMap, convert::TryFrom, ops::Deref}; +/// Representation of the file-tree that is traversed starting from the root directory whose index +/// in the underlying `arena` is `root_id`. pub struct FileTree { root_id: NodeId, arena: Arena, } +#[derive(Debug, thiserror::Error)] +pub enum TreeError { + #[error("Failed to query the root directory")] + RootDirMissing, + + #[error("Expected ancestor node to exist in arena")] + ParentNode, + + #[error("Failed to compute directory size")] + MissingDirSize, +} + impl FileTree { pub fn new(root_id: NodeId, arena: Arena) -> Self { Self { root_id, arena } } + + /// Initializes a [`FileTree`] completely on one thread.
+ pub fn init(ctx: &Context) -> Result { + let mut walker = Walk::try_from(ctx)?; + + let root_entry = walker + .next() + .ok_or(TreeError::RootDirMissing) + .into_report(ErrorCategory::Internal)?; + + let root_node = root_entry + .into_report(ErrorCategory::Internal) + .and_then(|data| File::init(data, ctx))?; + + let mut arena = Arena::new(); + let root_node_id = arena.new_node(root_node); + let mut current_dir_id = root_node_id; + + let mut dirsize_map = HashMap::new(); + let mut dir_stack = vec![]; + + dirsize_map.insert(root_node_id, 0); + + for dent in walker { + let node = match dent + .into_report(ErrorCategory::Warning) + .and_then(|data| File::init(data, ctx)) + { + Ok(data) => data, + Err(e) => { + log::error!("{e}"); + continue; + }, + }; + + let is_dir = node.file_type().is_some_and(|ft| ft.is_dir()); + let size = node.size().value(); + + let new_node_id = arena.new_node(node); + + current_dir_id.append(new_node_id, &mut arena); + + let parent_dir_id = new_node_id + .ancestors(&arena) + .nth(1) // skip self + .ok_or(TreeError::ParentNode) + .into_report(ErrorCategory::Internal) + .context(error_source!())?; + + if let Some(parent_size) = dirsize_map.get_mut(&parent_dir_id) { + *parent_size += size; + } else { + dirsize_map.insert(parent_dir_id, size); + } + + if is_dir { + dir_stack.push(new_node_id); + current_dir_id = new_node_id; + } + } + + while let Some(node_id) = dir_stack.pop() { + let node_size = dirsize_map + .remove(&node_id) + .ok_or(TreeError::MissingDirSize) + .into_report(ErrorCategory::Internal) + .context(error_source!())?; + + let parent_size = node_id + .ancestors(&arena) + .nth(1) // skip self + .and_then(|parent_dir_id| dirsize_map.get_mut(&parent_dir_id)) + .ok_or(TreeError::ParentNode) + .into_report(ErrorCategory::Internal) + .context(error_source!())?; + + *parent_size += node_size; + } + + println!("{dirsize_map:?}"); + + Ok(Self::new(root_node_id, arena)) + } + + pub fn traverse(&self) -> Traverse<'_, File> { + 
self.root_id.traverse(&self.arena) + } +} + +impl Deref for FileTree { + type Target = Arena; + + fn deref(&self) -> &Self::Target { + &self.arena + } +} + +/// Initializes a single-threaded [`Walk`] instance from [`Context`]. +impl TryFrom<&Context> for Walk { + type Error = Error; + + fn try_from(ctx: &Context) -> Result { + let path = match ctx.dir() { + Some(d) => d.to_path_buf(), + None => Context::get_current_dir()?, + }; + + let walker = WalkBuilder::new(path) + .follow_links(ctx.follow) + .git_ignore(!ctx.no_ignore) + .git_global(!ctx.no_ignore) + .hidden(!ctx.hidden) + .same_file_system(ctx.same_fs) + .build(); + + Ok(walker) + } } diff --git a/src/tree/traversal/mod.rs b/src/tree/traversal/mod.rs deleted file mode 100644 index 03aac33..0000000 --- a/src/tree/traversal/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -mod multi_threaded; -mod single_threaded; diff --git a/src/tree/traversal/multi_threaded.rs b/src/tree/traversal/multi_threaded.rs deleted file mode 100644 index 8b13789..0000000 --- a/src/tree/traversal/multi_threaded.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/src/tree/traversal/single_threaded.rs b/src/tree/traversal/single_threaded.rs deleted file mode 100644 index 5efcff0..0000000 --- a/src/tree/traversal/single_threaded.rs +++ /dev/null @@ -1,9 +0,0 @@ -use crate::{error::Result, tree::FileTree}; -use ignore::WalkBuilder; -use std::{convert::AsRef, path::Path}; - -impl FileTree { - pub fn init>(path: P) -> Result { - todo!() - } -} diff --git a/src/user/enums.rs b/src/user/enums.rs new file mode 100644 index 0000000..6ac4ac1 --- /dev/null +++ b/src/user/enums.rs @@ -0,0 +1,36 @@ +use clap::ValueEnum; + +/// The disk usage metric to report. 
+#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, Default)]
+pub enum Metric {
+    /// Physical disk usage in bytes
+    #[default]
+    Physical,
+
+    /// Apparent disk usage in bytes
+    Logical,
+
+    /// Total words in a file
+    Word,
+
+    /// Total lines in a file
+    Line,
+
+    /// Total amount of blocks allocated to store a file on disk
+    #[cfg(unix)]
+    Blocks,
+}
+
+/// Whether to report byte size using SI or binary prefixes or no prefix.
+#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, Default)]
+pub enum BytePresentation {
+    /// Reports the total amount of bytes
+    #[default]
+    Raw,
+
+    /// Reports byte size in binary units e.g. KiB
+    Binary,
+
+    /// Reports byte size in SI units e.g. KB
+    StandardInternational,
+}
diff --git a/src/user/mod.rs b/src/user/mod.rs
new file mode 100644
index 0000000..7117eb8
--- /dev/null
+++ b/src/user/mod.rs
@@ -0,0 +1,99 @@
+use crate::error::prelude::*;
+use clap::Parser;
+use std::{env, path::PathBuf};
+
+/// Enum definitions for enumerated command-line arguments.
+pub mod enums;
+
+/// Defines the CLI whose purpose is to capture user arguments and reconcile them with arguments
+/// found with a config file if relevant.
+#[derive(Parser, Debug)]
+#[command(name = "erdtree")]
+#[command(author = "Benjamin Nguyen. ")]
+#[command(version = "4.0.0")]
+#[command(
+    about = "erdtree (erd) is a cross-platform, multi-threaded, and general purpose filesystem and disk usage utility.",
+    long_about = None,
+)]
+pub struct Context {
+    /// Directory to traverse; defaults to current working directory
+    dir: Option<PathBuf>,
+
+    /// Show hidden files
+    #[arg(short = '.', long)]
+    pub hidden: bool,
+
+    /// Report byte size in either binary or SI units
+    #[arg(short, long, value_enum, default_value_t)]
+    pub byte_units: enums::BytePresentation,
+
+    /// Disable traversal of .git directory when traversing hidden files
+    #[arg(long, requires = "hidden")]
+    pub no_git: bool,
+
+    /// Follow symlinks
+    #[arg(short = 'f', long)]
+    pub follow: bool,
+
+    /// Disk usage metric to report
+    #[arg(short, long, value_enum, default_value_t)]
+    pub metric: enums::Metric,
+
+    /// Do not respect .gitignore files
+    #[arg(short = 'i', long)]
+    pub no_ignore: bool,
+
+    /// Number of threads to use for disk reads
+    #[arg(short = 'T', long, default_value_t = Context::default_num_threads())]
+    pub threads: usize,
+
+    /// Prevent traversal into directories that are on different filesystems
+    #[arg(short = 'x', long = "one-file-system")]
+    pub same_fs: bool,
+
+    /// Prints logs at the end of the output
+    #[arg(short = 'v', long = "verbose")]
+    pub verbose: bool,
+}
+
+impl Context {
+    /// Parses the command-line arguments and, when no directory was given, falls back to the
+    /// current working directory.
+    ///
+    /// # Errors
+    ///
+    /// Fails when the current working directory is needed but cannot be resolved.
+    pub fn init() -> Result<Self> {
+        let mut clargs = Self::parse();
+
+        if clargs.dir.is_none() {
+            let current_dir = Self::get_current_dir()?;
+            clargs.dir = Some(current_dir);
+        }
+
+        Ok(clargs)
+    }
+
+    /// Directory to traverse, if one is set.
+    pub fn dir(&self) -> Option<&PathBuf> {
+        self.dir.as_ref()
+    }
+
+    /// Resolves the current working directory and canonicalizes it.
+    ///
+    /// # Errors
+    ///
+    /// Fails with a system error when the directory cannot be queried or canonicalized.
+    pub fn get_current_dir() -> Result<PathBuf> {
+        env::current_dir()
+            .and_then(std::fs::canonicalize)
+            .into_report(ErrorCategory::System)
+            .context("Failed to access current working directory")
+            .set_help("Ensure current directory exists and sufficient permissions are granted")
+    }
+
+    /// Default for `--threads`: the parallelism reported by the system, or 3 if it is unknown.
+    fn default_num_threads() -> usize {
+        std::thread::available_parallelism().map_or(3, usize::from)
+    }
+}