Skip to content

Commit

Permalink
Merge pull request #12 from sharkLoc/master
Browse files Browse the repository at this point in the history
add gzip/xz/bzip2 support for IO
  • Loading branch information
wjwei-handsome authored May 18, 2024
2 parents 8d2dfe2 + 904814e commit c9d955a
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 19 deletions.
26 changes: 24 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ thiserror = "1.0.50"
anyhow = "1.0.75"
minijinja = "1.0.15"
clap_complete = "4.5.1"
xz2 = "0.1.7"
flate2 = "1.0.30"
bzip2 = "0.4.4"

[lib]
name = "wgalib"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ Options:
-V, --version Print version

GLOBAL:
-o, --outfile <OUTFILE> Output file ("-" for stdout) [default: -]
-o, --outfile <OUTFILE> Output file ("-" for stdout), file name ending in .gz/.bz2/.xz will be compressed automatically [default: -]
-r, --rewrite Bool, if rewrite output file [default: false]
-t, --threads <THREADS> Threads, default 1 [default: 1]
-v, --verbose... Logging level [-v: Info, -vv: Debug, -vvv: Trace, default: Warn]
Expand Down
2 changes: 1 addition & 1 deletion src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ help_template =
) // change template more!
]
pub struct Cli {
/// Output file ("-" for stdout)
/// Output file ("-" for stdout), file name ending in .gz/.bz2/.xz will be compressed automatically
#[arg(long, short, global = true, default_value = "-", help_heading = Some("GLOBAL"))]
pub outfile: String,
/// Bool, if rewrite output file [default: false]
Expand Down
117 changes: 102 additions & 15 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,20 @@ use crate::{
use clap::CommandFactory;
use clap_complete::{generate, Shell};
use log::{info, warn};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Stdin, Write};
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Stdin, Write};
use std::path::Path;
use std::{fs::File, path::PathBuf};

// TODO : define a pub type WResult = Result<(), WGAError>;

/// Capacity, in bytes, passed to `BufReader::with_capacity` / `BufWriter::with_capacity`
/// by the reader and writer constructors in this file.
const BUFFER_SIZE: usize = 32 * 1024;

/// Number of leading bytes read from a file for format sniffing; equal to the length
/// of the longest signature below (`XZ_MAGIC`).
const MAGIC_MAX_LEN: usize = 6;
// compressed file magic number, ref: https://docs.rs/infer/latest/infer/archive/index.html
/// Leading bytes compared against to recognize gzip input.
const GZ_MAGIC: [u8; 3] = [0x1f, 0x8b, 0x08];
/// Leading bytes compared against to recognize bzip2 input (ASCII `BZh`).
const BZ_MAGIC: [u8; 3] = [0x42, 0x5a, 0x68];
/// Leading bytes compared against to recognize xz input (`0xfd`, ASCII `7zXZ`, `0x00`).
const XZ_MAGIC: [u8; 6] = [0xfd, 0x37, 0x7a, 0x58, 0x5A, 0x00];

type RdrWtr = (Box<dyn BufRead + Send>, Box<dyn Write>);
fn prepare_rdr_wtr(
input: &Option<String>,
Expand Down Expand Up @@ -92,20 +98,71 @@ pub fn reverse_complement(input: &str) -> Result<String, WGAError> {
Ok(output)
}

/// Read the first `MAGIC_MAX_LEN` bytes of the file at `path`.
///
/// The returned array is zero-initialised, so a file shorter than
/// `MAGIC_MAX_LEN` bytes leaves the unread tail as `0`.
///
/// # Errors
/// Returns an error if the file cannot be opened or a read fails.
fn get_magic_num(path: &str) -> Result<[u8; MAGIC_MAX_LEN], WGAError> {
    let mut buffer: [u8; MAGIC_MAX_LEN] = [0; MAGIC_MAX_LEN];
    let mut fp = File::open(path)?;

    // A single `read` may legally return fewer bytes than are available, so keep
    // reading until the buffer is full or end-of-file is reached.
    let mut filled = 0;
    while filled < MAGIC_MAX_LEN {
        match fp.read(&mut buffer[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            // An interrupted read is transient; retry instead of failing.
            Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e.into()),
        }
    }

    Ok(buffer)
}

/// Decide whether the file at `path` should be handled as gzip.
///
/// Returns `true` when the file's leading bytes equal `GZ_MAGIC` or when the
/// path has the extension `gz`.
///
/// # Errors
/// Propagates any error returned by `get_magic_num`.
fn is_gzipped(path: &str) -> Result<bool, WGAError> {
    let header = get_magic_num(path)?;
    let magic_matches = header.starts_with(&GZ_MAGIC);
    let extension_matches = Path::new(path).extension().is_some_and(|ext| ext == "gz");
    Ok(magic_matches || extension_matches)
}

/// Decide whether the file at `path` should be handled as bzip2.
///
/// Returns `true` when the file's leading bytes equal `BZ_MAGIC` or when the
/// path has the extension `bz2`.
///
/// # Errors
/// Propagates any error returned by `get_magic_num`.
fn is_bzipped(path: &str) -> Result<bool, WGAError> {
    let header = get_magic_num(path)?;
    let magic_matches = header.starts_with(&BZ_MAGIC);
    let extension_matches = Path::new(path).extension().is_some_and(|ext| ext == "bz2");
    Ok(magic_matches || extension_matches)
}

/// Decide whether the file at `path` should be handled as xz.
///
/// Returns `true` when the file's leading bytes equal `XZ_MAGIC` or when the
/// path has the extension `xz`.
///
/// # Errors
/// Propagates any error returned by `get_magic_num`.
fn is_xz(path: &str) -> Result<bool, WGAError> {
    let header = get_magic_num(path)?;
    let magic_matches = header.starts_with(&XZ_MAGIC);
    let extension_matches = Path::new(path).extension().is_some_and(|ext| ext == "xz");
    Ok(magic_matches || extension_matches)
}

pub fn get_input_reader(input: &Option<String>) -> Result<Box<dyn BufRead + Send>, WGAError> {
let reader: Box<dyn BufRead + Send> = match input {
Some(path) => {
if path == "-" {
Box::new(BufReader::with_capacity(BUFFER_SIZE, stdin_reader()?))
} else {
match File::open(path) {
Ok(file) => Box::new(BufReader::with_capacity(BUFFER_SIZE, file)),
Err(_) => return Err(WGAError::FileNotExist(PathBuf::from(path))),
let reader: Box<dyn BufRead + Send> = if let Some(path) = input {
match File::open(path) {
Ok(file) => {
if is_xz(path)? {
// decode xz compressed file
Box::new(BufReader::with_capacity(
BUFFER_SIZE,
xz2::read::XzDecoder::new_multi_decoder(file),
))
} else if is_gzipped(path)? {
// decode gzip compressed file
Box::new(BufReader::with_capacity(
BUFFER_SIZE,
flate2::read::MultiGzDecoder::new(file),
))
} else if is_bzipped(path)? {
// decode bzip2 compressed file
Box::new(BufReader::with_capacity(
BUFFER_SIZE,
bzip2::read::MultiBzDecoder::new(file),
))
} else {
// stdin flag "-" covered
Box::new(BufReader::with_capacity(BUFFER_SIZE, file))
}
}
Err(_) => return Err(WGAError::FileNotExist(PathBuf::from(path))),
}
None => Box::new(BufReader::with_capacity(BUFFER_SIZE, stdin_reader()?)),
} else {
Box::new(BufReader::with_capacity(BUFFER_SIZE, stdin_reader()?))
};

Ok(reader)
}

Expand All @@ -121,12 +178,42 @@ fn stdin_reader() -> Result<Stdin, WGAError> {

fn get_output_writer(outputpath: &str, rewrite: bool) -> Result<Box<dyn Write>, WGAError> {
check_outfile(outputpath, rewrite)?;
if outputpath == "-" {
Ok(Box::new(stdout()))

let file = File::create(outputpath)?;
let compression_level: u32 = 6;

let writer: Box<dyn Write> = if Path::new(outputpath)
.extension()
.is_some_and(|ext| ext == "xz")
{
// encode file to xz format
Box::new(BufWriter::with_capacity(
BUFFER_SIZE,
xz2::write::XzEncoder::new(file, compression_level),
))
} else if Path::new(outputpath)
.extension()
.is_some_and(|ext| ext == "gz")
{
// encode file to gzip format
Box::new(BufWriter::with_capacity(
BUFFER_SIZE,
flate2::write::GzEncoder::new(file, flate2::Compression::new(compression_level)),
))
} else if Path::new(outputpath)
.extension()
.is_some_and(|ext| ext == "bz2")
{
// encode file to bzip2 format
Box::new(BufWriter::with_capacity(
BUFFER_SIZE,
bzip2::write::BzEncoder::new(file, bzip2::Compression::new(compression_level)),
))
} else {
let file = File::create(outputpath)?;
Ok(Box::new(BufWriter::new(file)))
}
Box::new(BufWriter::new(stdout()))
};

Ok(writer)
}

/// check if output file exists and if rewrite
Expand Down

0 comments on commit c9d955a

Please sign in to comment.