Skip to content

Commit

Permalink
leviosam_utils refactor (#75)
Browse files Browse the repository at this point in the history
* Refactors the `LevioSamUtils::WriteDeferred` class.
* Moves some functions from `leviosam` to `leviosam_utils`
* Improves testing
  • Loading branch information
milkschen authored Jun 2, 2024
1 parent db9d89e commit 97fafa6
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 91 deletions.
62 changes: 8 additions & 54 deletions src/leviosam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,55 +32,6 @@
KSEQ_INIT(gzFile, gzread);
;

bool check_split_rule(std::string rule) {
auto cnt = std::count(LevioSamUtils::DEFER_OPT.begin(),
LevioSamUtils::DEFER_OPT.end(), rule);
if (cnt != 1) {
std::cerr << "[E::check_split_rule] " << rule
<< " is not a valid filtering option\n";
std::cerr << "Valid options:\n";
for (auto &opt : LevioSamUtils::DEFER_OPT) {
std::cerr << " - " << opt << "\n";
}
return false;
}
return true;
}

bool add_split_rule(std::vector<std::pair<std::string, float>> &split_rules,
std::string s) {
if (s == "lifted") {
split_rules.push_back(std::make_pair(s, 1));
return true;
}
std::string delim(":");
auto start = 0U;
auto end = s.find(delim);
int cnt = 0;
std::string key;
float value;
while (end != std::string::npos) {
if (cnt == 0) {
key = s.substr(start, end - start);
if (check_split_rule(key) == false) {
return false;
}
std::cerr << "[I::add_split_rule] Adding rule `" << key;
value = std::stof(s.substr(end - start + 1, end));
std::cerr << ":" << value << "`\n";
split_rules.push_back(std::make_pair(key, value));
} else {
std::cerr << "[E::add_split_rule] Invalid split rule: " << s
<< "\n";
return false;
}
start = end + delim.length();
end = s.find(delim, start);
cnt += 1;
}
return true;
}

/** Updates allocated threads given input args.
*
* LevioSAM2 expects either (1) --threads (-t) or (2) --hts_threads and/or
Expand Down Expand Up @@ -458,10 +409,11 @@ void lift_run(lift_opts args) {

LevioSamUtils::WriteDeferred wd;
if (args.split_rules.size() != 0) {
wd.init(args.outpre, args.split_rules, args.out_format, hdr_orig, hdr,
args.bed_defer_source, args.bed_defer_dest,
args.bed_commit_source, args.bed_commit_dest,
args.bed_isec_threshold);
wd.init(args.split_rules, hdr_orig, hdr, args.bed_defer_source,
args.bed_defer_dest, args.bed_commit_source,
args.bed_commit_dest, args.bed_isec_threshold);
wd.open_sams(args.outpre, args.out_format);
wd.print_info();
}

std::vector<std::thread> threads;
Expand All @@ -487,6 +439,7 @@ void lift_run(lift_opts args) {
sam_hdr_destroy(hdr_orig);
sam_close(sam_fp);
sam_close(out_sam_fp);
wd.close_sams();
}

/* This is a wrapper for the constructor of LiftMap.*/
Expand Down Expand Up @@ -735,7 +688,8 @@ int main(int argc, char **argv) {
args.sample = optarg;
break;
case 'S':
if (add_split_rule(args.split_rules, optarg) == false) {
if (LevioSamUtils::add_split_rule(args.split_rules, optarg) ==
false) {
exit(1);
}
break;
Expand Down
126 changes: 98 additions & 28 deletions src/leviosam_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,13 @@ size_t kstr_get_m(size_t var) {

namespace LevioSamUtils {

void WriteDeferred::init(
const std::string outpre,
const std::vector<std::pair<std::string, float>>& split_rules,
const std::string of, sam_hdr_t* ihdr, sam_hdr_t* ohdr,
const BedUtils::Bed& b_defer_source, const BedUtils::Bed& b_defer_dest,
const BedUtils::Bed& b_commit_source, const BedUtils::Bed& b_commit_dest,
const float& b_isec_th) {
void WriteDeferred::init(const SplitRules& split_rules, sam_hdr_t* ihdr,
sam_hdr_t* ohdr, const BedUtils::Bed& b_defer_source,
const BedUtils::Bed& b_defer_dest,
const BedUtils::Bed& b_commit_source,
const BedUtils::Bed& b_commit_dest,
const float& b_isec_th) {
write_deferred = true;
std::string rules_str("");
for (auto& r : split_rules) {
split_modes.emplace(r.first);
if (r.first == "mapq") {
Expand All @@ -52,30 +50,16 @@ void WriteDeferred::init(
bed_commit_source = b_commit_source;
bed_commit_dest = b_commit_dest;
bed_isec_threshold = b_isec_th;

std::string out_mode = (of == "sam") ? "w" : "wb";
std::string out_fn = outpre + "-deferred." + of;
out_fp = sam_open(out_fn.data(), out_mode.data());
if (sam_hdr_write(out_fp, ohdr) < 0) {
std::cerr << "[E::WriteDeferred::init] Failed to write sam_hdr for "
<< out_fn << "\n";
exit(1);
}

std::string out_fn_orig = outpre + "-unliftable." + of;
out_fp_orig = sam_open(out_fn_orig.data(), out_mode.data());
if (sam_hdr_write(out_fp_orig, hdr_orig) < 0) {
std::cerr << "[E::WriteDeferred::init] Failed to write sam_hdr for "
<< out_fn_orig << "\n";
exit(1);
}
print_info();
}

WriteDeferred::~WriteDeferred() {
/** Closes the deferred and unliftable HTS files. */
void WriteDeferred::close_sams() {
if (write_deferred) {
sam_close(out_fp);
sam_close(out_fp_orig);
} else {
std::cerr << "[W::WriteDeferred::close_sams] No HTS files to close "
"when `write_deferred` is False.\n";
}
};

Expand Down Expand Up @@ -118,6 +102,34 @@ void WriteDeferred::print_info() {
}
}

/** Gets the write_deferred status. */
bool WriteDeferred::get_write_deferred() { return write_deferred; }

/** Creates deferred and unliftable HTS files.
*
* @param out_prefix Output prefix
* @param out_format Output format. Options: "sam", "bam"
*/
void WriteDeferred::open_sams(const std::string out_prefix,
const std::string out_format) {
std::string out_mode = (out_format == "sam") ? "w" : "wb";
std::string out_fn = out_prefix + "-deferred." + out_format;
out_fp = sam_open(out_fn.data(), out_mode.data());
if (sam_hdr_write(out_fp, hdr) < 0) {
std::cerr << "[E::WriteDeferred::init] Failed to write sam_hdr for "
<< out_fn << "\n";
exit(1);
}

std::string out_fn_orig = out_prefix + "-unliftable." + out_format;
out_fp_orig = sam_open(out_fn_orig.data(), out_mode.data());
if (sam_hdr_write(out_fp_orig, hdr_orig) < 0) {
std::cerr << "[E::WriteDeferred::init] Failed to write sam_hdr for "
<< out_fn_orig << "\n";
exit(1);
}
}

/* Returns true if an alignment is excluded (committed)
*/
bool WriteDeferred::commit_aln_source(const bam1_t* const aln) {
Expand Down Expand Up @@ -185,6 +197,65 @@ bool WriteDeferred::commit_aln_dest(const bam1_t* const aln) {
return true;
}

/** Checks if a split rule is valid.
*
* Split rules are important for the WriteDeferred class.
* Allowed rules are specified in the DEFER_OPT vector (leviosam_utils.hpp).
*
* @param rule A split rule.
* @return True if the rule is supported. False otherwise.
*/
bool check_split_rule(std::string rule) {
auto cnt = std::count(DEFER_OPT.begin(), DEFER_OPT.end(), rule);
if (cnt != 1) {
std::cerr << "[E::check_split_rule] " << rule
<< " is not a valid filtering option\n";
std::cerr << "Valid options:\n";
for (auto& opt : DEFER_OPT) {
std::cerr << " - " << opt << "\n";
}
return false;
}
return true;
}

/** Add a split rule. The split rules are for the WriteDeferred class.
*
* @param SplitRules A vector to update
* @param string The actual specified rule. Usually a string and a number
* (float), delimited by a colon. E.g. "mapq:20". There are special cases where
* only the string is required.
* @return True if the rule is added successfully.
*/
bool add_split_rule(SplitRules& split_rules, std::string s) {
std::cerr << "[I::add_split_rule] Adding rule `" << s << "`\n";

if (s == "lifted") {
split_rules.push_back(std::make_pair(s, 1.));
return true;
}
const char delim(':');
size_t start = 0;
size_t end = s.find(delim);

if (end != std::string::npos) {
// Not valid if there are more than two delims.
if (std::count(s.begin(), s.end(), delim) == 1) {
std::string key = s.substr(start, end - start);
if (check_split_rule(key) == false) {
std::cerr << "[E::add_split_rule] Invalid split rule: `" << s
<< "`\n";
return false;
}
float value = std::stof(s.substr(end - start + 1, end));
split_rules.push_back(std::make_pair(key, value));
return true;
}
}
std::cerr << "[E::add_split_rule] Invalid split rule: `" << s << "`\n";
return false;
}

/** Removes the MN:i and MD:z tags from an alignment object.
*
* @param aln Alignment object.
Expand Down Expand Up @@ -597,5 +668,4 @@ std::string make_cmd(int argc, char** argv) {
}
return cmd;
}

}; // namespace LevioSamUtils
15 changes: 10 additions & 5 deletions src/leviosam_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ size_t kstr_get_m(size_t var);

namespace LevioSamUtils {

using SplitRules = std::vector<std::pair<std::string, float>>;
const std::vector<std::string> DEFER_OPT{"lifted", "mapq", "clipped_frac",
"isize", "aln_score", "hdist"};

Expand Down Expand Up @@ -82,22 +83,22 @@ class FastqRecord {
class WriteDeferred {
public:
WriteDeferred() : write_deferred(false){};
~WriteDeferred();

void init(const std::string outpre,
const std::vector<std::pair<std::string, float>>& split_rules,
const std::string of, sam_hdr_t* ihdr, sam_hdr_t* ohdr,
void init(const SplitRules& split_rules, sam_hdr_t* ihdr, sam_hdr_t* ohdr,
const BedUtils::Bed& b_defer_source,
const BedUtils::Bed& b_defer_dest,
const BedUtils::Bed& b_commit_source,
const BedUtils::Bed& b_commit_dest, const float& b_isec_th);

void open_sams(const std::string out_prefix, const std::string out_format);
void close_sams();
void print_info();
void write_deferred_bam(bam1_t* aln, sam_hdr_t* hdr);
void write_deferred_bam_orig(bam1_t* aln);
bool commit_aln_dest(const bam1_t* const aln);
bool commit_aln_source(const bam1_t* const aln);

bool get_write_deferred();

std::mutex mutex_fwrite;

private:
Expand All @@ -117,6 +118,10 @@ class WriteDeferred {
float bed_isec_threshold = BED_ISEC_TH;
};

bool check_split_rule(std::string rule);

bool add_split_rule(SplitRules& split_rules, std::string s);

/// Removes the MN:i and MD:z tags from a BAM object.
void remove_nm_md_tag(bam1_t* aln);

Expand Down
46 changes: 43 additions & 3 deletions src/leviosam_utils_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
#include "htslib/sam.h"
#include "leviosam.hpp"

//
// bit_vector tests
//
/* Bit_vector tests */

typedef sdsl::bit_vector::size_type size_type;

Expand Down Expand Up @@ -346,6 +344,31 @@ TEST(UtilsTest, UpdateFlagUnmap) {
sam_close(sam_fp);
}

TEST(UtilsTest, CheckSplitRule) {
EXPECT_EQ(LevioSamUtils::check_split_rule("lifted"), true);
EXPECT_EQ(LevioSamUtils::check_split_rule("mapq"), true);
EXPECT_EQ(LevioSamUtils::check_split_rule("wrong_rule"), false);
}

TEST(UtilsTest, AddSplitRule) {
LevioSamUtils::SplitRules split_rules;
EXPECT_EQ(LevioSamUtils::add_split_rule(split_rules, "mapq:20"), true);
EXPECT_EQ(split_rules[0].first, "mapq");
EXPECT_FLOAT_EQ(split_rules[0].second, 20.);

EXPECT_EQ(LevioSamUtils::add_split_rule(split_rules, "lifted"), true);
EXPECT_EQ(split_rules[1].first, "lifted");
EXPECT_FLOAT_EQ(split_rules[1].second, 1.);

EXPECT_EQ(LevioSamUtils::add_split_rule(split_rules, "clipped_frac:0.66"),
true);
EXPECT_EQ(split_rules[2].first, "clipped_frac");
EXPECT_FLOAT_EQ(split_rules[2].second, 0.66);

EXPECT_EQ(LevioSamUtils::add_split_rule(split_rules, "none"), false);
EXPECT_EQ(LevioSamUtils::add_split_rule(split_rules, "mapq:20:20"), false);
}

TEST(UltimaGenomicsTest, UpdateFlags) {
samFile* sam_fp = sam_open("ultima_small.sam", "r");
sam_hdr_t* sam_hdr = sam_hdr_read(sam_fp);
Expand Down Expand Up @@ -406,6 +429,23 @@ TEST(ChainTest, GetMateQueryLenOnRef) {
bam_destroy1(aln);
}

TEST(WriteDeferredTest, WriteDeferredInit) {
LevioSamUtils::WriteDeferred wd;
LevioSamUtils::SplitRules split_rules;
sam_hdr_t* hdr_orig;
sam_hdr_t* hdr;
BedUtils::Bed bed_defer_source;
BedUtils::Bed bed_defer_dest;
BedUtils::Bed bed_commit_source;
BedUtils::Bed bed_commit_dest;
float bed_isec_threshold = 0;

wd.init(split_rules, hdr_orig, hdr, bed_defer_source, bed_defer_dest,
bed_commit_source, bed_commit_dest, bed_isec_threshold);

EXPECT_EQ(wd.get_write_deferred(), true);
}

int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
Expand Down
2 changes: 1 addition & 1 deletion src/version.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#ifndef VERSION
#define VERSION "0.4.2-30dcb4a"
#define VERSION "0.4.2-7721149"
#endif

0 comments on commit 97fafa6

Please sign in to comment.