Rename Hit struct to Match

We use the term "hit" for two different things:
- a query strobemer successfully looked up in the index
- a locus on the query paired with a locus on the reference (this type of hit is merged into NAMs)

I think choosing the name "match" for the latter makes sense because the M in NAM stands for "match". That is, we can then say that we merge multiple overlapping matches into non-overlapping, approximate matches (=NAMs). The flow would then be: hit -> match -> NAM.
ksahlin · Oct 1, 2024 · 7eccbe4 · 7eccbe4
1 parent 63e2715
commit 7eccbe4
Showing 1 changed file with 24 additions and 24 deletions.
diff --git a/src/nam.cpp b/src/nam.cpp
@@ -2,15 +2,15 @@
 
 namespace {
 
-struct Hit {
+struct Match {
     int query_start;
     int query_end;
     int ref_start;
     int ref_end;
 };
 
-inline void add_to_hits_per_ref(
-    robin_hood::unordered_map<unsigned int, std::vector<Hit>>& hits_per_ref,
+inline void add_to_matches_map(
+    robin_hood::unordered_map<unsigned int, std::vector<Match>>& matches_map,
     int query_start,
     int query_end,
     const StrobemerIndex& index,
@@ -22,22 +22,22 @@ inline void add_to_hits_per_ref(
         int ref_end = ref_start + index.strobe2_offset(position) + index.k();
         int diff = std::abs((query_end - query_start) - (ref_end - ref_start));
         if (diff <= min_diff) {
-            hits_per_ref[index.reference_index(position)].push_back(Hit{query_start, query_end, ref_start, ref_end});
+            matches_map[index.reference_index(position)].push_back(Match{query_start, query_end, ref_start, ref_end});
             min_diff = diff;
         }
     }
 }
 
-void merge_hits_into_nams(
-    robin_hood::unordered_map<unsigned int, std::vector<Hit>>& hits_per_ref,
+void merge_matches_into_nams(
+    robin_hood::unordered_map<unsigned int, std::vector<Match>>& matches_map,
     int k,
     bool sort,
     bool is_revcomp,
     std::vector<Nam>& nams  // inout
 ) {
-    for (auto &[ref_id, hits] : hits_per_ref) {
+    for (auto &[ref_id, matches] : matches_map) {
         if (sort) {
-            std::sort(hits.begin(), hits.end(), [](const Hit& a, const Hit& b) -> bool {
+            std::sort(matches.begin(), matches.end(), [](const Match& a, const Match& b) -> bool {
                     // first sort on query starts, then on reference starts
                     return (a.query_start < b.query_start) || ( (a.query_start == b.query_start) && (a.ref_start < b.ref_start) );
                 }
@@ -46,7 +46,7 @@ void merge_hits_into_nams(
 
         std::vector<Nam> open_nams;
         int prev_q_start = 0;
-        for (auto &h : hits) {
+        for (auto &h : matches) {
             bool is_added = false;
             for (auto & o : open_nams) {
 
@@ -77,7 +77,7 @@ void merge_hits_into_nams(
                 }
 
             }
-            // Add the hit to open matches
+            // Add to open matches
             if (!is_added){
                 Nam n;
                 n.query_start = h.query_start;
@@ -134,15 +134,15 @@ void merge_hits_into_nams(
     }
 }
 
-std::vector<Nam> merge_hits_into_nams_forward_and_reverse(
-    std::array<robin_hood::unordered_map<unsigned int, std::vector<Hit>>, 2>& hits_per_ref,
+std::vector<Nam> merge_matches_into_nams_forward_and_reverse(
+    std::array<robin_hood::unordered_map<unsigned int, std::vector<Match>>, 2>& matches_map,
     int k,
     bool sort
 ) {
     std::vector<Nam> nams;
     for (size_t is_revcomp = 0; is_revcomp < 2; ++is_revcomp) {
-        auto& hits_oriented = hits_per_ref[is_revcomp];
-        merge_hits_into_nams(hits_oriented, k, sort, is_revcomp, nams);
+        auto& hits_oriented = matches_map[is_revcomp];
+        merge_matches_into_nams(hits_oriented, k, sort, is_revcomp, nams);
     }
     return nams;
 }
@@ -159,9 +159,9 @@ std::tuple<float, int, std::vector<Nam>> find_nams(
     const QueryRandstrobeVector &query_randstrobes,
     const StrobemerIndex& index
 ) {
-    std::array<robin_hood::unordered_map<unsigned int, std::vector<Hit>>, 2> hits_per_ref;
-    hits_per_ref[0].reserve(100);
-    hits_per_ref[1].reserve(100);
+    std::array<robin_hood::unordered_map<unsigned int, std::vector<Match>>, 2> matches_map;
+    matches_map[0].reserve(100);
+    matches_map[1].reserve(100);
     int nr_good_hits = 0;
     int total_hits = 0;
     for (const auto &q : query_randstrobes) {
@@ -172,11 +172,11 @@ std::tuple<float, int, std::vector<Nam>> find_nams(
                 continue;
             }
             nr_good_hits++;
-            add_to_hits_per_ref(hits_per_ref[q.is_reverse], q.start, q.end, index, position);
+            add_to_matches_map(matches_map[q.is_reverse], q.start, q.end, index, position);
         }
     }
     float nonrepetitive_fraction = total_hits > 0 ? ((float) nr_good_hits) / ((float) total_hits) : 1.0;
-    auto nams = merge_hits_into_nams_forward_and_reverse(hits_per_ref, index.k(), false);
+    auto nams = merge_matches_into_nams_forward_and_reverse(matches_map, index.k(), false);
     return {nonrepetitive_fraction, nr_good_hits, nams};
 }
 
@@ -203,11 +203,11 @@ std::pair<int, std::vector<Nam>> find_nams_rescue(
         }
     };
 
-    std::array<robin_hood::unordered_map<unsigned int, std::vector<Hit>>, 2> hits_per_ref;
+    std::array<robin_hood::unordered_map<unsigned int, std::vector<Match>>, 2> matches_map;
     std::vector<RescueHit> hits_fw;
     std::vector<RescueHit> hits_rc;
-    hits_per_ref[0].reserve(100);
-    hits_per_ref[1].reserve(100);
+    matches_map[0].reserve(100);
+    matches_map[1].reserve(100);
     hits_fw.reserve(5000);
     hits_rc.reserve(5000);
 
@@ -234,14 +234,14 @@ std::pair<int, std::vector<Nam>> find_nams_rescue(
             if ((rh.count > rescue_cutoff && cnt >= 5) || rh.count > 1000) {
                 break;
             }
-            add_to_hits_per_ref(hits_per_ref[is_revcomp], rh.query_start, rh.query_end, index, rh.position);
+            add_to_matches_map(matches_map[is_revcomp], rh.query_start, rh.query_end, index, rh.position);
             cnt++;
             n_hits++;
         }
         is_revcomp++;
     }
 
-    return {n_hits, merge_hits_into_nams_forward_and_reverse(hits_per_ref, index.k(), true)};
+    return {n_hits, merge_matches_into_nams_forward_and_reverse(matches_map, index.k(), true)};
 }
 
 std::ostream& operator<<(std::ostream& os, const Nam& n) {