-
Notifications
You must be signed in to change notification settings - Fork 2
/
ReadAlign_maxMappableLength2strands.cpp
101 lines (89 loc) · 4.13 KB
/
ReadAlign_maxMappableLength2strands.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#include "ReadAlign.h"
#include "SuffixArrayFuns.h"
#include "ErrorWarning.h"
uint ReadAlign::maxMappableLength2strands(uint pieceStartIn, uint pieceLengthIn, uint iDir, uint iSA1, uint iSA2, uint& maxLbest, uint iFrag) {
//returns number of mappings, maxMappedLength=mapped length
uint Nrep=0, indStartEnd[2], maxL;
uint NrepAll[P.pGe.gSAsparseD], indStartEndAll[P.pGe.gSAsparseD][2], maxLall[P.pGe.gSAsparseD];
maxLbest=0;
bool dirR = iDir==0;
for (uint iDist=0; iDist<min(pieceLengthIn,P.pGe.gSAsparseD); iDist++) {//cycle through different distances
uint pieceStart;
uint pieceLength=pieceLengthIn-iDist;
//calculate full index
uint Lmax=min(P.pGe.gSAindexNbases,pieceLength);
uint ind1=0;
if (dirR) {//forward search
pieceStart=pieceStartIn+iDist;
for (uint ii=0;ii<Lmax;ii++) {//calculate index TODO: make the index calculation once for the whole read and store it
ind1 <<=2LLU;
ind1 += ((uint) Read1[0][pieceStart+ii]);
};
} else {//reverse search
pieceStart=pieceStartIn-iDist;
for (uint ii=0;ii<Lmax;ii++) {//calculate index TODO: make the index calculation once for the whole read and store it
ind1 <<=2LLU;
ind1 += ( 3-((uint) Read1[0][pieceStart-ii]) );
};
};
//find SA boundaries
uint Lind=Lmax;
while (Lind>0) {//check the precense of the prefix for Lind
iSA1=mapGen.SAi[mapGen.genomeSAindexStart[Lind-1]+ind1];
if ((iSA1 & mapGen.SAiMarkAbsentMaskC) == 0) {//prefix exists
break;
} else {//this prefix does not exist, reduce Lind
--Lind;
ind1 = ind1 >> 2;
};
};
if (mapGen.genomeSAindexStart[Lind-1]+ind1+1 < mapGen.genomeSAindexStart[Lind]) {//we are not at the end of the SA
iSA2=((mapGen.SAi[mapGen.genomeSAindexStart[Lind-1]+ind1+1] & mapGen.SAiMarkNmask) & mapGen.SAiMarkAbsentMask) - 1;
} else {
iSA2=mapGen.nSA-1;
};
//#define SA_SEARCH_FULL
#ifdef SA_SEARCH_FULL
//full search of the array even if the index search gave maxL
maxL=0;
Nrep = maxMappableLength(mapGen, Read1, pieceStart, pieceLength, iSA1 & mapGen.SAiMarkNmask, iSA2, dirR, maxL, indStartEnd);
#else
if (Lind < P.pGe.gSAindexNbases && (iSA1 & mapGen.SAiMarkNmaskC)==0 ) {//no need for SA search
indStartEnd[0]=iSA1;
indStartEnd[1]=iSA2;
Nrep=indStartEnd[1]-indStartEnd[0]+1;
maxL=Lind;
} else if (iSA1==iSA2) {//unique align already, just find maxL
if ((iSA1 & mapGen.SAiMarkNmaskC)!=0) {
ostringstream errOut;
errOut << "BUG: in ReadAlign::maxMappableLength2strands";
exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_BUG, P);
};
indStartEnd[0]=indStartEnd[1]=iSA1;
Nrep=1;
bool comparRes;
maxL=compareSeqToGenome(mapGen, Read1, pieceStart, pieceLength, Lind, iSA1, dirR, comparRes);
} else {//SA search, pieceLength>maxL
if ( (iSA1 & mapGen.SAiMarkNmaskC)==0 ) {//no N in the prefix
maxL=Lind;
} else {
maxL=0;
};
Nrep = maxMappableLength(mapGen, Read1, pieceStart, pieceLength, iSA1 & mapGen.SAiMarkNmask, iSA2, dirR, maxL, indStartEnd);
};
#endif
if (maxL+iDist > maxLbest) {//this idist is better
maxLbest=maxL+iDist;
};
NrepAll[iDist]=Nrep;
indStartEndAll[iDist][0]=indStartEnd[0];
indStartEndAll[iDist][1]=indStartEnd[1];
maxLall[iDist]=maxL;
};
for (uint iDist=0; iDist<min(pieceLengthIn,P.pGe.gSAsparseD); iDist++) {//cycle through different distances, store the ones with largest maxL
if ( (maxLall[iDist]+iDist) == maxLbest) {
storeAligns(iDir, (dirR ? pieceStartIn+iDist : pieceStartIn-iDist), NrepAll[iDist], maxLall[iDist], indStartEndAll[iDist], iFrag);
};
};
return Nrep;
};