-
Notifications
You must be signed in to change notification settings - Fork 2
/
outputSJ.cpp
157 lines (138 loc) · 6.85 KB
/
outputSJ.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#include "ReadAlignChunk.h"
#include "Parameters.h"
#include "OutSJ.h"
#include <limits.h>
#include "ErrorWarning.h"
int compareUint(const void* i1, const void* i2) {//compare uint arrays
uint s1=*( (uint*)i1 );
uint s2=*( (uint*)i2 );
if (s1>s2) {
return 1;
} else if (s1<s2) {
return -1;
} else {
return 0;
};
};
void outputSJ(ReadAlignChunk** RAchunk, Parameters& P) {//collapses junctions from all therads/chunks; outputs junctions to file
// system("echo `date` ..... Writing splice junctions >> Log.timing.out");
Junction oneSJ(RAchunk[0]->mapGen);
char** sjChunks = new char* [P.runThreadN+1];
#define OUTSJ_limitScale 5
OutSJ allSJ (P.limitOutSJcollapsed*OUTSJ_limitScale,P,RAchunk[0]->mapGen);
if (P.outFilterBySJoutStage!=1) {//chunkOutSJ
for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
sjChunks[ic]=RAchunk[ic]->chunkOutSJ->data;
memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
};
} else {//chunkOutSJ1
for (int ic=0;ic<P.runThreadN;ic++) {//populate sjChunks with links to data
sjChunks[ic]=RAchunk[ic]->chunkOutSJ1->data;
memset(sjChunks[ic]+RAchunk[ic]->chunkOutSJ1->N*oneSJ.dataSize,255,oneSJ.dataSize);//mark the junction after last with big number
};
};
while (true) {
int icOut=-1;//chunk from which the junction is output
for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the "smallest" junction
if ( *(uint*)(sjChunks[ic])<ULONG_MAX && (icOut==-1 ||compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])<0 ) ) {
icOut=ic;
};
};
if (icOut<0) break; //no more junctions to output
for (int ic=0;ic<P.runThreadN;ic++) {//scan through all chunks, find the junctions equal to icOut-junction
if (ic!=icOut && compareSJ((void*) sjChunks[ic], (void*) sjChunks[icOut])==0) {
oneSJ.collapseOneSJ(sjChunks[icOut],sjChunks[ic],P);//collapse ic-junction into icOut
sjChunks[ic] += oneSJ.dataSize;//shift ic-chunk by one junction
};
};
//write out the junction
oneSJ.junctionPointer(sjChunks[icOut],0);//point to the icOut junction
//filter the junction
bool sjFilter;
sjFilter=*oneSJ.annot>0 \
|| ( ( *oneSJ.countUnique>=(uint) P.outSJfilterCountUniqueMin[(*oneSJ.motif+1)/2] \
|| (*oneSJ.countMultiple+*oneSJ.countUnique)>=(uint) P.outSJfilterCountTotalMin[(*oneSJ.motif+1)/2] )\
&& *oneSJ.overhangLeft >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
&& *oneSJ.overhangRight >= (uint) P.outSJfilterOverhangMin[(*oneSJ.motif+1)/2] \
&& ( (*oneSJ.countMultiple+*oneSJ.countUnique)>P.outSJfilterIntronMaxVsReadN.size() || *oneSJ.gap<=(uint) P.outSJfilterIntronMaxVsReadN[*oneSJ.countMultiple+*oneSJ.countUnique-1]) );
if (sjFilter) {//record the junction in all SJ
memcpy(allSJ.data+allSJ.N*oneSJ.dataSize,sjChunks[icOut],oneSJ.dataSize);
allSJ.N++;
if (allSJ.N == P.limitOutSJcollapsed*OUTSJ_limitScale ) {
ostringstream errOut;
errOut <<"EXITING because of fatal error: buffer size for SJ output is too small\n";
errOut <<"Solution: increase input parameter --limitOutSJcollapsed\n";
exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
};
};
sjChunks[icOut] += oneSJ.dataSize;//shift icOut-chunk by one junction
};
bool* sjFilter=new bool[allSJ.N];
if (P.outFilterBySJoutStage!=2) {
//filter non-canonical junctions that are close to canonical
uint* sjA = new uint [allSJ.N*3];
for (uint ii=0;ii<allSJ.N;ii++) {//scan through all junctions, filter by the donor ditance to a nearest donor, fill acceptor array
oneSJ.junctionPointer(allSJ.data,ii);
sjFilter[ii]=false;
uint x1=0, x2=-1;
if (ii>0) x1=*( (uint*)(allSJ.data+(ii-1)*oneSJ.dataSize) ); //previous junction donor
if (ii+1<allSJ.N) x2=*( (uint*)(allSJ.data+(ii+1)*oneSJ.dataSize) ); //next junction donor
uint minDist=min(*oneSJ.start-x1, x2-*oneSJ.start);
sjFilter[ii]= minDist >= (uint) P.outSJfilterDistToOtherSJmin[(*oneSJ.motif+1)/2];
sjA[ii*3]=*oneSJ.start+(uint)*oneSJ.gap;//acceptor
sjA[ii*3+1]=ii;
if (*oneSJ.annot==0) {
sjA[ii*3+2]=*oneSJ.motif;
} else {
sjA[ii*3+2]=SJ_MOTIF_SIZE+1;
};
};
qsort((void*) sjA, allSJ.N, sizeof(uint)*3, compareUint);
for (uint ii=0;ii<allSJ.N;ii++) {//
if (sjA[ii*3+2]==SJ_MOTIF_SIZE+1) {//no filtering for annotated junctions
sjFilter[sjA[ii*3+1]]=true;
} else {
uint x1=0, x2=-1;
if (ii>0) x1=sjA[ii*3-3]; //previous junction donor
if (ii+1<allSJ.N) x2=sjA[ii*3+3]; //next junction donor
uint minDist=min(sjA[ii*3]-x1, x2-sjA[ii*3]);
sjFilter[sjA[ii*3+1]] = sjFilter[sjA[ii*3+1]] && ( minDist >= (uint) P.outSJfilterDistToOtherSJmin[(sjA[ii*3+2]+1)/2] );
};
};
};
//output junctions
if (P.outFilterBySJoutStage!=1) {//output file
ofstream outSJfileStream((P.outFileNamePrefix+"SJ.out.tab").c_str());
for (uint ii=0;ii<allSJ.N;ii++) {//write to file
if ( P.outFilterBySJoutStage==2 || sjFilter[ii] ) {
oneSJ.junctionPointer(allSJ.data,ii);
oneSJ.outputStream(outSJfileStream);//write to file
};
};
outSJfileStream.close();
} else {//make sjNovel array in P
P.sjNovelN=0;
for (uint ii=0;ii<allSJ.N;ii++)
{//count novel junctions
if (sjFilter[ii])
{//only those passing filter
oneSJ.junctionPointer(allSJ.data,ii);
if (*oneSJ.annot==0) P.sjNovelN++;
};
};
P.sjNovelStart = new uint [P.sjNovelN];
P.sjNovelEnd = new uint [P.sjNovelN];
P.inOut->logMain <<"Detected " <<P.sjNovelN<<" novel junctions that passed filtering, will proceed to filter reads that contained unannotated junctions"<<endl;
uint isj=0;
for (uint ii=0;ii<allSJ.N;ii++) {//write to file
if (sjFilter[ii]) {
oneSJ.junctionPointer(allSJ.data,ii);
if (*oneSJ.annot==0) {//unnnotated only
P.sjNovelStart[isj]=*oneSJ.start;
P.sjNovelEnd[isj]=*oneSJ.start+(uint)(*oneSJ.gap)-1;
isj++;
};
};
};
};
};