-
Notifications
You must be signed in to change notification settings - Fork 1
/
sam2bam
executable file
·68 lines (61 loc) · 2.76 KB
/
sam2bam
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/bin/bash
#PBS -l nodes=1:ppn=4
#### usage ####
usage() {
echo Program: "sam2bam (convert SAM to BAM format)"
echo Author: RTH, University of Copenhagen, Denmark
echo Version: 1.0
echo Contact: sachin@rth.dk
echo "Usage: sam2bam -i <file> -m <mode>"
echo "Options:"
echo " -i <file> [SAM (map) file]"
echo " -m <int> [mode to run]"
echo " [1: convert all reads in sam to bam (default)]"
echo " [2: convert all reads in sam to bam while also uncollapsing identical reads]"
echo " [3: convert only reads having mapping frequency equal to '-c' in sam to bam]"
echo " [4: convert only reads having mapping frequency equal to '-c' in sam to bam while also uncollapsing identical reads]"
echo " -c <int> [mapping frequency (default:1)]"
echo " -x [just filter the reads, do not convert to bam]"
echo " -h [help]"
echo
exit 0
}
MODE=1
MAPPING_FREQUENCY=1
#### parse options ####
while getopts i:m:c:xh ARG; do
case "$ARG" in
i) SAMFILE=$OPTARG;;
m) MODE=$OPTARG;;
c) MAPPING_FREQUENCY=$OPTARG;;
x) NOBAM=1;;
h) HELP=1;;
esac
done
## usage, if necessary file and directories are given/exist
if [ ! -f "$SAMFILE" -o "$HELP" ]; then
usage
fi
ID=`echo $SAMFILE | sed 's/\.sam//g'`;
if [ "$MODE" -eq 1 -a -z "$NOBAM" ]; then
samtools view -bS $SAMFILE > $ID.bam
elif [ "$MODE" -eq 2 ]; then
if [ -z "$NOBAM" ]; then
perl -ane 'if($_=~/^\@/) { print $_; } else { ($id,$expr)=split(/\|/,$F[0]); for($i=1;$i<=$expr;$i++) { $newid=$id."_$i"; $line=$_; $line=~s/\Q$F[0]\E/$newid/g; print "$line"; } }' $SAMFILE | samtools view -bS - > $ID.uncollapsed.bam
else
perl -ane 'if($_=~/^\@/) { print $_; } else { ($id,$expr)=split(/\|/,$F[0]); for($i=1;$i<=$expr;$i++) { $newid=$id."_$i"; $line=$_; $line=~s/\Q$F[0]\E/$newid/g; print "$line"; } }' $SAMFILE > $ID.uncollapsed.sam
fi
elif [ "$MODE" -eq 3 ]; then
if [ -z "$NOBAM" ]; then
grep -P "^\@|NH:i:[1-$MAPPING_FREQUENCY]{0,1}\s+" $SAMFILE | samtools view -bS - > $ID.uniq$MAPPING_FREQUENCY.bam
else
grep -P "^\@|NH:i:[1-$MAPPING_FREQUENCY]{0,1}\s+" $SAMFILE > $ID.uniq$MAPPING_FREQUENCY.sam
fi
elif [ "$MODE" -eq 4 ]; then
if [ -z "$NOBAM" ]; then
grep -P "^\@|NH:i:[1-$MAPPING_FREQUENCY]{0,1}\s+" $SAMFILE | perl -ane 'if($_=~/^\@/) { print $_; } else { ($id,$expr)=split(/\|/,$F[0]); for($i=1;$i<=$expr;$i++) { $newid=$id."_$i"; $line=$_; $line=~s/\Q$F[0]\E/$newid/g; print "$line"; } }' | samtools view -bS - > $ID.uniq$MAPPING_FREQUENCY.uncollapsed.bam
else
grep -P "^\@|NH:i:[1-$MAPPING_FREQUENCY]{0,1}\s+" $SAMFILE | perl -ane 'if($_=~/^\@/) { print $_; } else { ($id,$expr)=split(/\|/,$F[0]); for($i=1;$i<=$expr;$i++) { $newid=$id."_$i"; $line=$_; $line=~s/\Q$F[0]\E/$newid/g; print "$line"; } }' > $ID.uniq$MAPPING_FREQUENCY.uncollapsed.sam
fi
fi
exit