Skip to content
This repository has been archived by the owner on Jan 24, 2018. It is now read-only.

Commit

Permalink
Merge pull request #243 from dcolligan/bam2bam
Browse files Browse the repository at this point in the history
Add datadriven BAM files
  • Loading branch information
jeromekelleher committed Mar 16, 2015
2 parents d648e43 + d907df2 commit 182543f
Show file tree
Hide file tree
Showing 9 changed files with 110 additions and 31 deletions.
21 changes: 21 additions & 0 deletions scripts/bam2bam.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""
Convert a BAM file to a small BAM file
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import utils


@utils.Timed()
def main():
    """
    Entry point: builds a BAM-to-BAM alignment file tool, lets it read
    the command line, then runs the conversion
    """
    constants = utils.AlignmentFileConstants
    converter = utils.AlignmentFileTool(constants.BAM, constants.BAM)
    converter.parseArgs()
    converter.convert()


if __name__ == '__main__':
    main()
36 changes: 5 additions & 31 deletions scripts/bam2sam.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,42 +5,16 @@
from __future__ import print_function
from __future__ import unicode_literals

import argparse

import pysam

import utils


def parseArgs(argv=None):
    """
    Parses the command line of the BAM to SAM conversion tool

    argv is an optional list of argument strings; when it is None the
    arguments are taken from sys.argv, as before.

    Returns the argparse namespace with the attributes inputFile,
    outputFile (default 'out.sam') and numLines (an int, default 10).
    """
    parser = argparse.ArgumentParser(
        description="BAM to SAM conversion tool")
    parser.add_argument(
        "inputFile", help="the name of the BAM file to read")
    parser.add_argument(
        "--outputFile", "-o", default='out.sam',
        help="the name of the SAM file to write")
    # type=int is required: without it a value given on the command line
    # stays a string and cannot be used as a loop bound later on
    parser.add_argument(
        "--numLines", "-n", default=10, type=int,
        help="the number of lines to write")
    return parser.parse_args(argv)


def bam2sam(args):
    """
    Writes the first args.numLines reads of the BAM file args.inputFile
    to the SAM file args.outputFile, reusing the input file's header

    Stops early, without raising, if the input holds fewer reads than
    requested.  Both files are closed even if reading or writing fails.
    """
    # numLines may still be a string if it came straight from the
    # command line without conversion
    numLines = int(args.numLines)
    bam = pysam.AlignmentFile(args.inputFile, "rb")
    try:
        sam = pysam.AlignmentFile(args.outputFile, "wh", header=bam.header)
        try:
            for _ in xrange(numLines):
                # next() with a default avoids a StopIteration escaping
                # when the input runs out of reads
                alignedSegment = next(bam, None)
                if alignedSegment is None:
                    break
                sam.write(alignedSegment)
        finally:
            sam.close()
    finally:
        bam.close()


@utils.Timed()
def main():
    """
    Entry point: converts a BAM file to a SAM file using the shared
    alignment file tool from utils
    """
    # The conversion is done once, by AlignmentFileTool; the older
    # parseArgs()/bam2sam() path is no longer invoked here because it
    # parsed the command line with a second, incompatible parser and
    # wrote the same output file a second time.
    tool = utils.AlignmentFileTool(
        utils.AlignmentFileConstants.BAM,
        utils.AlignmentFileConstants.SAM)
    tool.parseArgs()
    tool.convert()


if __name__ == '__main__':
Expand Down
78 changes: 78 additions & 0 deletions scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import print_function
from __future__ import unicode_literals

import argparse
import functools
import os
import shlex
Expand All @@ -15,6 +16,7 @@
import humanize
import requests
import yaml
import pysam


def log(message):
Expand Down Expand Up @@ -130,3 +132,79 @@ def getYamlDocument(filePath):
with open(filePath) as stream:
doc = yaml.load(stream)
return doc


class AlignmentFileConstants(object):
    """
    Namespace for the alignment file format names used by the scripts
    """
    # each constant's value is simply its own name
    SAM, BAM, BAI = "SAM", "BAM", "BAI"


class AlignmentFileTool(object):
    """
    Helps with operations on BAM and SAM files

    Typical use is to construct the tool with an input and an output
    format (see AlignmentFileConstants), call parseArgs() to read the
    command line and then call convert().
    """
    def __init__(self, inputFileFormat, outputFileFormat):
        """
        Stores the input and output formats; args stays None until
        parseArgs() has been called
        """
        self.inputFileFormat = inputFileFormat
        self.outputFileFormat = outputFileFormat
        self.args = None

    def parseArgs(self, argv=None):
        """
        Parses the command line and stores the result in self.args

        argv is an optional list of argument strings; when it is None
        the arguments are taken from sys.argv, as before.
        """
        description = "{} to {} conversion tool".format(
            self.inputFileFormat, self.outputFileFormat)
        parser = argparse.ArgumentParser(
            description=description)
        inputHelpText = "the name of the {} file to read".format(
            self.inputFileFormat)
        parser.add_argument(
            "inputFile", help=inputHelpText)
        outputHelpText = "the name of the {} file to write".format(
            self.outputFileFormat)
        defaultOutputFilePath = "out.{}".format(
            self.outputFileFormat.lower())
        parser.add_argument(
            "--outputFile", "-o", default=defaultOutputFilePath,
            help=outputHelpText)
        # type=int is required: without it a value given on the command
        # line stays a string and cannot be used as a loop bound
        parser.add_argument(
            "--numLines", "-n", default=10, type=int,
            help="the number of lines to write")
        parser.add_argument(
            "--skipIndexing", default=False, action='store_true',
            help="don't create an index file")
        self.args = parser.parse_args(argv)

    def _getOpenFlags(self):
        """
        Returns the (inputFlags, outputFlags) pair of pysam open modes
        for the configured formats; raises ValueError if either format
        is neither SAM nor BAM
        """
        inputFlagsByFormat = {
            AlignmentFileConstants.SAM: "r",
            AlignmentFileConstants.BAM: "rb",
        }
        outputFlagsByFormat = {
            AlignmentFileConstants.SAM: "wh",
            AlignmentFileConstants.BAM: "wb",
        }
        if self.inputFileFormat not in inputFlagsByFormat:
            raise ValueError(
                "unsupported input file format '{}'".format(
                    self.inputFileFormat))
        if self.outputFileFormat not in outputFlagsByFormat:
            raise ValueError(
                "unsupported output file format '{}'".format(
                    self.outputFileFormat))
        return (
            inputFlagsByFormat[self.inputFileFormat],
            outputFlagsByFormat[self.outputFileFormat])

    def convert(self):
        """
        Copies the first self.args.numLines reads of the input file to
        the output file, reusing the input file's header, then runs
        'samtools index' on the output when it is a BAM file and
        indexing was not skipped

        Stops early, without raising, if the input holds fewer reads
        than requested.  Both files are closed even if reading or
        writing fails.  Raises ValueError for an unsupported format.
        """
        inputFlags, outputFlags = self._getOpenFlags()
        inputFile = pysam.AlignmentFile(
            self.args.inputFile, inputFlags)
        try:
            outputFile = pysam.AlignmentFile(
                self.args.outputFile, outputFlags, header=inputFile.header)
            try:
                outputFilePath = outputFile.filename
                log("Creating alignment file '{}'".format(outputFilePath))
                for _ in xrange(self.args.numLines):
                    # next() with a default avoids a StopIteration
                    # escaping when the input runs out of reads
                    alignedSegment = next(inputFile, None)
                    if alignedSegment is None:
                        break
                    outputFile.write(alignedSegment)
            finally:
                outputFile.close()
        finally:
            inputFile.close()
        # the index can only be built once the output has been closed
        if (not self.args.skipIndexing and
                self.outputFileFormat == AlignmentFileConstants.BAM):
            indexFilePath = "{}.{}".format(
                outputFilePath, AlignmentFileConstants.BAI.lower())
            log("Creating index file '{}'".format(indexFilePath))
            # NOTE(review): the path is interpolated unquoted; paths with
            # spaces may break depending on how runCommand tokenizes the
            # command string -- confirm against runCommand
            runCommand("samtools index {}".format(outputFilePath))
5 changes: 5 additions & 0 deletions tests/data/reads/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Files in the directories wgBam and wgSam are taken from here:

http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeUwRepliSeq/

They were then shortened and converted with the tools in the scripts/ directory.
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions tests/datadriven/test_reads.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,5 @@ def getProtocolClass(self):

def testFixMe(self):
    """
    Placeholder check: only verifies that the object under test exists
    """
    # TODO
    self.assertIsNotNone(self._gaObject)

0 comments on commit 182543f

Please sign in to comment.