Skip to content

Commit

Permalink
Added task fastaq_to_orfs_gff
Browse files Browse the repository at this point in the history
  • Loading branch information
martinghunt committed Apr 8, 2014
1 parent 1512f5b commit 2dc4c9e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 0 deletions.
17 changes: 17 additions & 0 deletions fastaq/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,23 @@ def fastaq_to_mira_xml(infile, outfile):
utils.close(fout)


def fastaq_to_orfs_gff(infile, outfile, min_length=300):
    '''Write the ORFs of every sequence in infile to outfile, one GFF-style line per ORF.

    infile is read with sequences.file_reader and outfile is opened with
    utils.open_file_write. For each sequence, seq.all_orfs(min_length=min_length)
    is called and each (coords, revcomp) pair it yields becomes one
    tab-separated line:

        <seq.id>  fastaq  CDS  <coords.start+1>  <coords.end+1>  .  <strand>  .

    where strand is '-' when revcomp is true, and '+' otherwise.

    min_length is passed straight through to all_orfs (default 300).
    '''
    seq_reader = sequences.file_reader(infile)
    fout = utils.open_file_write(outfile)
    try:
        for seq in seq_reader:
            for coords, revcomp in seq.all_orfs(min_length=min_length):
                strand = '-' if revcomp else '+'
                # NOTE(review): the +1 presumably converts 0-based inclusive
                # coords to the 1-based positions GFF expects - confirm
                # against the interval type returned by all_orfs.
                print(seq.id, 'fastaq', 'CDS', coords.start+1, coords.end+1, '.', strand, '.', sep='\t', file=fout)
    finally:
        # Close the output handle even if reading or ORF finding raises.
        utils.close(fout)


def file_to_dict(infile, d):
seq_reader = sequences.file_reader(infile)
for seq in seq_reader:
Expand Down
15 changes: 15 additions & 0 deletions fastaq/tests/data/sequences_test_orfs.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
1 fastaq CDS 28 222 . + .
1 fastaq CDS 45 227 . + .
1 fastaq CDS 49 171 . - .
1 fastaq CDS 110 241 . + .
1 fastaq CDS 144 266 . - .
1 fastaq CDS 228 422 . + .
1 fastaq CDS 278 433 . - .
1 fastaq CDS 287 478 . + .
1 fastaq CDS 289 519 . - .
1 fastaq CDS 563 703 . + .
1 fastaq CDS 601 759 . + .
1 fastaq CDS 606 818 . + .
1 fastaq CDS 819 938 . + .
1 fastaq CDS 836 988 . + .
1 fastaq CDS 865 999 . + .
9 changes: 9 additions & 0 deletions fastaq/tests/tasks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ def test_fastaq_to_mira_xml(self):
os.unlink(tmp)


class TestFastaqToOrfsGFF(unittest.TestCase):
    def test_fastaq_to_orfs_gff(self):
        '''fastaq_to_orfs_gff should reproduce the expected GFF file'''
        fasta_in = os.path.join(data_dir, 'sequences_test_orfs.fa')
        expected_gff = os.path.join(data_dir, 'sequences_test_orfs.gff')
        actual_gff = 'tmp.orfs.gff'

        tasks.fastaq_to_orfs_gff(fasta_in, actual_gff, min_length=120)

        # shallow=False forces a byte-by-byte comparison of file contents
        files_match = filecmp.cmp(expected_gff, actual_gff, shallow=False)
        self.assertTrue(files_match)
        os.unlink(actual_gff)


class TestFilter(unittest.TestCase):
def test_length_filter(self):
'''Check that filtering by length works as expected'''
Expand Down

0 comments on commit 2dc4c9e

Please sign in to comment.