Skip to content

Commit

Permalink
Merge pull request #56 from martinghunt/fix_looks_like_gene
Browse files Browse the repository at this point in the history
Fix looks like gene
  • Loading branch information
John Tate committed Jan 4, 2016
2 parents 7837ae4 + b4b7a22 commit ef627f5
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 7 deletions.
2 changes: 1 addition & 1 deletion pyfastaq/common.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '3.11.0'
version = '3.11.1'
20 changes: 15 additions & 5 deletions pyfastaq/sequences.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,11 @@ def contig_coords(self):
return [intervals.Interval(coords[i], coords[i+1]) for i in range(0, len(coords)-1,2)]




def orfs(self, frame=0, revcomp=False):
'''Returns a list of ORFs that the sequence has, starting on the given
frame. Each returned ORF is an intervals.Interval object.
If revcomp=True, then finds the ORFs of the reverse complement
of the sequence.'''
assert frame in [0,1,2]
if revcomp:
self.revcomp()
Expand All @@ -314,6 +316,11 @@ def orfs(self, frame=0, revcomp=False):


def all_orfs(self, min_length=300):
'''Finds all open reading frames in the sequence that are at least as
long as min_length. Includes ORFs on the reverse strand.
Returns a list of ORFs, where each element is a tuple:
(intervals.Interval, bool)
where bool=True means on the reverse strand'''
orfs = []
for frame in [0,1,2]:
for revcomp in [False, True]:
Expand All @@ -335,10 +342,13 @@ def is_complete_orf(self):
return False


def looks_like_gene(self, translation_table=1):
def looks_like_gene(self):
'''Returns True iff: length >= 6, length is a multiple of 3, the first codon is a start codon, the last codon is a stop codon, and there are no other stop codons'''
return self.is_complete_orf() and len(self) >= 6 and len(self) %3 == 0 and self.seq[0:3] in genetic_codes.starts[genetic_code]

return self.is_complete_orf() \
and len(self) >= 6 \
and len(self) %3 == 0 \
and self.seq[0:3].upper() in genetic_codes.starts[genetic_code]


# Fills the object with the next sequence in the file. Returns
# True if this was successful, False if no more sequences in the file.
Expand Down
6 changes: 6 additions & 0 deletions pyfastaq/tests/sequences_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ def test_looks_like_gene(self):
tests = [
(sequences.Fasta('ID', 'TTT'), False),
(sequences.Fasta('ID', 'TTGTAA'), True),
(sequences.Fasta('ID', 'ttgTAA'), True),
(sequences.Fasta('ID', 'TTGTTTTAA'), True),
(sequences.Fasta('ID', 'TTGTAATTTTAA'), False),
(sequences.Fasta('ID', 'TTGTTTTGAA'), False),
Expand All @@ -260,6 +261,11 @@ def test_looks_like_gene(self):
for t in tests:
self.assertEqual(t[0].looks_like_gene(), t[1])

sequences.genetic_code = 1
self.assertFalse(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
sequences.genetic_code = 11
self.assertTrue(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())


def test_is_all_Ns(self):
'''Test is_all_Ns()'''
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='pyfastaq',
version='3.11.0',
version='3.11.1',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
Expand Down

0 comments on commit ef627f5

Please sign in to comment.