Skip to content

Commit

Permalink
Merge pull request #51 from martinghunt/remove_numpy
Browse files Browse the repository at this point in the history
Remove numpy
  • Loading branch information
John Tate committed Oct 16, 2015
2 parents 69d83f6 + 924a9eb commit 35db6ca
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 69 deletions.
2 changes: 1 addition & 1 deletion pyfastaq/common.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = '3.9.0'
version = '3.10.0'
57 changes: 0 additions & 57 deletions pyfastaq/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import sys
import copy
import random
import numpy
from pyfastaq import sequences, utils, caf

class Error(Exception):
    '''Exception type defined by this module'''
Expand Down Expand Up @@ -404,62 +403,6 @@ def make_random_contigs(contigs, length, outfile, name_by_letters=False, prefix=
utils.close(fout)


def _write_long_read(seq, start, end, filehandle, ins_skip=None, ins_window=None):
    '''Writes seq[start:end] (0-based start, end exclusive) to filehandle as a Fasta record named <seq.id>_<start+1>_<end>. Returns the length of the read, measured before any insertions are added'''
    fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end)]), seq[start:end])
    # Length is taken before insertions so coverage is counted in bases of the original sequence
    read_length = len(fa)
    if ins_skip:
        fa.add_insertions(skip=ins_skip, window=ins_window)
    print(fa, file=filehandle)
    return read_length


def make_long_reads(infile, outfile, method='tiling', fixed_read_length=20000, tile_step=10000, gamma_shape=1.2, gamma_scale=6000, coverage=10, gamma_min_length=20000, seed=None, ins_skip=None, ins_window=None,):
    '''Makes long reads from each sequence in infile and writes them to outfile.

    method must be one of:
      tiling:  reads of length fixed_read_length, starting every tile_step bases. The last read of a sequence may be shorter, since it stops at the sequence end.
      gamma:   reads at random positions, with lengths drawn from a gamma distribution (gamma_shape, gamma_scale), redrawn until the length is between gamma_min_length and the sequence length. Reads are made until the total read length is about coverage * sequence length.
      uniform: reads of length fixed_read_length at random positions, until the total read length is about coverage * sequence length.

    Sequences that are shorter than fixed_read_length (tiling, uniform) or gamma_min_length (gamma) are skipped, with a message written to stderr.

    seed: if not None, used to seed the random module, which makes the gamma and uniform methods reproducible.
    ins_skip, ins_window: give both or neither. If given, they are passed to add_insertions(skip=..., window=...) for every read before it is written.

    Raises AssertionError if method is not recognised, or if only one of ins_skip and ins_window is given'''
    assert method in ['tiling', 'gamma', 'uniform']
    assert (ins_skip is None) == (ins_window is None)
    if seed is not None:
        random.seed(a=seed)
    seq_reader = sequences.file_reader(infile)
    f = utils.open_file_write(outfile)

    # try/finally so that the output file is closed even if making a read fails
    try:
        for seq in seq_reader:
            seq_length = len(seq)
            min_length = gamma_min_length if method == 'gamma' else fixed_read_length
            if seq_length < min_length:
                print('Skipping sequence', seq.id, 'because it is too short at', seq_length, 'bases', file=sys.stderr)
                continue

            if method == 'tiling':
                for i in range(0, seq_length, tile_step):
                    end = min(seq_length, i + fixed_read_length)
                    _write_long_read(seq, i, end, f, ins_skip=ins_skip, ins_window=ins_window)
                    # This read reached the end of the sequence, so any later tile would be contained in it
                    if end >= seq_length:
                        break
            else:
                total_read_length = 0
                # Stop once within half of the minimum read length of the target total
                while total_read_length < coverage * seq_length - 0.5 * min_length:
                    if method == 'gamma':
                        # random.gammavariate (not numpy) so that lengths are controlled by seed.
                        # Rejection sampling: redraw until the length is usable for this sequence
                        read_length = int(random.gammavariate(gamma_shape, gamma_scale))
                        while read_length < gamma_min_length or read_length > seq_length:
                            read_length = int(random.gammavariate(gamma_shape, gamma_scale))
                    else:
                        read_length = fixed_read_length

                    start = random.randint(0, seq_length - read_length)
                    total_read_length += _write_long_read(seq, start, start + read_length, f, ins_skip=ins_skip, ins_window=ins_window)
    finally:
        utils.close(f)


def mean_length(infile, limit=None):
'''Returns the mean length of the sequences in the input file. By default uses all sequences. To limit to the first N sequences, use limit=N'''
total = 0
Expand Down
9 changes: 0 additions & 9 deletions pyfastaq/tests/tasks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,6 @@ def files_are_equal(file1, file2):
os.unlink(tmp)


class TestMakeLongReads(unittest.TestCase):
    '''Tests for tasks.make_long_reads'''
    def test_tiling_reads(self):
        '''Test make_long_reads using the tiling method, comparing the output with the stored expected file'''
        infile = os.path.join(data_dir, 'tasks_test_make_long_reads.input.fa')
        expected = os.path.join(data_dir, 'tasks_test_make_long_reads.output.fa')
        outfile = 'tmp.out.fa'
        tasks.make_long_reads(infile, outfile, method='tiling', fixed_read_length=10, tile_step=5)
        # shallow=False compares file contents, not just os.stat() signatures
        files_match = filecmp.cmp(expected, outfile, shallow=False)
        self.assertTrue(files_match)
        os.unlink(outfile)


class TestMeanLength(unittest.TestCase):
def test_mean_length(self):
'''Test mean_length'''
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

setup(
name='pyfastaq',
version='3.9.0',
version='3.10.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
Expand All @@ -13,7 +13,6 @@
scripts=glob.glob('scripts/*'),
test_suite='nose.collector',
tests_require=['nose >= 1.3'],
install_requires=['numpy >= 1.7.1'],
license='GPLv3',
classifiers=[
'Development Status :: 4 - Beta',
Expand Down

0 comments on commit 35db6ca

Please sign in to comment.