diff --git a/pyfastaq/common.py b/pyfastaq/common.py index 9b9d5ab..63b3d84 100644 --- a/pyfastaq/common.py +++ b/pyfastaq/common.py @@ -1 +1 @@ -version = '3.9.0' +version = '3.10.0' diff --git a/pyfastaq/tasks.py b/pyfastaq/tasks.py index 41755c6..e5e1b42 100644 --- a/pyfastaq/tasks.py +++ b/pyfastaq/tasks.py @@ -2,7 +2,6 @@ import sys import copy import random -import numpy from pyfastaq import sequences, utils, caf class Error (Exception): pass @@ -404,62 +403,6 @@ def make_random_contigs(contigs, length, outfile, name_by_letters=False, prefix= utils.close(fout) -def make_long_reads(infile, outfile, method='tiling', fixed_read_length=20000, tile_step=10000, gamma_shape=1.2, gamma_scale=6000, coverage=10, gamma_min_length=20000, seed=None, ins_skip=None, ins_window=None,): - assert method in ['tiling', 'gamma', 'uniform'] - assert ins_skip == ins_window == None or None not in [ins_skip, ins_window] - if seed is not None: - random.seed(a=seed) - seq_reader = sequences.file_reader(infile) - f = utils.open_file_write(outfile) - - for seq in seq_reader: - if method == 'tiling': - if len(seq) < fixed_read_length: - print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr) - continue - for i in range(0, len(seq), tile_step): - end = min(len(seq), i + fixed_read_length) - fa = sequences.Fasta('_'.join([seq.id, str(i + 1), str(end)]), seq[i:end]) - if ins_skip: - fa.add_insertions(skip=ins_skip, window=ins_window) - print(fa, file=f) - if end >= len(seq): - break - elif method == 'gamma': - if len(seq) < gamma_min_length: - print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr) - continue - total_read_length = 0 - while total_read_length < coverage * len(seq) - 0.5 * gamma_min_length: - read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale)) - while read_length < gamma_min_length or read_length > len(seq): - read_length = int(numpy.random.gamma(gamma_shape, scale=gamma_scale)) - - start = random.randint(0, len(seq) - read_length) - end = start + read_length - 1 - fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end + 1)]), seq[start:end+1]) - total_read_length += len(fa) - if ins_skip: - fa.add_insertions(skip=ins_skip, window=ins_window) - print(fa, file=f) - elif method == 'uniform': - if len(seq) < fixed_read_length: - print('Skipping sequence', seq.id, 'because it is too short at', len(seq), 'bases', file=sys.stderr) - continue - total_read_length = 0 - while total_read_length < coverage * len(seq) - 0.5 * fixed_read_length: - start = random.randint(0, len(seq) - fixed_read_length) - end = start + fixed_read_length - 1 - fa = sequences.Fasta('_'.join([seq.id, str(start + 1), str(end + 1)]), seq[start:end+1]) - total_read_length += len(fa) - if ins_skip: - fa.add_insertions(skip=ins_skip, window=ins_window) - print(fa, file=f) - - - utils.close(f) - - def mean_length(infile, limit=None): '''Returns the mean length of the sequences in the input file. By default uses all sequences. To limit to the first N sequences, use limit=N''' total = 0 diff --git a/pyfastaq/tests/tasks_test.py b/pyfastaq/tests/tasks_test.py index 0bae920..3d157ea 100644 --- a/pyfastaq/tests/tasks_test.py +++ b/pyfastaq/tests/tasks_test.py @@ -271,15 +271,6 @@ def files_are_equal(file1, file2): os.unlink(tmp) -class TestMakeLongReads(unittest.TestCase): - def test_tiling_reads(self): - tmp = 'tmp.out.fa' - fa_in = os.path.join(data_dir, 'tasks_test_make_long_reads.input.fa') - tasks.make_long_reads(fa_in, tmp, method='tiling', fixed_read_length=10, tile_step=5) - self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'tasks_test_make_long_reads.output.fa'), tmp, shallow=False)) - os.unlink(tmp) - - class TestMeanLength(unittest.TestCase): def test_mean_length(self): '''Test mean_length''' diff --git a/setup.py b/setup.py index eca24c8..012b341 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup( name='pyfastaq', - version='3.9.0', + version='3.10.0', description='Script to manipulate FASTA and FASTQ files, plus API for developers', packages = find_packages(), author='Martin Hunt', @@ -13,7 +13,6 @@ scripts=glob.glob('scripts/*'), test_suite='nose.collector', tests_require=['nose >= 1.3'], - install_requires=['numpy >= 1.7.1'], license='GPLv3', classifiers=[ 'Development Status :: 4 - Beta',