Skip to content

Commit

Permalink
Merged python3 into master, fixing #93 and #120
Browse files Browse the repository at this point in the history
  • Loading branch information
AlphaSquad committed Dec 13, 2021
2 parents 231a9dd + 97e94d2 commit 900897e
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 12 deletions.
7 changes: 7 additions & 0 deletions defaults/default_config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ readsim=tools/art_illumina-2.3.6/art_illumina
#for ART:
#HiSeq 150bp: hi150
#MBARC-26 150bp: mbarc
#custom profile (see below): own
#for wgsim:
#error rate as <float> (e.g. 0.05 for 5% error rate)
#blank for nanosim and wgsim
Expand All @@ -52,6 +53,12 @@ profile=mbarc
# Directory containing error profiles (can be blank for wgsim)
error_profiles=tools/art_illumina-2.3.6/profiles/

# For supplying custom error profiles with "own" option:
# base path of the two error profile files, without the trailing "1.txt" / "2.txt" suffix
base_profile_name=
# read length (positive integer) for the custom error profile; required when profile=own
profile_read_length=

#paired end read, insert size (not applicable for nanosim)
fragments_size_mean=270
fragment_size_standard_deviation=27
Expand Down
28 changes: 20 additions & 8 deletions metagenomesimulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,14 +336,26 @@ def _simulate_reads(self, file_path_distribution, sample_index):
tmp_dir=self._project_file_folder_handler.get_tmp_wd())

file_path_genome_locations = self._project_file_folder_handler.get_genome_location_file_path()
simulator.simulate(
file_path_distribution=file_path_distribution,
file_path_genome_locations=file_path_genome_locations,
directory_output=directory_output_tmp,
total_size=self._sample_size_in_base_pairs,
profile=self._error_profile,
fragment_size_mean=self._fragments_size_mean_in_bp,
fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp)
if self._read_simulator_type == "art":
simulator.simulate(
file_path_distribution=file_path_distribution,
file_path_genome_locations=file_path_genome_locations,
directory_output=directory_output_tmp,
total_size=self._sample_size_in_base_pairs,
profile=self._error_profile,
fragment_size_mean=self._fragments_size_mean_in_bp,
fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp,
profile_filename=self._custom_profile_filename,
own_read_length=self._custom_readlength)
else:
simulator.simulate(
file_path_distribution=file_path_distribution,
file_path_genome_locations=file_path_genome_locations,
directory_output=directory_output_tmp,
total_size=self._sample_size_in_base_pairs,
profile=self._error_profile,
fragment_size_mean=self._fragments_size_mean_in_bp,
fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp)

# convert sam to bam
samtools = SamtoolsWrapper(
Expand Down
34 changes: 30 additions & 4 deletions scripts/ReadSimulationWrapper/readsimulationwrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,7 +791,8 @@ def __init__(self, file_path_executable, directory_error_profiles, **kwargs):

def simulate(
self, file_path_distribution, file_path_genome_locations, directory_output,
total_size, profile, fragment_size_mean, fragment_size_standard_deviation):
total_size, profile, fragment_size_mean, fragment_size_standard_deviation,
profile_filename=None, own_read_length=None):
"""
Simulate reads based on a given sample distribution
Expand All @@ -802,13 +803,17 @@ def simulate(
@param directory_output: Directory for the sam and fastq files output
@type directory_output: str | unicode
@param total_size: Size of sample in base pairs
@type total_size: int
@param profile: Art illumina error profile: 'low', 'mi', 'hi', 'hi150'
@type total_size: int | long
@param profile: Art illumina error profile: 'low', 'mi', 'hi', 'hi150', 'own'
@type profile: str | unicode
@param fragment_size_mean: Size of the fragment of which the ends are used as reads in base pairs
@type fragment_size_mean: int
@param fragment_size_standard_deviation: Standard deviation of the fragment size in base pairs.
@type fragment_size_standard_deviation: int
@type fragment_size_standard_deviation: int | long
@param profile_filename: Optional base name of user-supplied error profile files (without "[1/2].txt").
@type profile_filename: str | unicode | None
@param own_read_length: Optional read length for user-supplied error profile.
@type own_read_length: int | long | None
"""
assert isinstance(total_size, (float, int)), "Expected natural digit"
assert isinstance(fragment_size_mean, int), "Expected natural digit"
Expand All @@ -817,6 +822,27 @@ def simulate(
assert fragment_size_mean > 0, "Mean fragments size needs to be a positive number"
assert fragment_size_standard_deviation > 0, "Fragment size standard deviation needs to be a positive number"
assert self.validate_dir(directory_output)
# if user specifies own profile, add corresponding parameters
if profile == "own":
# sanity checks
assert own_read_length, "Read length must be given when supplying own profile"
assert isinstance(own_read_length, (int, long)), "Expected natural digit for read length"
assert own_read_length > 0, "Read length must be a positive number"
assert profile_filename, "Profile filename must be given when supplying own profile"
# sanity check file name
legal_for_filename = string.ascii_letters + string.digits + '_-./\\'
assert self.validate_characters(profile_filename, legal_alphabet=legal_for_filename)
# check if supplied files are present
own_filenames = [
profile_filename+file_end
for file_end in ['1.txt', '2.txt']
]
#assert self.validate_dir(self._directory_error_profiles, file_names=own_filenames)
for own_file in own_filenames:
assert self.validate_file(own_file)
# add user-supplied profiles
self._art_error_profiles["own"] = profile_filename
self._art_read_length["own"] = own_read_length
if profile is not None:
assert profile in self._art_error_profiles, "Unknown art illumina profile: '{}'".format(profile)
assert profile in self._art_read_length, "Unknown art illumina profile: '{}'".format(profile)
Expand Down
8 changes: 8 additions & 0 deletions scripts/configfilehandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ def _read_config(self, file_path_config):

if self._error_profile is None:
self._error_profile = self._config.get_value("profile", silent=True)

if self._custom_profile_filename is None:
self._custom_profile_filename = self._config.get_value("base_profile_name", silent=True)

if self._custom_readlength is None:
self._custom_readlength = self._config.get_value("profile_read_length", is_digit=True, silent=True)

if self._fragment_size_standard_deviation_in_bp is None:
self._fragment_size_standard_deviation_in_bp = self._config.get_value(
Expand Down Expand Up @@ -203,6 +209,8 @@ def _stream_read_simulator(self, output_stream=sys.stdout):
output_stream.write("error_profiles={}\n".format(self._directory_error_profiles or ""))
output_stream.write("samtools={}\n".format(self._executable_samtools))
output_stream.write("profile={}\n".format(self._error_profile))
output_stream.write("base_profile_name={}\n".format(self._custom_profile_filename or ""))
output_stream.write("profile_read_length={}\n".format(self._custom_readlength or ""))
output_stream.write("size={}\n".format(self._sample_size_in_base_pairs/self._base_pairs_multiplication_factor))
output_stream.write("type={}\n".format(self._read_simulator_type))
output_stream.write("fragments_size_mean={}\n".format(self._fragments_size_mean_in_bp))
Expand Down
2 changes: 2 additions & 0 deletions scripts/defaultvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class DefaultValues(DefaultLogging):

_read_simulator_type = None
_error_profile = None
_custom_profile_filename = None
_custom_readlength = None
_fragment_size_standard_deviation_in_bp = None
_fragments_size_mean_in_bp = None

Expand Down

0 comments on commit 900897e

Please sign in to comment.