Merged python3 into master, fixing #93 and #120

CAMI-challenge · Dec 13, 2021 · 900897e
2 parents 231a9dd + 97e94d2
commit 900897e
Show file tree

Hide file tree

Showing 5 changed files with 67 additions and 12 deletions.
diff --git a/defaults/default_config.ini b/defaults/default_config.ini
@@ -44,6 +44,7 @@ readsim=tools/art_illumina-2.3.6/art_illumina
 #for ART:
 #HiSeq 150bp: hi150
 #MBARC-26 150bp: mbarc
+#custom profile (see below): own
 #for wgsim:
 #error rate as <float> (e.g. 0.05 for 5% error rate)
 #blank for nanosim and wgsim
@@ -52,6 +53,12 @@ profile=mbarc
 # Directory containing error profiles (can be blank for wgsim)
 error_profiles=tools/art_illumina-2.3.6/profiles/
 
+# For supplying custom error profiles with "own" option:
+# base path of the two custom error profile files, given without the trailing "1.txt" / "2.txt" suffix
+base_profile_name=
+# read length for custom error profile
+profile_read_length=
+
 #paired end read, insert size (not applicable for nanosim)
 fragments_size_mean=270
 fragment_size_standard_deviation=27

diff --git a/metagenomesimulation.py b/metagenomesimulation.py
@@ -336,14 +336,26 @@ def _simulate_reads(self, file_path_distribution, sample_index):
             tmp_dir=self._project_file_folder_handler.get_tmp_wd())
 
         file_path_genome_locations = self._project_file_folder_handler.get_genome_location_file_path()
-        simulator.simulate(
-            file_path_distribution=file_path_distribution,
-            file_path_genome_locations=file_path_genome_locations,
-            directory_output=directory_output_tmp,
-            total_size=self._sample_size_in_base_pairs,
-            profile=self._error_profile,
-            fragment_size_mean=self._fragments_size_mean_in_bp,
-            fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp)
+        if self._read_simulator_type == "art":
+            simulator.simulate(
+                file_path_distribution=file_path_distribution,
+                file_path_genome_locations=file_path_genome_locations,
+                directory_output=directory_output_tmp,
+                total_size=self._sample_size_in_base_pairs,
+                profile=self._error_profile,
+                fragment_size_mean=self._fragments_size_mean_in_bp,
+                fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp,
+                profile_filename=self._custom_profile_filename,
+                own_read_length=self._custom_readlength)
+        else:
+            simulator.simulate(
+                file_path_distribution=file_path_distribution,
+                file_path_genome_locations=file_path_genome_locations,
+                directory_output=directory_output_tmp,
+                total_size=self._sample_size_in_base_pairs,
+                profile=self._error_profile,
+                fragment_size_mean=self._fragments_size_mean_in_bp,
+                fragment_size_standard_deviation=self._fragment_size_standard_deviation_in_bp)
 
         # convert sam to bam
         samtools = SamtoolsWrapper(

diff --git a/scripts/ReadSimulationWrapper/readsimulationwrapper.py b/scripts/ReadSimulationWrapper/readsimulationwrapper.py
@@ -791,7 +791,8 @@ def __init__(self, file_path_executable, directory_error_profiles, **kwargs):
 
     def simulate(
         self, file_path_distribution, file_path_genome_locations, directory_output,
-        total_size, profile, fragment_size_mean, fragment_size_standard_deviation):
+        total_size, profile, fragment_size_mean, fragment_size_standard_deviation,
+        profile_filename=None, own_read_length=None):
         """
         Simulate reads based on a given sample distribution
 
@@ -802,13 +803,17 @@ def simulate(
         @param directory_output: Directory for the sam and fastq files output
         @type directory_output: str | unicode
         @param total_size: Size of sample in base pairs
-        @type total_size: int 
-        @param profile: Art illumina error profile: 'low', 'mi', 'hi', 'hi150'
+        @type total_size: int | long
+        @param profile: Art illumina error profile: 'low', 'mi', 'hi', 'hi150', 'own'
         @type profile: str | unicode
         @param fragment_size_mean: Size of the fragment of which the ends are used as reads in base pairs
         @type fragment_size_mean: int 
         @param fragment_size_standard_deviation: Standard deviation of the fragment size in base pairs.
-        @type fragment_size_standard_deviation: int 
+        @type fragment_size_standard_deviation: int | long
+        @param profile_filename: Optional base name of user-supplied error profile files (without "[1/2].txt").
+        @type profile_filename: str | unicode | None
+        @param own_read_length: Optional read length for user-supplied error profile.
+        @type own_read_length: int | long | None
         """
         assert isinstance(total_size, (float, int)), "Expected natural digit"
         assert isinstance(fragment_size_mean, int), "Expected natural digit"
@@ -817,6 +822,27 @@ def simulate(
         assert fragment_size_mean > 0, "Mean fragments size needs to be a positive number"
         assert fragment_size_standard_deviation > 0, "Fragment size standard deviation needs to be a positive number"
         assert self.validate_dir(directory_output)
+        # if user specifies own profile, add corresponding parameters
+        if profile == "own":
+            # sanity checks
+            assert own_read_length, "Read length must be given when supplying own profile"
+            assert isinstance(own_read_length, (int, long)), "Expected natural digit for read length"
+            assert own_read_length > 0, "Read length must be a positive number"
+            assert profile_filename, "Profile filename must be given when supplying own profile"
+            # sanity check file name
+            legal_for_filename = string.ascii_letters + string.digits + '_-./\\'
+            assert self.validate_characters(profile_filename, legal_alphabet=legal_for_filename)
+            # check if supplied files are present
+            own_filenames = [
+                profile_filename+file_end
+                for file_end in ['1.txt', '2.txt']
+            ]
+            #assert self.validate_dir(self._directory_error_profiles, file_names=own_filenames)
+            for own_file in own_filenames:
+                assert self.validate_file(own_file)
+            # add user-supplied profiles
+            self._art_error_profiles["own"] = profile_filename
+            self._art_read_length["own"] = own_read_length
         if profile is not None:
             assert profile in self._art_error_profiles, "Unknown art illumina profile: '{}'".format(profile)
             assert profile in self._art_read_length,  "Unknown art illumina profile: '{}'".format(profile)

diff --git a/scripts/configfilehandler.py b/scripts/configfilehandler.py
@@ -91,6 +91,12 @@ def _read_config(self, file_path_config):
 
         if self._error_profile is None:
             self._error_profile = self._config.get_value("profile", silent=True)
+
+        if self._custom_profile_filename is None:
+            self._custom_profile_filename = self._config.get_value("base_profile_name", silent=True)
+
+        if self._custom_readlength is None:
+            self._custom_readlength = self._config.get_value("profile_read_length", is_digit=True, silent=True)
 
         if self._fragment_size_standard_deviation_in_bp is None:
             self._fragment_size_standard_deviation_in_bp = self._config.get_value(
@@ -203,6 +209,8 @@ def _stream_read_simulator(self, output_stream=sys.stdout):
         output_stream.write("error_profiles={}\n".format(self._directory_error_profiles or ""))
         output_stream.write("samtools={}\n".format(self._executable_samtools))
         output_stream.write("profile={}\n".format(self._error_profile))
+        output_stream.write("base_profile_name={}\n".format(self._custom_profile_filename or ""))
+        output_stream.write("profile_read_length={}\n".format(self._custom_readlength or ""))
         output_stream.write("size={}\n".format(self._sample_size_in_base_pairs/self._base_pairs_multiplication_factor))
         output_stream.write("type={}\n".format(self._read_simulator_type))
         output_stream.write("fragments_size_mean={}\n".format(self._fragments_size_mean_in_bp))

diff --git a/scripts/defaultvalues.py b/scripts/defaultvalues.py
@@ -59,6 +59,8 @@ class DefaultValues(DefaultLogging):
 
     _read_simulator_type = None
     _error_profile = None
+    _custom_profile_filename = None
+    _custom_readlength = None
     _fragment_size_standard_deviation_in_bp = None
     _fragments_size_mean_in_bp = None