nf-core · ypriverol · Oct 31, 2023 · Oct 23, 2023 · Oct 23, 2023 · Oct 23, 2023
diff --git a/.gitignore b/.gitignore
@@ -12,4 +12,9 @@ testing*
 /build/
 results*/
 venv/
+node_modules
+conversion_inputs
+debug_dir
+test_out
+
 lint_log.txt
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,13 +15,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### `Changed`
 
-- Update for pmultiqc to pmultiqc=0.0.21
-- Update for openms to openms=3.1.0
+- [#314](https://github.com/bigbio/quantms/pull/314) Update for pmultiqc to pmultiqc=0.0.23
+- [#308](https://github.com/bigbio/quantms/pull/308) Update for openms to openms=3.1.0
 - Update for sdrf-pipelines to sdrf-pipelines=0.0.24
 - Update for msstats to msstats=4.2.1
 
 ### `Fixed`
 
+- [#316](https://github.com/bigbio/quantms/pull/316) Fixed jar path selection of luciphoradapter and msgf+
 - Fixed bug where modification masses were not calculated correctly in DIA-NN conversion.
 - Fixed multiple bugs Pull Request [#293 BigBio](https://github.com/bigbio/quantms/pull/293), [#279 BigBio](https://github.com/bigbio/quantms/pull/279), [#265 BigBio](https://github.com/bigbio/quantms/pull/265), [#260 BigBio](https://github.com/bigbio/quantms/pull/260), [#257 BigBio](https://github.com/bigbio/quantms/pull/257)
 
@@ -36,6 +37,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - lfq_intensity_threshold: Minimum intensity of a feature to be considered in the MBR algorithm (default: 1000)
 - sage_processes: Number of processes to use in SAGE search engine (default: 1)
 - diann_speclib: Path to the spectral library to use in DIA-NN (default: null)
+- convert_dotd: Whether to convert Bruker .d files to mzML (default: false)
 
 ### `Deprecations`
 

diff --git a/README.md b/README.md
@@ -55,10 +55,11 @@ On release, automated continuous integration tests run the pipeline on a full-si
 
 ### DIA-LFQ (data-independent label-free quantification)
 
-1. RAW file conversion to mzML ([`thermorawfileparser`](https://github.com/compomics/ThermoRawFileParser))
-2. DIA-NN analysis [`dia-nn`](https://github.com/vdemichev/DiaNN/)
-3. Generation of output files (msstats)
-4. QC reports generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)
+1. RAW file conversion to mzML when RAW files are provided as input ([`thermorawfileparser`](https://github.com/compomics/ThermoRawFileParser))
+2. [Optional step](https://github.com/bigbio/quantms/blob/dev/modules/local/tdf2mzml/main.nf): conversion of .d files to mzML when Bruker data are provided as input and `convert_dotd` is set to true
+3. DIA-NN analysis [`dia-nn`](https://github.com/vdemichev/DiaNN/)
+4. Generation of output files (msstats)
+5. QC reports generation [`pmultiqc`](https://github.com/bigbio/pmultiqc)
 
 ### Functionality overview
 

diff --git a/bin/diann_convert.py b/bin/diann_convert.py
@@ -52,7 +52,7 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c
 
     :param folder: DiannConvert specifies the folder where the required file resides. The folder contains
         the DiaNN main report, protein matrix, precursor matrix, experimental design file, protein sequence
-        FASTA file, version file of DiaNN and mzml_info TSVs
+        FASTA file, version file of DiaNN and ms_info TSVs
     :type folder: str
     :param dia_params: A list contains DIA parameters
     :type dia_params: list
@@ -252,8 +252,8 @@ def fasta(self) -> os.PathLike:
             return self.find_suffix_file(".fa")
 
     @property
-    def mzml_info(self) -> os.PathLike:
-        return self.find_suffix_file("mzml_info.tsv")
+    def ms_info(self) -> os.PathLike:
+        return self.find_suffix_file("ms_info.tsv")
 
     @property
     def validate_diann_version(self) -> str:
@@ -826,7 +826,7 @@ def mztab_PSH(report, folder, database):
     :type report: pandas.core.frame.DataFrame
     :param folder: DiannConvert specifies the folder where the required file resides. The folder contains
         the DiaNN main report, protein matrix, precursor matrix, experimental design file, protein sequence
-        FASTA file, version file of DiaNN and mzml_info TSVs
+        FASTA file, version file of DiaNN and ms_info TSVs
     :type folder: str
     :param database: Path to fasta file
     :type database: str
@@ -837,7 +837,7 @@ def mztab_PSH(report, folder, database):
 
     def __find_info(dir, n):
         # This line matches n="220101_myfile", folder="." to
-        # "myfolder/220101_myfile_mzml_info.tsv"
+        # "myfolder/220101_myfile_ms_info.tsv"
         files = list(Path(dir).glob(f"*{n}*_info.tsv"))
         # Check that it matches one and only one file
         if not files:
@@ -860,6 +860,12 @@ def __find_info(dir, n):
         group.sort_values(by="RT.Start", inplace=True)
         target = target[["Retention_Time", "SpectrumID", "Exp_Mass_To_Charge"]]
         target.columns = ["RT.Start", "opt_global_spectrum_reference", "exp_mass_to_charge"]
+        # Standardize the spectrum identifier format for Bruker data: prefix non-string IDs with "scan="
+        if type(target.loc[0, "opt_global_spectrum_reference"]) != str:
+            target.loc[:, "opt_global_spectrum_reference"] = "scan=" + target.loc[
+                :, "opt_global_spectrum_reference"
+            ].astype(str)
+
         # TODO seconds returned from precursor.getRT()
         target.loc[:, "RT.Start"] = target.apply(lambda x: x["RT.Start"] / 60, axis=1)
         out_mztab_PSH = pd.concat([out_mztab_PSH, pd.merge_asof(group, target, on="RT.Start", direction="nearest")])