From f4c74554c2e601bfce8d9036263fb6da4e26e1bd Mon Sep 17 00:00:00 2001
From: Timo Sachsenberg <timo.sachsenberg@uni-tuebingen.de>
Date: Wed, 31 Jan 2024 04:38:03 +0100
Subject: [PATCH] started moving some parts that can be used by LFQ and
 Isobaric Workflows to helper class

---
 .../QUANTITATION/DDAWorkflowCommons.h         | 221 ++++++++++++++++++
 .../QUANTITATION/IsobaricIsotopeCorrector.h   |   2 +
 .../ANALYSIS/QUANTITATION/sources.cmake       |   1 +
 .../QUANTITATION/DDAWorkflowCommons.cpp       |   9 +
 .../ANALYSIS/QUANTITATION/sources.cmake       |   1 +
 src/topp/ProteomicsLFQ.cpp                    | 155 +-----------
 6 files changed, 244 insertions(+), 145 deletions(-)
 create mode 100644 src/openms/include/OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h
 create mode 100644 src/openms/source/ANALYSIS/QUANTITATION/DDAWorkflowCommons.cpp
diff --git a/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h
new file mode 100644
index 00000000000..efe490ece12
--- /dev/null
+++ b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h
@@ -0,0 +1,221 @@
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// --------------------------------------------------------------------------
+// $Maintainer: Timo Sachsenberg $
+// $Authors: Timo Sachsenberg $
+// --------------------------------------------------------------------------
+
+#pragma once
+
+#include <OpenMS/CONCEPT/Exception.h>
+#include <OpenMS/CONCEPT/LogStream.h>
+#include <OpenMS/DATASTRUCTURES/String.h>
+
+#include <map>
+
+namespace OpenMS
+{
+    /**
+        @brief Common functions for DDA workflows
+    
+        @ingroup Analysis_ID
+    */
+    class OPENMS_DLLAPI DDAWorkflowCommons
+    {
+        public:
+        /* @brief create Map between mzML file and corresponding id file
+         * Checks implemented:
+         * 1. Check if the number of spectra and id files match.
+         * 2. If spectra and id files share common base names (without extension) 
+         *    but appear in different order, throw an error.
+        */
+        static std::map<String, String> mapMzML2Ids(StringList & in, StringList & in_ids)
+        {
+            // Detect the common case that ID files have same names as spectra files
+            if (!File::validateMatchingFileNames(in, in_ids, true, true, false)) // only basenames, without extension, only order
+            {
+            // Spectra and id files have the same set of basenames but appear in different order. -> this is most likely an error
+            throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
+                "ID and spectra file match but order of file names seem to differ. They need to be provided in the same order.");
+            }
+
+            std::map<String, String> mzfile2idfile;
+            for (Size i = 0; i != in.size(); ++i)
+            {
+                const String& in_abs_path = File::absolutePath(in[i]);
+                const String& id_abs_path = File::absolutePath(in_ids[i]);
+                mzfile2idfile[in_abs_path] = id_abs_path;      
+                OPENMS_LOG_DEBUG << ("Spectra: " + in[i] + "\t Ids: " + in_ids[i],  1) << std::endl;
+            }
+            return mzfile2idfile;
+        }
+    }
+
+/**
+ * @brief Small helper to get the mapping from id files to mzML files
+ *
+ * Basically just reverses the mapMzML2Ids function. 
+ * Potential improvement: Could be combined into a single functionexposed to the user.
+ */
+  std::map<String, String> mapId2MzMLs(const map<String, String>& m2i)
+  {
+    map<String, String> idfile2mzfile;
+    for (const auto& m : m2i)
+    {
+      idfile2mzfile[m.second] = m.first;
+    }
+    return idfile2mzfile;
+  }
+
+
+/**
+ * Estimates the median chromatographic full width at half maximum (FWHM) for a given MSExperiment.
+ *
+ * @param ms_centroided The centroided MSExperiment for which to estimate the FWHM.
+ * @return The estimated median chromatographic FWHM based on the top 1000 intensity mass traces.
+ */
+  double estimateMedianChromatographicFWHM(MSExperiment & ms_centroided)
+  {
+    MassTraceDetection mt_ext;
+    Param mtd_param = mt_ext.getParameters();
+
+    OPENMS_LOG_DEBUG << "Parameters passed to MassTraceDetection" << mtd_param << std::endl;
+
+    std::vector<MassTrace> m_traces;
+    mt_ext.run(ms_centroided, m_traces, 1000);
+
+    std::vector<double> fwhm_1000;
+    for (auto &m : m_traces)
+    {
+      if (m.getSize() == 0) continue;
+      m.updateMeanMZ();
+      m.updateWeightedMZsd();
+      double fwhm = m.estimateFWHM(false);
+      fwhm_1000.push_back(fwhm);
+    }
+
+    double median_fwhm = Math::median(fwhm_1000.begin(), fwhm_1000.end());
+
+    return median_fwhm;
+  }
+
+/**
+ * @brief Recalibrates the masses of the MSExperiment using peptide identifications.
+ *
+ * This function recalibrates the masses of the MSExperiment by applying a mass recalibration
+ * based on the theoretical masses from identification data.
+ *
+ * @param ms_centroided The MSExperiment object containing the centroided spectra.
+ * @param peptide_ids The vector of PeptideIdentification objects containing the peptide identifications.
+ * @param id_file_abs_path The absolute path of the identification file.
+ */
+  void recalibrateMS1(MSExperiment & ms_centroided,
+    vector<PeptideIdentification>& peptide_ids,
+    const String & id_file_abs_path = "")
+  {
+    InternalCalibration ic;
+    ic.setLogType(log_type_);
+    ic.fillCalibrants(peptide_ids, 25.0); // >25 ppm maximum deviation defines an outlier TODO: check if we need to adapt this
+    if (ic.getCalibrationPoints().size() <= 1) return;
+
+    // choose calibration model based on number of calibration points
+
+    // there seem to be some problems with the QUADRATIC model that we first need to investigate
+    //MZTrafoModel::MODELTYPE md = (ic.getCalibrationPoints().size() == 2) ? MZTrafoModel::LINEAR : MZTrafoModel::QUADRATIC;
+    //bool use_RANSAC = (md == MZTrafoModel::LINEAR || md == MZTrafoModel::QUADRATIC);
+
+    MZTrafoModel::MODELTYPE md = MZTrafoModel::LINEAR;
+    bool use_RANSAC = true;
+
+    Size RANSAC_initial_points = (md == MZTrafoModel::LINEAR) ? 2 : 3;
+    Math::RANSACParam p(RANSAC_initial_points, 70, 10, 30, true); // TODO: check defaults (taken from tool)
+    MZTrafoModel::setRANSACParams(p);
+    // these limits are a little loose, but should prevent grossly wrong models without burdening the user with yet another parameter.
+    MZTrafoModel::setCoefficientLimits(25.0, 25.0, 0.5); 
+
+    IntList ms_level = {1};
+    double rt_chunk = 300.0; // 5 minutes
+    String qc_residual_path, qc_residual_png_path;
+    if (!id_file_abs_path.empty())
+    {
+      const String & id_basename = File::basename(id_file_abs_path);
+      qc_residual_path = id_basename + "qc_residuals.tsv";
+      qc_residual_png_path = id_basename + "qc_residuals.png";
+    } 
+
+    if (!ic.calibrate(ms_centroided, 
+                  ms_level, md, rt_chunk, use_RANSAC, 
+                  10.0,
+                  5.0, 
+                  "",                      
+                  "",
+                  qc_residual_path,
+                  qc_residual_png_path,
+                  "Rscript"))
+    {
+      OPENMS_LOG_WARN << "\nCalibration failed. See error message above!" << std::endl;
+    }
+  }
+
+
+/**
+ * @brief Extracts seeding features from centroided MS data (e.g., for untarged extraction).
+ *
+ * MS1 spectra are subjected to a threshold filter to removelow-intensity peaks, 
+ * and then uses the FeatureFinderMultiplex algorithm to identify potential seeding features.
+ * The function also takes into account the median full width at half maximum (FWHM) of the peaks 
+ * to adjust the FeatureFinderMultiplex parameters for better seed detection.
+ *
+ * @param[in] ms_centroided The MSExperiment object containing centroided mass spectrometry data. Only MS1 level
+ *                          spectra are considered for seed calculation.
+ * @param[out] seeds The FeatureMap object where the identified seeding features will be stored.
+ * @param[in] median_fwhm The median FWHM of the peaks, used to adjust the FeatureFinderMultiplex parameters for
+ *                        seed detection.
+ *
+ * @note The function currently uses hardcoded parameters for the threshold filter and FeatureFinderMultiplex
+ *       algorithm, which may need to be derived from the data or provided as function arguments in future
+ *       implementations.
+ */
+  void calculateSeeds(
+    const MSExperiment & ms_centroided, 
+    const double intensity_threshold,
+    FeatureMap & seeds, 
+    double median_fwhm
+    Size charge_min = 2,
+    Size charge_max = 5)
+  {
+    //TODO: Actually FFM provides a parameter for minimum intensity. Also it copies the full experiment again once or twice.
+    MSExperiment e;
+    for (const auto& s : ms_centroided)
+    { 
+      if (s.getMSLevel() == 1) 
+      {              
+        e.addSpectrum(s);
+      }
+    }
+
+    ThresholdMower threshold_mower_filter;
+    Param tm = threshold_mower_filter.getParameters();
+    tm.setValue("threshold", intensity_threshold);;  // TODO: derive from data
+    threshold_mower_filter.setParameters(tm);
+    threshold_mower_filter.filterPeakMap(e);
+
+    FeatureFinderMultiplexAlgorithm algorithm;
+    Param p = algorithm.getParameters();
+    p.setValue("algorithm:labels", ""); // unlabeled only
+    p.setValue("algorithm:charge", String(charge_min) + ":" + String(charge_max));
+    p.setValue("algorithm:rt_typical", median_fwhm * 3.0);
+    p.setValue("algorithm:rt_band", 3.0); // max 3 seconds shifts between isotopic traces (not sure if needed)
+    p.setValue("algorithm:rt_min", median_fwhm * 0.5);
+    p.setValue("algorithm:spectrum_type", "centroid");
+    algorithm.setParameters(p);
+    //FIXME progress of FFM is not printed at all
+    const bool progress(true);
+    algorithm.run(e, progress);
+    seeds = algorithm.getFeatureMap(); 
+    OPENMS_LOG_INFO << "Using " << seeds.size() << " seeds from untargeted feature extraction." << endl;
+  }
+
+}
+
diff --git a/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.h b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.h
index 6857ae61f8c..648daee7e61 100644
--- a/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.h
+++ b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/IsobaricIsotopeCorrector.h
@@ -9,6 +9,7 @@
 #pragma once
 
 #include <OpenMS/DATASTRUCTURES/Matrix.h>
+#include <memory>
 
 // forward decl
 namespace Eigen
@@ -78,6 +79,7 @@ namespace OpenMS
      */
     static void solveNNLS_(const Matrix<double>& correction_matrix,
                            const Matrix<double>& m_b, Matrix<double>& m_x);
+    // @jpfeuffer why shared_ptr needed here?                           
     static void solveNNLS_(std::shared_ptr<Eigen::MatrixXd> & correction_matrix, std::vector<double> & b, std::vector<double> & x);
     static void solveNNLS_(std::shared_ptr<const Eigen::MatrixXd> & correction_matrix, std::vector<double> & b, std::vector<double> & x);
 
diff --git a/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/sources.cmake b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/sources.cmake
index f98de702e5f..1b1900438e2 100644
--- a/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/sources.cmake
+++ b/src/openms/include/OpenMS/ANALYSIS/QUANTITATION/sources.cmake
@@ -3,6 +3,7 @@ set(directory include/OpenMS/ANALYSIS/QUANTITATION)
 
 ### list all header files of the directory here
 set(sources_list_h
+DDAWorkflowCommons.h
 IsobaricChannelExtractor.h
 IsobaricIsotopeCorrector.h
 IsobaricNormalizer.h
diff --git a/src/openms/source/ANALYSIS/QUANTITATION/DDAWorkflowCommons.cpp b/src/openms/source/ANALYSIS/QUANTITATION/DDAWorkflowCommons.cpp
new file mode 100644
index 00000000000..bccb921f37e
--- /dev/null
+++ b/src/openms/source/ANALYSIS/QUANTITATION/DDAWorkflowCommons.cpp
@@ -0,0 +1,9 @@
+// Copyright (c) 2002-present, The OpenMS Team -- EKU Tuebingen, ETH Zurich, and FU Berlin
+// SPDX-License-Identifier: BSD-3-Clause
+//
+// --------------------------------------------------------------------------
+// $Maintainer: Timo Sachsenberg $
+// $Authors: Timo Sachsenberg $
+// --------------------------------------------------------------------------
+
+#include <OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h>
diff --git a/src/openms/source/ANALYSIS/QUANTITATION/sources.cmake b/src/openms/source/ANALYSIS/QUANTITATION/sources.cmake
index 9f7ae4a3e9c..d515f2fb738 100644
--- a/src/openms/source/ANALYSIS/QUANTITATION/sources.cmake
+++ b/src/openms/source/ANALYSIS/QUANTITATION/sources.cmake
@@ -3,6 +3,7 @@ set(directory source/ANALYSIS/QUANTITATION)
 
 ### list all filenames of the directory here
 set(sources_list
+DDAWorkflowCommons.cpp
 IsobaricChannelExtractor.cpp
 IsobaricIsotopeCorrector.cpp
 IsobaricNormalizer.cpp
diff --git a/src/topp/ProteomicsLFQ.cpp b/src/topp/ProteomicsLFQ.cpp
index eba7217bac1..8a799a498ac 100644
--- a/src/topp/ProteomicsLFQ.cpp
+++ b/src/topp/ProteomicsLFQ.cpp
@@ -20,6 +20,7 @@
 #include <OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentAlgorithmTreeGuided.h>
 #include <OpenMS/ANALYSIS/MAPMATCHING/MapAlignmentTransformer.h>
 #include <OpenMS/ANALYSIS/QUANTITATION/PeptideAndProteinQuant.h>
+#include <OpenMS/ANALYSIS/QUANTITATION/DDAWorkflowCommons.h>
 #include <OpenMS/APPLICATIONS/MapAlignerBase.h>
 #include <OpenMS/APPLICATIONS/TOPPBase.h>
 #include <OpenMS/DATASTRUCTURES/CalibrationData.h>
@@ -274,40 +275,6 @@ class ProteomicsLFQ :
     registerFullParam_(combined);
   }
 
-  // Map between mzML file and corresponding id file
-  // Warn if the primaryMSRun indicates that files were provided in the wrong order.
-  map<String, String> mapMzML2Ids_(StringList & in, StringList & in_ids)
-  {
-    // Detect the common case that ID files have same names as spectra files
-    if (!File::validateMatchingFileNames(in, in_ids, true, true, false)) // only basenames, without extension, only order
-    {
-      // Spectra and id files have the same set of basenames but appear in different order. -> this is most likely an error
-      throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
-        "ID and spectra file match but order of file names seem to differ. They need to be provided in the same order.");
-    }
-
-    map<String, String> mzfile2idfile;
-    for (Size i = 0; i != in.size(); ++i)
-    {
-      const String& in_abs_path = File::absolutePath(in[i]);
-      const String& id_abs_path = File::absolutePath(in_ids[i]);
-      mzfile2idfile[in_abs_path] = id_abs_path;      
-      writeDebug_("Spectra: " + in[i] + "\t Ids: " + in_ids[i],  1);
-    }
-    return mzfile2idfile;
-  }
-
-  // map back
-  map<String, String> mapId2MzMLs_(const map<String, String>& m2i)
-  {
-    map<String, String> idfile2mzfile;
-    for (const auto& m : m2i)
-    {
-      idfile2mzfile[m.second] = m.first;
-    }
-    return idfile2mzfile;
-  }
-
   ExitCodes centroidAndCorrectPrecursors_(const String & mz_file, MSExperiment & ms_centroided)
   { 
     Param pp_param = getParam_().copy("Centroiding:", true);
@@ -384,111 +351,6 @@ class ProteomicsLFQ :
     return EXECUTION_OK;
   }
 
-  void recalibrateMasses_(MSExperiment & ms_centroided, vector<PeptideIdentification>& peptide_ids, const String & id_file_abs_path)
-  {
-    InternalCalibration ic;
-    ic.setLogType(log_type_);
-    ic.fillCalibrants(peptide_ids, 25.0); // >25 ppm maximum deviation defines an outlier TODO: check if we need to adapt this
-    if (ic.getCalibrationPoints().size() <= 1) return;
-
-    // choose calibration model based on number of calibration points
-
-    // there seem to be some problems with the QUADRATIC model that we first need to investigate
-    //MZTrafoModel::MODELTYPE md = (ic.getCalibrationPoints().size() == 2) ? MZTrafoModel::LINEAR : MZTrafoModel::QUADRATIC;
-    //bool use_RANSAC = (md == MZTrafoModel::LINEAR || md == MZTrafoModel::QUADRATIC);
-
-    MZTrafoModel::MODELTYPE md = MZTrafoModel::LINEAR;
-    bool use_RANSAC = true;
-
-    Size RANSAC_initial_points = (md == MZTrafoModel::LINEAR) ? 2 : 3;
-    Math::RANSACParam p(RANSAC_initial_points, 70, 10, 30, true); // TODO: check defaults (taken from tool)
-    MZTrafoModel::setRANSACParams(p);
-    // these limits are a little loose, but should prevent grossly wrong models without burdening the user with yet another parameter.
-    MZTrafoModel::setCoefficientLimits(25.0, 25.0, 0.5); 
-
-    IntList ms_level = {1};
-    double rt_chunk = 300.0; // 5 minutes
-    String qc_residual_path, qc_residual_png_path;
-    if (debug_level_ >= 1)
-    {
-      const String & id_basename = File::basename(id_file_abs_path);
-      qc_residual_path = id_basename + "qc_residuals.tsv";
-      qc_residual_png_path = id_basename + "qc_residuals.png";
-    } 
-
-    if (!ic.calibrate(ms_centroided, ms_level, md, rt_chunk, use_RANSAC, 
-                  10.0,
-                  5.0, 
-                  "",                      
-                  "",
-                  qc_residual_path,
-                  qc_residual_png_path,
-                  "Rscript"))
-    {
-      OPENMS_LOG_WARN << "\nCalibration failed. See error message above!" << std::endl;
-    }
-  }
-
-  double estimateMedianChromatographicFWHM_(MSExperiment & ms_centroided)
-  {
-    MassTraceDetection mt_ext;
-    Param mtd_param = mt_ext.getParameters();
-    writeDebug_("Parameters passed to MassTraceDetection", mtd_param, 3);
-
-    std::vector<MassTrace> m_traces;
-    mt_ext.run(ms_centroided, m_traces, 1000);
-
-    std::vector<double> fwhm_1000;
-    for (auto &m : m_traces)
-    {
-      if (m.getSize() == 0) continue;
-      m.updateMeanMZ();
-      m.updateWeightedMZsd();
-      double fwhm = m.estimateFWHM(false);
-      fwhm_1000.push_back(fwhm);
-    }
-
-    double median_fwhm = Math::median(fwhm_1000.begin(), fwhm_1000.end());
-
-    OPENMS_LOG_INFO << "Median chromatographic FWHM: " << median_fwhm << std::endl;
-
-    return median_fwhm;
-  }
-
-  void calculateSeeds_(const MSExperiment & ms_centroided, FeatureMap & seeds, double median_fwhm)
-  {
-    //TODO: Actually FFM provides a parameter for minimum intensity. Also it copies the full experiment again once or twice.
-    MSExperiment e;
-    for (const auto& s : ms_centroided)
-    { 
-      if (s.getMSLevel() == 1) 
-      {              
-        e.addSpectrum(s);
-      }
-    }
-
-    ThresholdMower threshold_mower_filter;
-    Param tm = threshold_mower_filter.getParameters();
-    tm.setValue("threshold", getDoubleOption_("Seeding:intThreshold"));  // TODO: derive from data
-    threshold_mower_filter.setParameters(tm);
-    threshold_mower_filter.filterPeakMap(e);
-
-    FeatureFinderMultiplexAlgorithm algorithm;
-    Param p = algorithm.getParameters();
-    p.setValue("algorithm:labels", ""); // unlabeled only
-    p.setValue("algorithm:charge", getStringOption_("Seeding:charge")); //TODO infer from IDs?
-    p.setValue("algorithm:rt_typical", median_fwhm * 3.0);
-    p.setValue("algorithm:rt_band", getDoubleOption_("Seeding:traceRTTolerance")); // max 3 seconds shifts between isotopic traces
-    p.setValue("algorithm:rt_min", median_fwhm * 0.5);
-    p.setValue("algorithm:spectrum_type", "centroid");
-    algorithm.setParameters(p);
-    //FIXME progress of FFM is not printed at all
-    const bool progress(true);
-    algorithm.run(e, progress);
-    seeds = algorithm.getFeatureMap(); 
-    OPENMS_LOG_INFO << "Using " << seeds.size() << " seeds from untargeted feature extraction." << endl;
-  }
-
 
   // aligns the feature maps 
   double align_(
@@ -1019,7 +881,8 @@ class ProteomicsLFQ :
       //-------------------------------------------------------------
       if (getStringOption_("mass_recalibration") == "true")
       {
-        recalibrateMasses_(ms_centroided, peptide_ids, id_file_abs_path);
+        String debug_output_basename = (debug_level_ > 666) ? id_file_abs_path : "";
+        DDAWorkflowCommons::recalibrateMS1(ms_centroided, peptide_ids, debug_output_basename);
       }
 
       vector<ProteinIdentification> ext_protein_ids;
@@ -1028,7 +891,8 @@ class ProteomicsLFQ :
       //////////////////////////////////////////
       // Chromatographic parameter estimation
       //////////////////////////////////////////
-      median_fwhm = estimateMedianChromatographicFWHM_(ms_centroided);
+      median_fwhm = DDAWorfklowCommons::estimateMedianChromatographicFWHM(ms_centroided);
+      OPENMS_LOG_INFO << "Median chromatographic FWHM: " << median_fwhm << std::endl;
 
       //-------------------------------------------------------------
       // Feature detection
@@ -1044,7 +908,8 @@ class ProteomicsLFQ :
 
       if (!targeted_only)
       {
-        calculateSeeds_(ms_centroided, seeds, median_fwhm);
+        // TODO: infer min/max charge from ID data
+        DDAWorkflowCommons::calculateSeeds(ms_centroided, getDoubleOption_("Seeding:intThreshold"), seeds, median_fwhm, 2, 5);
         if (debug_level_ > 666)
         {
           FileHandler().storeFeatures("debug_seeds_fraction_" + String(ms_files.first) + "_" + String(fraction_group) + ".featureXML", seeds, {FileTypes::FEATUREXML});
@@ -1669,9 +1534,9 @@ class ProteomicsLFQ :
     }
 
     // Map between mzML file and corresponding id file
-    // Here we currently assume that these are provided in the exact same order.
-    map<String, String> mzfile2idfile = mapMzML2Ids_(in, in_ids);
-    map<String, String> idfile2mzfile = mapId2MzMLs_(mzfile2idfile);
+    // We assume that these are provided in the exact same order.
+    map<String, String> mzfile2idfile = DDAWorkflowCommons::mapMzML2Ids(in, in_ids);
+    map<String, String> idfile2mzfile = DDAWorkflowCommons::mapId2MzMLs(mzfile2idfile);
 
     // TODO maybe check if mzMLs in experimental design match to mzMLs passed as in parameter
     //  IF both are present