diff --git a/src/dimet/__main__.py b/src/dimet/__main__.py index a371634..96919d6 100644 --- a/src/dimet/__main__.py +++ b/src/dimet/__main__.py @@ -25,7 +25,6 @@ def main_run_analysis(cfg: DictConfig) -> None: config=hydra.utils.instantiate(cfg.analysis.dataset)) dataset.preload() dataset.split_datafiles_by_compartment() - dataset.save_datafiles_split_by_compartment() method: Method = hydra.utils.instantiate( cfg.analysis.method).build() # method factory diff --git a/src/dimet/config/analysis/method/bivariate_analysis.yaml b/src/dimet/config/analysis/method/bivariate_analysis.yaml new file mode 100644 index 0000000..2ec0fa5 --- /dev/null +++ b/src/dimet/config/analysis/method/bivariate_analysis.yaml @@ -0,0 +1,26 @@ +_target_: dimet.method.BivariateAnalysisConfig + +label: bivariate analysis +name: Computation of the correlation of MDV profiles, or the metabolite time course profiles + +# (**) : automatically will run + +conditions_MDV_comparison: # (**) if >= 2 conditions and >=1 timepoint (timepoints run separately) + isotopologue_proportions: pearson + +timepoints_MDV_comparison: # (**) if >= 1 condition and >=2 timepoints + isotopologue_proportions: pearson + +conditions_metabolite_time_profiles: # (**) if >= 2 conditions AND >=2 time points in data + abundances: pearson + mean_enrichment: pearson + +correction_method: fdr_bh + +impute_values: + abundances: "min" + mean_enrichment: "min" + isotopologues: "min" + isotopologue_proportions: "min" + +output_include_gmean_arr_columns: True # if False, the 'gmean_arr_.." columns are excluded \ No newline at end of file diff --git a/src/dimet/data/__init__.py b/src/dimet/data/__init__.py index a25da3c..7603192 100644 --- a/src/dimet/data/__init__.py +++ b/src/dimet/data/__init__.py @@ -47,7 +47,6 @@ def build(self) -> "Dataset": class Dataset(BaseModel): config: DatasetConfig raw_data_folder: str = None - processed_data_folder: str = None sub_folder_absolute: str = None metadata_df: Optional[pd.DataFrame] = None abundances_df: Optional[pd.DataFrame] = None @@ -77,8 +76,7 @@ def preload(self): else: self.sub_folder_absolute = self.config.subfolder self.raw_data_folder = os.path.join(self.sub_folder_absolute, "raw") - self.processed_data_folder = os.path.join(self.sub_folder_absolute, - "processed") + # start loading the dataframes file_paths = [ ("metadata", os.path.join(self.raw_data_folder, @@ -106,10 +104,14 @@ def preload(self): dfs.append(pd.read_csv(file_path, sep="\t", header=0)) self.available_datasets.add(label) except FileNotFoundError: - logger.critical( - "File %s not found, continuing, " - "but this might fail miserably", - file_path) + if file_path.endswith(self.config.isotopologues + ".csv"): + message_detail = "isotopologue absolute values missing" + logger.critical( + "File %s not found (%s), continuing" + % (file_path, message_detail)) + else: + logger.critical("File %s not found, continuing", + file_path) dfs.append(None) except Exception as e: logger.error( @@ -169,19 +171,6 @@ def split_datafiles_by_compartment(self) -> None: frames_dict = set_samples_names(frames_dict, self.metadata_df) self.compartmentalized_dfs = frames_dict - def save_datafiles_split_by_compartment(self) -> None: - os.makedirs(self.processed_data_folder, exist_ok=True) - out_data_path = self.processed_data_folder - for file_name in self.compartmentalized_dfs.keys(): - for compartment in self.compartmentalized_dfs[file_name].keys(): - df = self.compartmentalized_dfs[file_name][compartment] - tmp_file_name = self.get_file_for_label(file_name) - 
output_file_name = f"{tmp_file_name}-{compartment}.csv" - df.to_csv(os.path.join(out_data_path, output_file_name), - sep="\t", header=True, index=False) - logger.info( - f"Saved the {compartment} compartment version " - f"of {file_name} in {out_data_path}") def get_file_for_label(self, label): if label == "abundances": @@ -210,7 +199,6 @@ class DataIntegration(Dataset): def set_dataset_integration_config(self): self.preload() self.split_datafiles_by_compartment() - self.save_datafiles_split_by_compartment() self.integration_files_folder_absolute = os.path.join( self.sub_folder_absolute, "integration_files") diff --git a/src/dimet/method/__init__.py b/src/dimet/method/__init__.py index f77014c..0e2b45f 100644 --- a/src/dimet/method/__init__.py +++ b/src/dimet/method/__init__.py @@ -15,6 +15,7 @@ metabolites_values_for_metabologram) from dimet.data import DataIntegration, Dataset from dimet.helpers import flatten +from dimet.processing.bivariate_analysis import bivariate_comparison from dimet.processing.differential_analysis import (differential_comparison, multi_group_compairson, time_course_analysis) @@ -184,6 +185,18 @@ def build(self) -> "MetabologramIntegration": return MetabologramIntegration(config=self) +class BivariateAnalysisConfig(MethodConfig): + """ + Sets default values or fills them for the bi-variate analysis + """ + correction_method: str = "fdr_bh" + output_include_gmean_arr_columns: bool = True + + def build(self) -> "BivariateAnalysis": + return BivariateAnalysis(config=self) + + + class Method(BaseModel): config: MethodConfig @@ -852,3 +865,78 @@ def check_expectations_config_metabo( except ValueError as e: logger.error(f"Data inconsistency: {e}") sys.exit(1) + + +class BivariateAnalysis(Method): + config: BivariateAnalysisConfig + + def run(self, cfg: DictConfig, dataset: Dataset) -> None: + """ + Runs bivariate analysis, the 'behavior' is the type of comparison: + - conditions_MDV_comparison + - timepoints_MDV_comparison + - conditions_metabolite_time_profiles + """ + logger.info( + "Will compute bi-variate analysis, with the following config: %s", + self.config) + + out_table_dir = os.path.join(os.getcwd(), cfg.table_path) + os.makedirs(out_table_dir, exist_ok=True) + self.check_expectations(cfg, dataset) + + datatype = "isotopologue_proportions" + if datatype in dataset.compartmentalized_dfs.keys(): + logger.info(f"Running bi-variate analysis with " + f"{datatype}:") + if len(cfg.analysis.conditions) >= 2: + logger.info("assessing MDV (Mass Distribution Vector) " + "between conditions") + bivariate_comparison( + datatype, dataset, cfg, + behavior="conditions_MDV_comparison", + out_table_dir=out_table_dir) + if len(dataset.metadata_df["timepoint"].unique()) >= 2: + logger.info("assessing MDV (Mass Distribution Vector) " + "between time-points") + bivariate_comparison( + datatype, dataset, cfg, + behavior="timepoints_MDV_comparison", + out_table_dir=out_table_dir) + + if (len(cfg.analysis.conditions) >= 2) and ( + len(dataset.metadata_df["timepoint"].unique()) >= 2): + for datatype in ["abundances", "mean_enrichment"]: + if datatype in dataset.compartmentalized_dfs.keys(): + logger.info(f"Running bi-variate analysis with " + f"{datatype} to compare " + f"time course profiles between conditions") + bivariate_comparison( + datatype, dataset, cfg, + behavior="conditions_metabolite_time_profiles", + out_table_dir=out_table_dir) + + def check_expectations(self, cfg: DictConfig, dataset: Dataset) -> None: + # check that necessary information is provided in the analysis 
config + try: + if ((len(cfg.analysis.conditions) < 2) and + (len(dataset.metadata_df["timepoint"].unique()) < 2)): + raise ValueError("Less than 2 conditions, " + "AND less than 2 timepoints, " + "impossible to run bi-variate analysis, " + "aborting") + if not set(cfg.analysis.conditions).issubset( + set(dataset.metadata_df['condition'])): + raise ValueError( + "Conditions provided for bi-variate analysis " + "in the config file " + "are not present in the metadata file, aborting" + ) + except ConfigAttributeError as e: + logger.error( + f"Mandatory parameter not provided in the config file:{e}, " + f"aborting") + sys.exit(1) + except ValueError as e: + logger.error(f"Data inconsistency:{e}") + sys.exit(1) \ No newline at end of file diff --git a/src/dimet/processing/bivariate_analysis.py b/src/dimet/processing/bivariate_analysis.py new file mode 100644 index 0000000..f712bfd --- /dev/null +++ b/src/dimet/processing/bivariate_analysis.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@author: Johanna Galvis, Macha Nikolski +""" +import logging +import operator +import os + +from typing import List, Dict + +import numpy as np +import pandas as pd +import scipy.stats as stats +from dimet.constants import (assert_literal, availtest_methods_type, + data_files_keys_type) +from dimet.data import Dataset +from dimet.helpers import (arg_repl_zero2value, + compute_padj, + row_wise_nanstd_reduction) +from omegaconf import DictConfig + +logger = logging.getLogger(__name__) + + +def compute_statistical_correlation(df: pd.DataFrame, + test: str) -> pd.DataFrame: + """ + computes correlation test, e.g. pearson + row-wise for each dataframe + """ + stat_list = [] + pvalue_list = [] + for i, metabolite in enumerate(list(df['metabolite'])): + # array of n-(timepoints or m+x) geometrical means values + array_1 = df.loc[metabolite, "gmean_arr_1"] + array_2 = df.loc[metabolite, "gmean_arr_2"] + if test == "pearson": + try: + stat_res, pvalue = stats.pearsonr(array_1, array_2) + except ValueError: + stat_res, pvalue = (np.nan, np.nan) + + elif test == "spearman": + try: + stat_res, pvalue = stats.spearmanr(array_1, array_2) + except ValueError: + stat_res, pvalue = (np.nan, np.nan) + + stat_list.append(stat_res) + if np.isnan(stat_res): + pvalue_list.append(np.nan) + else: + pvalue_list.append(pvalue) + + df["correlation_coefficient"] = stat_list + df["pvalue"] = pvalue_list + + return df + + +def compute_test_for_df_dict(df_dict: Dict[str, pd.DataFrame], + test: str) -> Dict[str, pd.DataFrame]: + """parses a dictionary of dataframes + computes bivariate test : e.g. pearson + """ + for akey in df_dict.keys(): + df = df_dict[akey].copy() + df.index = df['metabolite'] + + df_dict[akey] = compute_statistical_correlation(df, test) + + return df_dict + + +def compute_bivariate_by_behavior( + df: pd.DataFrame, metadata_df: pd.DataFrame, comparison: List[str], + behavior: str, test: str) -> Dict[str, pd.DataFrame]: + """ + performs two steps: + 1. calls functions to compute geometric means, obtaining df's inside dict + 2. 
computes the bivariate statistical test (pearson by default) + """ + if behavior == "conditions_MDV_comparison": + df_dict = conditions_MDV_gmean_df_dict(df, metadata_df, comparison) + + elif behavior == "timepoints_MDV_comparison": + df_dict = timepoints_MDV_gmean_df_dict(df, metadata_df, comparison) + + elif behavior == "conditions_metabolite_time_profiles": + # only abundances or mean enrichment processed + df_dict = metabolite_time_profiles_gmean_df_dict( + df, metadata_df, comparison) + + # 2. compute the statistical test + df_dict = compute_test_for_df_dict(df_dict, test) + + return df_dict + + +def conditions_MDV_gmean_df_dict(df: pd.DataFrame, metadata_df: pd.DataFrame, + comparison: List[str] + ) -> Dict[str, pd.DataFrame]: + """ + Note: e.g. comparison [Ctl, Treated1] + Separately by time-point: + Using isotopologue proportions, by metabolite and condition, + computes the arrays of geometric means, ordered by m+x. + Outputs dict of df, each df with columns arr_1 and arr_2 (np arrays); + the keys of the dict are the time-points + """ + clue_isotopologue_df = compute_isotopologue_meaning(list(df.index)) + metabolites_uniq = clue_isotopologue_df["metabolite"].unique() + df_dict = dict() + for timepoint in list(metadata_df["timepoint"].unique()): + inner_gmean_dict = {"metabolite": metabolites_uniq, + "gmean_arr_1": list(), "gmean_arr_2": list()} + metadata_df_tp = metadata_df.loc[ + metadata_df["timepoint"] == timepoint, :] + df_timepoint = df[metadata_df_tp['name_to_plot']].copy() + for k, condition in enumerate(comparison): + metadata_df_tp_cond = metadata_df_tp.loc[ + metadata_df_tp["condition"] == condition, :] + df_one_group = df_timepoint[ + metadata_df_tp_cond['name_to_plot']].copy() + + inner_gmean_dict = inner_gmean_dict_filler( + k, inner_gmean_dict, df_one_group, clue_isotopologue_df) + + df_dict[timepoint] = pd.DataFrame(inner_gmean_dict) + + return df_dict + + +def timepoints_MDV_gmean_df_dict(df: pd.DataFrame, metadata_df: pd.DataFrame, + comparison: List[str] + ) -> Dict[str, pd.DataFrame]: + """ + Note: e.g. comparison [T1, T0] + Separately by condition: + Using isotopologue proportions, by metabolite and time-point, + computes the arrays of geometric means, ordered by m+x. 
+ Outputs dict of df, the keys of the dict are the conditions + """ + clue_isotopologue_df = compute_isotopologue_meaning(list(df.index)) + metabolites_uniq = clue_isotopologue_df["metabolite"].unique() + df_dict = dict() + for condition in list(metadata_df['condition'].unique()): + metadata_df_sub1 = metadata_df.loc[ + metadata_df["condition"] == condition, :] + + df_sub1 = df[metadata_df_sub1['name_to_plot']].copy() + inner_gmean_dict = {"metabolite": metabolites_uniq, + "gmean_arr_1": list(), "gmean_arr_2": list()} + + for k, timepoint in enumerate(comparison): + metadata_df_a_gr = metadata_df_sub1.loc[ + metadata_df_sub1["timepoint"] == timepoint, :] + df_a_group = df_sub1[metadata_df_a_gr['name_to_plot']].copy() + inner_gmean_dict = inner_gmean_dict_filler( + k, inner_gmean_dict, df_a_group, clue_isotopologue_df) + + df_dict[condition] = pd.DataFrame(inner_gmean_dict) + + return df_dict + + +def inner_gmean_dict_filler( + k: int, inner_gmean_dict: Dict[str, List[None]], + df_a_group: pd.DataFrame, clue_isotopologue_df: pd.DataFrame +) -> Dict[str, List[np.array]]: + """ + For MDV, fills the dictionary (of one single condition or time point) + inner_gmean_dict = {"metabolite": metabolites_uniq, + "gmean_arr_1": list(), "gmean_arr_2": list()}, where + this function replaces (fills) 'gmean_arr_..' lists only. + output: inner_gmean_dict, filled: + 'gmean_arr_..' list of np.arrays (one np.array by metabolite), + (respects the order of the metabolites in the 'metabolite' key). + Note: each elem in np. array respects the order of isotopologues m+x. + """ + assert k in [0, 1], "k can only take value 0 or 1" + metabolites_uniq = clue_isotopologue_df["metabolite"].unique() + # compute the arrays of geometric means + df_a_group["gmean"] = np.around(df_a_group.apply( + lambda x: stats.gmean(x.dropna()), axis=1), decimals=6) + df_a_group['gmean'] = modify_gmean_by_sanity(df_a_group) + df_a_group["isotopologue_name"] = list(df_a_group.index) + # order these geometric means by m+x + merged_df = pd.merge(df_a_group, clue_isotopologue_df, how='left', + on="isotopologue_name").sort_values( + by=["metabolite", "m+x"]) + MDV_ordered_list_gmeans = list() + for i, metabolite in enumerate(metabolites_uniq): + mdv_arr = merged_df.loc[merged_df["metabolite"] == metabolite, + "gmean"].values + MDV_ordered_list_gmeans.append(np.array(mdv_arr)) + key_name = f'gmean_arr_{k + 1}' + inner_gmean_dict[key_name] = MDV_ordered_list_gmeans + + return inner_gmean_dict + + +def modify_gmean_by_sanity(df: pd.DataFrame) -> np.array: + """ + Accepts the df of samples of one single group, with gmean column (last). + If at least 2 no-NaN samples, gmean value is preserved, + otherwise gmean value is replaced by np.nan. + """ + gmean_values = df["gmean"].to_numpy() + df_samples = df.drop(columns=["gmean"]) + # Count non-NaN cells for each row + no_nan_count = df_samples.count(axis=1).to_numpy() + for i in range(len(gmean_values)): + if no_nan_count[i] < 2: + gmean_values[i] = np.nan + return gmean_values + + +def metabolite_time_profiles_gmean_df_dict( + df: pd.DataFrame, metadata_df: pd.DataFrame, comparison: List[str] +) -> Dict[str, pd.DataFrame]: + """ + Using mean enrichment or abundances, + computes the arrays of geometric means, ordered by time. + Outputs dict of df of arrays, for comparing 2 condition time profiles. 
+ """ + metadata_df = metadata_df.loc[ + metadata_df['condition'].isin(comparison), :] + metadata_df['timenum'] = metadata_df['timenum'].astype(float) + metadata_df_sorted = metadata_df.sort_values(by="timenum") + df = df[metadata_df_sorted['name_to_plot']] + metabolites = list(df.index) + inner_gmean_dict = {"metabolite": metabolites, + "gmean_arr_1": list(), "gmean_arr_2": list()} + for k, condition in enumerate(comparison): + metadata_sorted2 = metadata_df_sorted.loc[ + metadata_df_sorted["condition"] == condition, :].copy() + tmp_gmean_time_dict = {} + for curr_time in list(metadata_sorted2['timepoint'].unique()): + metadata_sorted3 = metadata_sorted2.loc[ + metadata_sorted2['timepoint'] == curr_time, :].copy() + data_time_df = df[metadata_sorted3['name_to_plot']].copy() + data_time_df = data_time_df.assign( + gmean=data_time_df.apply(lambda x: stats.gmean(x.dropna()), + axis=1)) + data_time_df['gmean'] = modify_gmean_by_sanity(data_time_df) + tmp_gmean_time_dict[curr_time] = data_time_df["gmean"].tolist() + tmp_df = pd.DataFrame(tmp_gmean_time_dict) + # tmp_df : T0 T2h ... + # 0 0.471528 0.719920 ... + # 1 3.692766 1.743812 ... + key_name = f'gmean_arr_{k + 1}' + inner_gmean_dict[key_name] = list(np.around(tmp_df.to_numpy(), 6)) + + return {'metabo_time_profile': pd.DataFrame(inner_gmean_dict)} + + +def compute_isotopologue_meaning(isotopologues_list: List[str] + ) -> pd.DataFrame: + """output: df : + isotopologue_name metabolite m+x + Cit_m+0 Cit 0 + ... + """ + isotopologues_uniq = sorted(list(np.unique(np.array(isotopologues_list)))) + clue_isotopologue = pd.DataFrame({ + "isotopologue_name": isotopologues_uniq + }) + clue_isotopologue[["metabolite", "m+x"]] = ( + clue_isotopologue["isotopologue_name"].str.split( + "_m+", expand=True, regex=False)) + clue_isotopologue["m+x"] = clue_isotopologue["m+x"].astype(int) + clue_isotopologue = clue_isotopologue.sort_values( + by=["metabolite", "m+x"]) + return clue_isotopologue + + +def conditions_to_comparisons(conditions: List[str]) -> List[List[str]]: + """ + for the bi-variate analysis generates comparisons ONLY FOR CONDITIONS + example: input: conditions = [A, B, C] + output : [[A, B], [A, C], [B, C]] + The order of the comparison in each inner list, e.g. [A, B] or [B, A] + does not affect the result of the bi-variate test (verified) + """ + comparisons = list() + for i in conditions: + for j in conditions: + if i != j: + pair = np.sort(np.array([i, j]), axis=None) + if list(pair) not in comparisons: + comparisons.append(list(pair)) + return comparisons + + +def timepoints_to_comparisons(metadata_df: pd.DataFrame) -> List[List[str]]: + """generates comparisons specifically as consecutive time points + output: e.g. [[T0, T1], [T1, T2]]""" + # order the time by the numeric part + time_df = metadata_df[["timepoint", "timenum"]].copy().drop_duplicates() + time_df["timenum"] = time_df["timenum"].astype(float) + time_df = time_df.sort_values(by='timenum') + comparisons = list() + for i in range(time_df.shape[0] - 1): + j = i + 1 + comparisons.append([ + time_df["timepoint"].tolist()[j], time_df["timepoint"].tolist()[i]]) + + return comparisons + + +def set_comparisons_by_behavior(behavior: str, cfg: DictConfig, + metadata_df: pd.DataFrame) -> List[List[str]]: + """ + For the bi-variate analysis, builds a list of lists, each list is a + comparison. 
The 'behavior' determines if conditions (1) or time-points (2) + Output: + e.g.(1): comparisons = [['Ctl', 'Treated1'], ['Ctl', 'Treated2']] + or e.g.(2): comparisons = [[T1, T0], [T2, T1]] + """ + if behavior in ["conditions_metabolite_time_profiles", + "conditions_MDV_comparison"]: + comparisons = conditions_to_comparisons(cfg.analysis.conditions) + elif behavior == "timepoints_MDV_comparison": + comparisons = timepoints_to_comparisons(metadata_df) + return comparisons + + +def save_output(result_df: pd.DataFrame, compartment: str, dataset: Dataset, + file_name: data_files_keys_type, out_file_name_str: str, + test: str, out_table_dir: str, cfg: DictConfig) -> None: + """saves result to tab delimited file, for one comparison""" + + result = result_df.sort_values(["correlation_coefficient", "padj"], + ascending=True) + for name_col in ['correlation_coefficient', 'pvalue', 'padj']: + result[name_col] = np.around(result[name_col].to_numpy(), 6) + + result['compartment'] = compartment + out_order_columns = ['correlation_coefficient', 'pvalue', + 'padj', 'compartment', 'gmean_arr_1', 'gmean_arr_2'] + result = result[out_order_columns] # fix the order of columns + if not cfg.analysis.method.output_include_gmean_arr_columns: + result = result.drop(columns=['gmean_arr_1', 'gmean_arr_2']) + + base_file_name = dataset.get_file_for_label(file_name) + base_file_name += f"--{compartment}-{out_file_name_str}-{test}" + output_file_name = os.path.join(out_table_dir, + f"{base_file_name}.tsv") + result.to_csv( + output_file_name, + index_label="metabolite", header=True, sep="\t" + ) + logger.info(f"Saved the result in {output_file_name}") + + +def bivariate_run_and_save_current_comparison( + file_name: data_files_keys_type, df: pd.DataFrame, + metadata_df: pd.DataFrame, compartment: str, dataset: Dataset, + cfg: DictConfig, comparison: List[str], + behavior: str, test: str, out_table_dir: str) -> None: + """ + Runs a bivariate analysis for blocks of values, handling 3 behavior types: + a - conditions_MDV_comparison: + comparison of the MDV arrays between 2 user specified conditions (separately for each time point) + b - timepoints_MDV_comparison: + comparison of the MDV arrays between 2 consecutive time points (separately for each condition) + c - conditions_metabolite_time_profiles: + comparison of the time course profiles of the metabolites + total abundances and mean enrichment, between two conditions + Finally, computes correction for multiple tests, and saves results + """ + df_dict = compute_bivariate_by_behavior( + df, metadata_df, comparison, behavior, test + ) + for akey in df_dict.keys(): + df = df_dict[akey] + + df.set_index("metabolite", inplace=True) + + # apply multiple test correction method to df results + result_df = compute_padj(df, 0.05, + cfg.analysis.method.correction_method) + + comparison_str = "-".join(comparison) + out_file_name_str = f"-MDV-{comparison_str}--{akey}" + if akey == "metabo_time_profile": + out_file_name_str = f"{comparison_str}" + + save_output(result_df, compartment, dataset, file_name, + out_file_name_str, test, out_table_dir, cfg) + + +def bivariate_comparison( + file_name: data_files_keys_type, dataset: Dataset, cfg: DictConfig, + behavior: str, out_table_dir: str +) -> None: + """ + Bi-variate analysis is performed on compartmentalized versions + of data files + Attention: we replace zero values using the provided method + Writes the table with computed statistics in the relevant output directory + """ + assert_literal(file_name, data_files_keys_type, "file 
name") + assert behavior in ["conditions_metabolite_time_profiles", + "conditions_MDV_comparison", + "timepoints_MDV_comparison"], "wrong behavior chosen" + impute_value = cfg.analysis.method.impute_values[file_name] + test = cfg.analysis.method[behavior][file_name] # e.g. pearson + + for compartment, compartmentalized_df in \ + dataset.compartmentalized_dfs[file_name].items(): + df = compartmentalized_df + metadata_df_subset = dataset.metadata_df.loc[ + dataset.metadata_df['compartment'] == compartment, :] + metadata_df_subset = metadata_df_subset.loc[ + metadata_df_subset['condition'].isin(cfg.analysis.conditions), :] + + df = df[metadata_df_subset['name_to_plot']] + + val_instead_zero = arg_repl_zero2value(impute_value, df) + df = df.replace(to_replace=0, value=val_instead_zero) + # note: do not drop rows all zero or all nan, blocks can break ! + if file_name == "abundances": # only reduction values if abundances + df = row_wise_nanstd_reduction(df) + df = df.round(decimals=6) + + automatic_comparisons = set_comparisons_by_behavior( + behavior, cfg, metadata_df_subset + ) + for comparison in automatic_comparisons: + # e.g. comparison = ['A', 'B'], list of exactly two elements + bivariate_run_and_save_current_comparison( + file_name, df, metadata_df_subset, compartment, + dataset, cfg, comparison, + behavior, # specifies the type of comparison + test, out_table_dir) diff --git a/src/dimet/processing/differential_analysis.py b/src/dimet/processing/differential_analysis.py index 01504e9..61dd37c 100644 --- a/src/dimet/processing/differential_analysis.py +++ b/src/dimet/processing/differential_analysis.py @@ -81,30 +81,6 @@ def distance_or_overlap(df: pd.DataFrame, groups: List) -> pd.DataFrame: return df -def select_rows_with_sufficient_non_nan_values___previous_version( - df: pd.DataFrame) -> (pd.DataFrame, pd.DataFrame): - """ - Identifies rows in the input DataFrame that have enough replicates. - Separates the DataFrame into two parts based on the presence or absence - of enough replicates, - returning them as separate DataFrames. - """ - # TODO: this is too stringent --> ! - # ! 
-- > changed to: len([not NaN values]) >= 2 in each group - # see the re-writen function select_rows_with_sufficient_non_nan_values - try: - bad_df = df[(df["count_nan_samples_group1"] > 0) | ( - df["count_nan_samples_group2"] > 0)] - good_df = df[(df["count_nan_samples_group1"] == 0) & ( - df["count_nan_samples_group2"] == 0)] - - except Exception as e: - print(e) - print("Error in separate_before_stats (enough replicates ?)") - - return good_df, bad_df - - def select_rows_with_sufficient_non_nan_values( df: pd.DataFrame, groups: List[List[str]] ) -> (pd.DataFrame, pd.DataFrame): diff --git a/tests/test_bivariate_analysis.py b/tests/test_bivariate_analysis.py new file mode 100644 index 0000000..22586ca --- /dev/null +++ b/tests/test_bivariate_analysis.py @@ -0,0 +1,144 @@ +from unittest import TestCase + +import numpy as np +import pandas as pd +from scipy import stats + +from dimet.processing import bivariate_analysis + + +class TestBivariateAnalysis(TestCase): + + def test_conditions_to_comparisons(self): + conditions_list = ["A", "B", "C", "D"] + result = bivariate_analysis.conditions_to_comparisons( + conditions_list + ) + self.assertListEqual(result[0], ['A', 'B']) + self.assertListEqual(result[1], ['A', 'C']) + self.assertListEqual(result[2], ['A', 'D']) + self.assertListEqual(result[3], ['B', 'C']) + self.assertListEqual(result[4], ['B', 'D']) + + def test_metabolite_time_profiles_gmean_df_dict(self): + df = pd.DataFrame({'metabolite': [1, 2], + 'A-T0-1': [4.6, 4.6], 'A-T0-2': [6.5, 6.5], + 'A-T1-1': [2, 2], + 'A-T1-2': [10.1, 8.4], 'A-T4-1': [5.6, 3.6], + 'A-T4-1': [1.6, 1.8], + 'A-T7-1': [7.6, 7.6], 'A-T7-1': [8.1, 9.3], + 'B-T0-1': [8.1, 9.3], 'B-T0-2': [1.6, 1.8], + 'B-T1-1': [6, 7], + 'B-T1-2': [10.1, 8.4], 'B-T4-1': [3.2, 3.6], + 'B-T4-1': [6.5, 6.5], + 'B-T7-1': [4.6, 4.6], 'B-T7-1': [7.6, 7.6]}) + metadata_df = pd.DataFrame({ + 'condition': ['A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', + 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B'], + 'timenum': [0, 0, 1.5, 1.5, 4, 4, 7, 7, + 0, 0, 1.5, 1.5, 4, 4, 7, 7], + 'timepoint': ['0h', '0h', '1.5h', '1.5h', '4h', '4h', '7h', '7h', + '0h', '0h', '1.5h', '1.5h', '4h', '4h', '7h', '7h'], + 'name_to_plot': ['A-T0-1', 'A-T0-2', 'A-T1-1', 'A-T1-2', + 'A-T4-1', 'A-T4-1', 'A-T7-1', 'A-T7-1', + 'B-T0-1', 'B-T0-2', 'B-T1-1', 'B-T1-2', + 'B-T4-1', 'B-T4-1', 'B-T7-1', 'B-T7-1']}) + comparison = ["A", "B"] + result = bivariate_analysis.metabolite_time_profiles_gmean_df_dict( + df, metadata_df, comparison + ) # result : + # {'metabo_time_profile': + # metabolite gmean_arr_1 gmean_arr_2 + # 0 [5.468089, 4.494441, 1.6, 8.1] [3.6, 7.7846, 6.5, 7.6] + # 1 [5.468089, 4.09878, 1.8, 9.3] [4.091455, 7.668116, 6.5, 7.6]} + self.assertTrue(list(result.keys())[0] == 'metabo_time_profile') + self.assertListEqual( + result['metabo_time_profile'].iloc[0, 1].tolist(), + [5.468089, 4.494441, 1.6, 8.1]) + + def test_modify_gmean_by_sanity(self): + df = pd.DataFrame({'A-T0-1': [4.6, np.nan], + 'A-T0-2': [6.5, np.nan], + 'A-T0-3': [np.nan, 2]}) + df.index = ["metabolite1", "metabolite2"] + df = df.assign(gmean=df.apply(lambda x: stats.gmean(x.dropna()), + axis=1)) + # df.gmean.to_numpy() : [5.46808925, 2. 
] + result = bivariate_analysis.modify_gmean_by_sanity(df) + # result : [5.46808925, nan] np array + self.assertFalse(np.all(df["gmean"].to_numpy() == result)) + self.assertAlmostEqual(result[0], 5.46808925, places=5) + self.assertTrue(np.isnan(result[1])) + + def test_inner_gmean_dict_filler(self): + k = 0 + inner_gmean_dict = {"metabolite": ['CoA', 'Ala'], + "gmean_arr_1": list()} + + df_a_group = pd.DataFrame({ + 'A-T0-1': [0.2, 0.3, 0.5, 0.1, 0.2, 0.3, 0.4], + 'A-T0-2': [0.17, 0.33, 0.5, 0.1, 0.22, 0.27, 0.41], + 'A-T0-3': [0.2, 0.28, 0.52, 0.11, 0.18, 0.29, 0.42]}) + df_a_group.index = ['CoA_m+0', 'CoA_m+1', 'CoA_m+2', + 'Ala_m+0', 'Ala_m+1', 'Ala_m+2', 'Ala_m+3'] + + clue_isotopologue_df = pd.DataFrame({ + 'isotopologue_name': ['CoA_m+0', 'CoA_m+1', 'CoA_m+2', + 'Ala_m+0', 'Ala_m+1', 'Ala_m+2', 'Ala_m+3'], + 'metabolite': ['CoA', 'CoA', 'CoA', 'Ala', 'Ala', 'Ala', 'Ala'], + 'm+x': [0, 1, 2, 0, 1, 2, 3]}) + clue_isotopologue_df['m+x'] = clue_isotopologue_df['m+x'].astype(int) + + result = bivariate_analysis.inner_gmean_dict_filler( + k, inner_gmean_dict, df_a_group, clue_isotopologue_df + ) # result: + # {'metabolite': ['CoA', 'Ala'], + # ' gmean_arr_1': [array([0.189454, 0.302643, 0.50658 ]), + # array([0.103228, 0.199331, 0.286392, 0.409919])]} + self.assertTrue(np.allclose(np.array( + [0.189454, 0.302643, 0.50658]), + result['gmean_arr_1'][0], rtol=1e-6)) + self.assertTrue(np.allclose(np.array( + [0.103228, 0.199331, 0.286392, 0.409919]), + result['gmean_arr_1'][1], rtol=1e-6)) + + def test_compute_test_for_df_dict(self): + df = pd.DataFrame({ + 'metabolite': ['CoA', 'Ala'], + 'gmean_arr_1': [np.array([0.1894, 0.3026, 0.506]), + np.array([0.1032, 0.1993, 0.2863, 0.4099])], + 'gmean_arr_2': [np.array([0.506, 0.3026, 0.1894]), + np.array([0.4099, 0.2863, 0.1993, 0.1032])], + }) + df_dict = {'T0': df} # df comparing two conditions A vs B, at T0 + test = "pearson" + result = bivariate_analysis.compute_test_for_df_dict(df_dict, test) + self.assertEqual('T0', list(result.keys())[0]) + self.assertTrue( + set(list(['metabolite', 'pvalue', 'correlation_coefficient'] + )).issubset(set(list(result['T0'].columns))) + ) + + def test_compute_statistical_correlation(self): + df = pd.DataFrame({ + 'metabolite': ['CoA', 'Ala'], + 'gmean_arr_1': [np.array([0.1894, 0.3026, 0.506]), + np.array([0.1032, 0.1993, 0.2863, 0.4099])], + 'gmean_arr_2': [np.array([0.506, 0.3026, 0.1894]), + np.array([0.4099, 0.2863, 0.1993, 0.1032])], + }) + df.index = ['CoA', 'Ala'] + test = "pearson" + result = bivariate_analysis.compute_statistical_correlation(df, test) + + self.assertAlmostEqual(result.loc['CoA', 'correlation_coefficient'], + -0.947312, places=5) + self.assertAlmostEqual(result.loc['CoA', 'pvalue'], + 0.207574, places=5) + self.assertAlmostEqual(result.loc['Ala', 'correlation_coefficient'], + -0.992587, places=5) + self.assertAlmostEqual(result.loc['Ala', 'pvalue'], + 0.007413, places=5) + + +
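The conditions_to_comparisons helper enumerates every unordered pair of conditions. The short sketch below (a hypothetical pairwise_condition_comparisons, not the shipped function) shows that itertools.combinations over a sorted condition list yields the same pairs the new test test_conditions_to_comparisons expects.

from itertools import combinations
from typing import List


def pairwise_condition_comparisons(conditions: List[str]) -> List[List[str]]:
    # each pair appears once, sorted within the pair, matching
    # conditions_to_comparisons(["A", "B", "C"]) == [["A", "B"], ["A", "C"], ["B", "C"]]
    return [sorted(pair) for pair in combinations(sorted(set(conditions)), 2)]


print(pairwise_condition_comparisons(["A", "B", "C", "D"]))
# [['A', 'B'], ['A', 'C'], ['A', 'D'], ['B', 'C'], ['B', 'D'], ['C', 'D']]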
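timepoints_to_comparisons pairs only consecutive time points, ordered by the numeric timenum column and written with the later point first. A minimal equivalent sketch, assuming a metadata frame that carries timepoint and timenum columns as in the patch:

import pandas as pd


def consecutive_timepoint_comparisons(metadata_df: pd.DataFrame) -> list:
    # keep one row per time point, order by the numeric value, pair neighbours
    time_df = (metadata_df[["timepoint", "timenum"]]
               .drop_duplicates()
               .assign(timenum=lambda d: d["timenum"].astype(float))
               .sort_values("timenum"))
    ordered = time_df["timepoint"].tolist()
    return [[ordered[i + 1], ordered[i]] for i in range(len(ordered) - 1)]


metadata = pd.DataFrame({"timepoint": ["T0", "T0", "T1", "T2"],
                         "timenum": [0, 0, 1.5, 4]})
print(consecutive_timepoint_comparisons(metadata))  # [['T1', 'T0'], ['T2', 'T1']]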
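The MDV comparisons reduce each replicate group to one geometric mean per isotopologue (kept only when at least two replicates are not NaN, the rule implemented by modify_gmean_by_sanity), order the values by m+x and correlate the two resulting vectors. A self-contained sketch on made-up data, not the module's own df_dict machinery:

import numpy as np
import pandas as pd
import scipy.stats as stats


def group_mdv(df: pd.DataFrame) -> np.ndarray:
    # geometric mean of the replicates in each row, NaN when fewer than
    # two replicates carry a value
    gmeans = df.apply(lambda row: stats.gmean(row.dropna()), axis=1)
    enough = df.count(axis=1) >= 2
    return np.where(enough, gmeans, np.nan)


# two replicate groups for one metabolite, rows already ordered m+0, m+1, m+2
ctl = pd.DataFrame({"Ctl-1": [0.20, 0.30, 0.50], "Ctl-2": [0.18, 0.32, 0.50]},
                   index=["Cit_m+0", "Cit_m+1", "Cit_m+2"])
trt = pd.DataFrame({"Trt-1": [0.60, 0.25, 0.15], "Trt-2": [0.58, 0.27, 0.15]},
                   index=["Cit_m+0", "Cit_m+1", "Cit_m+2"])

arr_1, arr_2 = group_mdv(ctl), group_mdv(trt)
try:
    corr, pvalue = stats.pearsonr(arr_1, arr_2)
except ValueError:
    # mirrors the patch's fallback when the correlation cannot be computed
    corr, pvalue = np.nan, np.nan
print(np.round(corr, 6), np.round(pvalue, 6))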
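compute_isotopologue_meaning relies on the isotopologue naming convention metabolite_m+x. A small illustration with hypothetical names, using the same literal split on "_m+" with regex=False so the "+" is not read as a regex operator:

import pandas as pd

names = pd.Series(["Cit_m+0", "Cit_m+2", "Ala_m+1"])
parsed = names.str.split("_m+", expand=True, regex=False)
parsed.columns = ["metabolite", "m+x"]
parsed["m+x"] = parsed["m+x"].astype(int)
print(parsed.sort_values(by=["metabolite", "m+x"]))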
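correction_method: fdr_bh in the new YAML selects a Benjamini-Hochberg adjustment of the per-comparison p-values; the patch delegates this to dimet.helpers.compute_padj. Assuming that helper applies the standard procedure, the effect can be illustrated with statsmodels:

import numpy as np
from statsmodels.stats.multitest import multipletests

pvalues = np.array([0.007, 0.02, 0.2, 0.5])
reject, padj, _, _ = multipletests(pvalues, alpha=0.05, method="fdr_bh")
print(np.round(padj, 6))  # adjusted p-values, same order as the input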