From 49c8b5f0b6e983e3c213da9c13d610e953178c69 Mon Sep 17 00:00:00 2001 From: johaGL Date: Tue, 6 Aug 2024 14:51:28 +0200 Subject: [PATCH] clean code, flake8 fix linted errors, add important comments --- tracegroomer/__main__.py | 4 +- tracegroomer/tests/test_tidy.py | 4 +- tracegroomer/tests/test_utils.py | 24 ++++++------ tracegroomer/tidy.py | 63 +++++++++++++++++++------------- tracegroomer/utils.py | 5 +-- 5 files changed, 56 insertions(+), 44 deletions(-) diff --git a/tracegroomer/__main__.py b/tracegroomer/__main__.py index 317d26d..50b4df6 100644 --- a/tracegroomer/__main__.py +++ b/tracegroomer/__main__.py @@ -66,7 +66,7 @@ def prep_args() -> argparse.ArgumentParser: action=argparse.BooleanOptionalAction, default=True, help="Stomps fractional contributions (synonym: \ mean enrichment), and isotopologue proportions, \ - to max 1.0 and min 0.0") #meanenrich_or_fracfontrib + to max 1.0 and min 0.0") # meanenrich_or_fracfontrib # for total abundance only if VIB data parser.add_argument("--under_detection_limit_set_nan", @@ -95,7 +95,7 @@ def main() -> int: parser = prep_args() args = parser.parse_args() logger.info( - f"Running TraceGroomer with the following parameters:") + "Running TraceGroomer with the following parameters:") for x in vars(args).keys(): logger.info(f"{x} = {vars(args)[x]} ") diff --git a/tracegroomer/tests/test_tidy.py b/tracegroomer/tests/test_tidy.py index 373310c..fd5cfc4 100644 --- a/tracegroomer/tests/test_tidy.py +++ b/tracegroomer/tests/test_tidy.py @@ -126,8 +126,8 @@ def test_stomp_fraction_values(self): ) def test_pull_internal_standard(self): - df = pd.DataFrame({ "sample-a": [87, 64, 14], - "sample-b": [85, 37, 17]}) + df = pd.DataFrame({"sample-a": [87, 64, 14], + "sample-b": [85, 37, 17]}) df.index = ["X", "W_acid", "Z"] config = { "mean_enrichment": "FracContribs", "isotopologues": None, diff --git a/tracegroomer/tests/test_utils.py b/tracegroomer/tests/test_utils.py index dd70f82..26306bf 100644 --- a/tracegroomer/tests/test_utils.py +++ b/tracegroomer/tests/test_utils.py @@ -69,7 +69,7 @@ def test_compute_isotopologues_proportions_from_absolute(self): df, metabolites2isotopologues_df ) - self.assertAlmostEqual(result.loc['acCoA_m+0','sampleA2'], + self.assertAlmostEqual(result.loc['acCoA_m+0', 'sampleA2'], 0.529913, places=6) self.assertAlmostEqual(result.loc['acCoA_m+1', 'sampleB1'], 0.313908, places=6) @@ -121,14 +121,14 @@ def test_compute_sums_isotopol_props(self): def test_impute_custom_levels_to_df(self): melted_df = pd.DataFrame({ "metabolite": ["AcCoA", "AcCoA", "AcCoA", "unknown", "unknown", - "gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA", - "unknown", "unknown", "gly", "gly", "gly" ], - "isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, 0, 1, 0, 1, - 2], - "samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2", - "s2", "s2", "s2", "s2", "s2", "s2", "s2"], - "value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3, - 0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31] + "gly", "gly", "gly", "AcCoA", "AcCoA", "AcCoA", + "unknown", "unknown", "gly", "gly", "gly"], + "isotopologue_type": [0, 1, 2, 0, 1, 0, 1, 2, 0, 1, 2, + 0, 1, 0, 1, 2], + "samples": ["s1", "s1", "s1", "s1", "s1", "s1", "s1", "s1", "s2", + "s2", "s2", "s2", "s2", "s2", "s2", "s2"], + "value": [0.3, 0.6, 0.1, 0.4, 0.6, 0.2, 0.5, 0.3, + 0.25, 0.62, 0.15, 0.5, 0.5, 0.23, 0.46, 0.31] }) result = utils.impute_custom_levels_to_df(melted_df) @@ -188,8 +188,8 @@ def test_divide_by_amount_material(self): confdict = {"isotopologues": "MyIsotopes"} result = utils.divide_by_amount_material( - frames_dict,confdict, material_df=micrograms_weight, - alternative_method=True, metric="isotopologues" ) + frames_dict, confdict, material_df=micrograms_weight, + alternative_method=True, metric="isotopologues") witness = (df.loc["acCoA_m+1", "sampleA2"] / micrograms_weight.loc[ "sampleA2", "0"]) * micrograms_weight["0"].mean() @@ -202,7 +202,7 @@ def test_divide_by_amount_material(self): np.around( result['MyIsotopes'].loc['acCoA_m+0', :], 6 ) == np.array( - [ 10277.580938, 8869.973775, 6527.904882, 56000.880682])) + [10277.580938, 8869.973775, 6527.904882, 56000.880682])) ) self.assertTrue(np.all( np.around( diff --git a/tracegroomer/tidy.py b/tracegroomer/tidy.py index f31fdd1..6672439 100644 --- a/tracegroomer/tidy.py +++ b/tracegroomer/tidy.py @@ -30,6 +30,7 @@ def __init__(self, type_of_file): 'isotopologue_proportions', 'isotopologues', 'abundances'] + self.metabolites2isotopologues_df = None self.user_given_names = dict() def load_metadata(self, metadata_path): @@ -124,19 +125,21 @@ def transpose_frames(self): def load_metabolite_to_isotopologue_df(self, confdict): """df of correspondences between isotopologues and metabolites proper to the given data""" - try: - isotopologues_full = list(self.frames_dict[confdict[ - "isotopologue_proportions"]].index) - # ok apply same proposed solution whether Key or Type error: - except TypeError: - isotopologues_full = list(self.frames_dict[confdict[ - "isotopologues"]].index) - except KeyError: - isotopologues_full = list(self.frames_dict[confdict[ - "isotopologues"]].index) - - self.metabolites2isotopologues_df = ut.isotopologues_meaning_df( - isotopologues_full) + if (confdict['isotopologue_proportions'] is not None) or ( + confdict['isotopologues'] is not None): + try: + isotopologues_full = list(self.frames_dict[confdict[ + "isotopologue_proportions"]].index) + # ok apply same proposed solution whether Key or Type error: + except TypeError: + isotopologues_full = list(self.frames_dict[confdict[ + "isotopologues"]].index) + except KeyError: + isotopologues_full = list(self.frames_dict[confdict[ + "isotopologues"]].index) + + self.metabolites2isotopologues_df = ut.isotopologues_meaning_df( + isotopologues_full) def set_user_given_names(self, confdict): """ @@ -178,19 +181,23 @@ def update_truly_available_frames(self, confdict): for h in self.frames_dict.keys(): # if the quantification content and key exists if (self.frames_dict[h] is not None) and (h is not None): - avail_dict[reverse_dict[h]] = h - true_reverse_dict[h] = reverse_dict[h] + try: + avail_dict[reverse_dict[h]] = h + true_reverse_dict[h] = reverse_dict[h] + except Exception as e: + print(e) + continue self.available_frames = avail_dict self.reverse_available_frames = true_reverse_dict def save_isotopologues_preview(self, args, confdict, groom_out_path): - compartmentalized_dict = ut.df_to__dic_bycomp( - self.frames_dict[confdict['isotopologue_proportions']], - self.metadata) - output_plots_dir = os.path.join(groom_out_path, "preview_plots") if args.isotopologues_preview: - logger.info(f"prepare isotopologue proportions overview figures") + compartmentalized_dict = ut.df_to__dic_bycomp( + self.frames_dict[confdict['isotopologue_proportions']], + self.metadata) + output_plots_dir = os.path.join(groom_out_path, "preview_plots") + logger.info("prepare isotopologue proportions overview figures") if not os.path.exists(output_plots_dir): os.makedirs(output_plots_dir) ut.save_isos_preview( @@ -204,8 +211,8 @@ def pull_internal_standard(self, confdict, args): ): try: x = self.frames_dict[confdict['abundances']].columns.tolist() - y = self.frames_dict[confdict['abundances'] - ].loc[args.use_internal_standard, :].tolist() + y = self.frames_dict[confdict['abundances']].loc[ + args.use_internal_standard, :].tolist() instandard_abun_df = pd.DataFrame( {"sample": x, args.use_internal_standard: y @@ -255,8 +262,13 @@ def normalize_by_internal_standard(self, args, confdict): self.frames_dict = frames_dict def set_final_files_names(self): + """ + Set final names of output files: + 1. set final names dictionary and add as attribute, and + 2. assign the values of 1. to the keys of the object.frames_dict + """ not_user_defined_dict = ut.retrieve_dict_not_user_defined() - final_files_names_d = dict() + final_files_names_d = dict() # set final names dictionary for valuename in self.reverse_available_frames.keys(): keyname = self.reverse_available_frames[valuename] if keyname in list(self.user_given_names.keys()): @@ -267,7 +279,7 @@ def set_final_files_names(self): keyname] # end for frames_names_list = list(self.frames_dict.keys()) - for frame_name in frames_names_list: + for frame_name in frames_names_list: # assign final names to frames if frame_name == "abundances_computed": self.frames_dict[final_files_names_d[ "abundances"]] = self.frames_dict[frame_name] @@ -313,7 +325,8 @@ def drop_metabolites(self): def frames_filterby_min_admited_isotopol_proportions( self, confdict, isosprop_min_admitted: float ): - isos_propor_dic = self.frames_dict[confdict['isotopologue_proportions']] + isos_propor_dic = self.frames_dict[ + confdict['isotopologue_proportions']] bad_mets = dict() for co in isos_propor_dic.keys(): tmp = isos_propor_dic[co] diff --git a/tracegroomer/utils.py b/tracegroomer/utils.py index 355beb5..bd30615 100644 --- a/tracegroomer/utils.py +++ b/tracegroomer/utils.py @@ -401,7 +401,8 @@ def save_isos_preview(dict_isos_prop, metadata, output_plots_dir, dfmelt = impute_custom_levels_to_df(dfmelt) table_minimalbymet(dfmelt, os.path.join(output_plots_dir, f"minextremesIso_{k}.{output_extension}")) - outputfigure = os.path.join(output_plots_dir, f"allsampleIsos_{k}.pdf") + outputfigure = os.path.join( + output_plots_dir, f"allsampleIsos_{k}.pdf") figtitle = f"{k} compartment, Isotopologues (proportions) \ across all samples" save_rawisos_plot(dfmelt, figuretitle=figtitle, @@ -742,5 +743,3 @@ def transformmyisotopologues(isos_list, style) -> List[str]: outli = isos_list raise ValueError("isotopologues style not vib nor generic") return outli - -