From 2bd54142400dffa616d268fefba72daaf0d15c6a Mon Sep 17 00:00:00 2001 From: delaossa Date: Wed, 24 Jan 2024 17:09:23 +0100 Subject: [PATCH 01/15] Implement `plot_history` in `ExplorationDiagnostics` --- .../diagnostics/exploration_diagnostics.py | 194 ++++++++++++++++++ tests/test_exploration_diagnostics.py | 4 + 2 files changed, 198 insertions(+) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index 9b8a1e96..a1dfb480 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -524,6 +524,200 @@ def plot_worker_timeline( ax.set_ylabel("Worker") ax.set_xlabel("Time (s)") + def get_df_with_selection( + self, + df: pd.DataFrame, + select: dict + ) -> pd.DataFrame: + """Return the DataFrame after applying selection criterium. + + Parameters + ---------- + df : DataFrame + The DataFrame object + select: dict + A dictionary containing the selection criteria to apply. + e.g. {'f' : [None, -10.]} (get data with f < -10) + """ + condition = '' + for key in select: + if select[key][0] is not None: + if condition != '': + condition += ' and ' + condition += '%s > %f' % (key, select[key][0]) + if select[key][1] is not None: + if condition != '': + condition += ' and ' + condition += '%s < %f' % (key, select[key][1]) + print('Selecting according to the condition: ', condition) + return df.query(condition) + + def plot_history( + self, + parnames: Optional[list] = None, + xname: Optional[str] = None, + select: Optional[dict] = None, + sort: Optional[dict] = None, + top: Optional[dict] = None + ) -> None: + """Print selected parameters versus simulation index. + + Parameters + ---------- + parnames: list of strings, optional + List with the names of the parameters to show. + + xname: string, optional + Name of the parameter to plot in the x axis. + By default is the index of the history DataFrame. 
+ + select: dict, optional + Contains a set of rules to filter the dataframe, e.g. + 'f' : [None, -10.] (get data with f < -10) + + sort: dict, optional + A dict containing as keys the names of the parameres to sort by + and, as values, a Bool indicating if ordering ascendingly (True) + or descendingly (False) + e.g. {'f': False} sort simulations according to `f` descendingly. + + top: int, optional + Highight the top n simulations according to the objectives. + """ + + # Copy the history DataFrame + df = self.history.copy() + + # Get lists of variable names + objective_names = [obj.name for obj in self.objectives] + varpar_names = [var.name for var in self.varying_parameters] + + # Order list of simulations and re-index + if sort is not None: + df = df.sort_values(by=list(sort.keys()), ascending=tuple(sort.values())).reset_index(drop=True) + + # Define the quantity to plot in the x axis + if xname is not None: + xvalues = df[xname] + else: + xvalues = list(df.index) + + # Apply selection to the history DataFrame + if select is not None: + df_select = self.get_df_with_selection(df, select) + else: + df_select = None + + # Select top cases in separate DataFrames + if top is not None: + df_top = [] + for obj_name in objective_names: + for o in self.objectives: + if o.name == obj_name: + ascending = o.minimize + index_list = list(df.sort_values(by=obj_name, ascending=ascending).index) + df_top.append(df.loc[index_list[:top]]) + else: + df_top = None + + # Default list of parameters to show + if parnames is None: + parnames = objective_names.copy() + parnames.extend(varpar_names) + + # Make figure + nplots = len(parnames) + + # Definitions for the axes + l_margin, width1, width2 = 0.08, 0.72, 0.15 + b_margin, t_margin = 0.1, 0.04 + xspacing = 0.015 + yspacing = 0.04 + height = (1. 
- b_margin - t_margin - (nplots - 1) * yspacing) / nplots + nbins = 25 + + # Actual plotting + ax_histy_list = [] + histy_list = [] + for i in range(nplots): + bottom1 = b_margin + (nplots - 1 - i) * (yspacing + height) + rect_scatter = [l_margin, bottom1, width1, height] + rect_histy = [l_margin + width1 + xspacing, bottom1, width2, height] + + h = df[parnames[i]] + ax_scatter = plt.axes(rect_scatter) + plt.grid(color='lightgray', linestyle='dotted') + plt.plot(xvalues, h, 'o') + if df_select is not None: + xvalues_cut = list(df_select.index) + if xname is not None: + xvalues_cut = df_select[xname] + h_cut = df_select[parnames[i]] + plt.plot(xvalues_cut, h_cut, 'o') + + if df_top is not None: + for df_t in df_top: + if xname is not None: + xvalues_top = df_t[xname] + else: + xvalues_top = list(df_t.index) + h_top = df_t[parnames[i]] + plt.plot(xvalues_top, h_top, 'o') + + # Plot cummin only when proceeds + if (parnames[i] in objective_names) and (not sort) and (xname is None): + for o in self.objectives: + if o.name == parnames[i]: + minimize = o.minimize + if minimize: + cum = df[parnames[i]].cummin().values + else: + cum = df[parnames[i]].cummax().values + plt.plot(xvalues, cum, '-', c='black') + + plt.title(parnames[i].replace('_', ' '), fontdict={'fontsize': 'x-small'}, loc='right', pad=2) + + ax_histy = plt.axes(rect_histy) + plt.grid(color='lightgray', linestyle='dotted') + ymin, ymax = ax_scatter.get_ylim() + binwidth = (ymax - ymin) / nbins + bins = np.arange(ymin, ymax + binwidth, binwidth) + histy, _, _ = ax_histy.hist(h, bins=bins, + weights=100. * np.ones(len(h)) / len(h), orientation='horizontal') + + if df_select is not None: + h_cut = df_select[parnames[i]] + ax_histy.hist(h_cut, bins=bins, + weights=100. * np.ones(len(h_cut)) / len(h), + orientation='horizontal') + + if df_top is not None: + for df_t in df_top: + h_top = df_t[parnames[i]] + ax_histy.hist(h_top, bins=bins, + weights=100. 
* np.ones(len(h_top)) / len(h), + orientation='horizontal') + + ax_histy.set_ylim(ax_scatter.get_ylim()) + + ax_histy_list.append(ax_histy) + histy_list.append(histy) + + if i != nplots - 1: + ax_scatter.tick_params(labelbottom=False) + ax_histy.tick_params(labelbottom=False, labelleft=False) + ax_histy.set_xticks([]) + else: + ax_histy.tick_params(labelbottom=False, labelleft=False) + ax_histy.set_xticks([]) + ax_scatter.set_xlabel('Evaluation number') + if xname is not None: + ax_scatter.set_xlabel(xname.replace('_', ' ')) + + histmax = 1.1 * max([h.max() for h in histy_list]) + for i, ax_h in enumerate(ax_histy_list): + ax_h.set_xlim(-1, histmax) + def _check_pareto_objectives( self, objectives: Optional[List[Union[str, Objective]]] = None, diff --git a/tests/test_exploration_diagnostics.py b/tests/test_exploration_diagnostics.py index 8e5aed54..be9fec7a 100644 --- a/tests/test_exploration_diagnostics.py +++ b/tests/test_exploration_diagnostics.py @@ -89,6 +89,10 @@ def test_exploration_diagnostics(): diags.plot_worker_timeline() plt.savefig(os.path.join(exploration_dir_path, "timeline.png")) + plt.clf() + diags.plot_history(top=3) + plt.savefig(os.path.join(exploration_dir_path, "history.png")) + # Check the simulation paths. 
delete_index = 10 if i == 0: From 73b2e0da767fbbfe6d8b7ee2f9a432a2f825f264 Mon Sep 17 00:00:00 2001 From: delaossa Date: Thu, 25 Jan 2024 15:30:13 +0100 Subject: [PATCH 02/15] Fine tuning implementation of `plot_history` --- .../diagnostics/exploration_diagnostics.py | 47 ++++++------------- optimas/utils/other.py | 28 +++++++++++ tests/test_exploration_diagnostics.py | 1 - 3 files changed, 42 insertions(+), 34 deletions(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index a1dfb480..f8db63d1 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -10,11 +10,13 @@ import numpy.typing as npt import pandas as pd import matplotlib.pyplot as plt +from matplotlib.figure import Figure from optimas.core import VaryingParameter, Objective, Parameter from optimas.generators.base import Generator from optimas.evaluators.base import Evaluator from optimas.explorations import Exploration +from optimas.utils.other import get_df_with_selection class ExplorationDiagnostics: @@ -284,7 +286,7 @@ def plot_pareto_front( str(id), (x_pareto[i], y_pareto[i]), (2, -2), - fontsize=6, + fontsize="xx-small", va="top", textcoords="offset points", ) @@ -524,41 +526,14 @@ def plot_worker_timeline( ax.set_ylabel("Worker") ax.set_xlabel("Time (s)") - def get_df_with_selection( - self, - df: pd.DataFrame, - select: dict - ) -> pd.DataFrame: - """Return the DataFrame after applying selection criterium. - - Parameters - ---------- - df : DataFrame - The DataFrame object - select: dict - A dictionary containing the selection criteria to apply. - e.g. 
{'f' : [None, -10.]} (get data with f < -10) - """ - condition = '' - for key in select: - if select[key][0] is not None: - if condition != '': - condition += ' and ' - condition += '%s > %f' % (key, select[key][0]) - if select[key][1] is not None: - if condition != '': - condition += ' and ' - condition += '%s < %f' % (key, select[key][1]) - print('Selecting according to the condition: ', condition) - return df.query(condition) - def plot_history( self, parnames: Optional[list] = None, xname: Optional[str] = None, select: Optional[dict] = None, sort: Optional[dict] = None, - top: Optional[dict] = None + top: Optional[dict] = None, + fig: Optional[Figure] = None, ) -> None: """Print selected parameters versus simulation index. @@ -583,6 +558,9 @@ def plot_history( top: int, optional Highight the top n simulations according to the objectives. + + fig: Figure, optional + Matplotlib Figure object to use for this plot. """ # Copy the history DataFrame @@ -604,7 +582,7 @@ def plot_history( # Apply selection to the history DataFrame if select is not None: - df_select = self.get_df_with_selection(df, select) + df_select = get_df_with_selection(df, select) else: df_select = None @@ -626,6 +604,9 @@ def plot_history( parnames.extend(varpar_names) # Make figure + if fig is None: + _ = plt.figure() + nplots = len(parnames) # Definitions for the axes @@ -664,7 +645,7 @@ def plot_history( h_top = df_t[parnames[i]] plt.plot(xvalues_top, h_top, 'o') - # Plot cummin only when proceeds + # Plot trace only when proceeds if (parnames[i] in objective_names) and (not sort) and (xname is None): for o in self.objectives: if o.name == parnames[i]: @@ -673,7 +654,7 @@ def plot_history( cum = df[parnames[i]].cummin().values else: cum = df[parnames[i]].cummax().values - plt.plot(xvalues, cum, '-', c='black') + plt.step(xvalues, cum, where='post', ls='-', c='black') plt.title(parnames[i].replace('_', ' '), fontdict={'fontsize': 'x-small'}, loc='right', pad=2) diff --git 
a/optimas/utils/other.py b/optimas/utils/other.py index 497da62a..6f3def80 100644 --- a/optimas/utils/other.py +++ b/optimas/utils/other.py @@ -65,3 +65,31 @@ def convert_to_dataframe( return pd.DataFrame(data) else: raise ValueError(f"Cannot convert {type(data)} to a pandas dataframe.") + + +def get_df_with_selection( + df: pd.DataFrame, + select: dict +) -> pd.DataFrame: + """Return the DataFrame after applying selection criterium. + + Parameters + ---------- + df : DataFrame + The DataFrame object + select: dict + A dictionary containing the selection criteria to apply. + e.g. {'f' : [None, -10.]} (get data with f < -10) + """ + condition = '' + for key in select: + if select[key][0] is not None: + if condition != '': + condition += ' and ' + condition += '%s > %f' % (key, select[key][0]) + if select[key][1] is not None: + if condition != '': + condition += ' and ' + condition += '%s < %f' % (key, select[key][1]) + print('Selecting according to the condition: ', condition) + return df.query(condition) \ No newline at end of file diff --git a/tests/test_exploration_diagnostics.py b/tests/test_exploration_diagnostics.py index be9fec7a..b2365136 100644 --- a/tests/test_exploration_diagnostics.py +++ b/tests/test_exploration_diagnostics.py @@ -89,7 +89,6 @@ def test_exploration_diagnostics(): diags.plot_worker_timeline() plt.savefig(os.path.join(exploration_dir_path, "timeline.png")) - plt.clf() diags.plot_history(top=3) plt.savefig(os.path.join(exploration_dir_path, "history.png")) From 4bbef2bfcb4b395f51f3eb40c1866a30e513e9e4 Mon Sep 17 00:00:00 2001 From: delaossa Date: Fri, 26 Jan 2024 10:26:04 +0100 Subject: [PATCH 03/15] Add legend and comments --- .../diagnostics/exploration_diagnostics.py | 107 ++++++++++-------- optimas/utils/other.py | 2 +- tests/test_exploration_diagnostics.py | 2 +- 3 files changed, 64 insertions(+), 47 deletions(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index 
f8db63d1..40eccbc5 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -533,9 +533,10 @@ def plot_history( select: Optional[dict] = None, sort: Optional[dict] = None, top: Optional[dict] = None, - fig: Optional[Figure] = None, + show_legend: Optional[bool] = False, + **subplots_kw ) -> None: - """Print selected parameters versus simulation index. + """Print selected parameters versus evaluation index. Parameters ---------- @@ -554,13 +555,16 @@ def plot_history( A dict containing as keys the names of the parameres to sort by and, as values, a Bool indicating if ordering ascendingly (True) or descendingly (False) - e.g. {'f': False} sort simulations according to `f` descendingly. + e.g. {'f': False} sort simulations according to f descendingly. top: int, optional - Highight the top n simulations according to the objectives. + Highight the top n simulations of every objective. - fig: Figure, optional - Matplotlib Figure object to use for this plot. + show_legend : bool, optional + Whether to show the legend. + + **subplots_kw + All additional keyword arguments are passed to the `pyplot.subplots` call. """ # Copy the history DataFrame @@ -604,38 +608,33 @@ def plot_history( parnames.extend(varpar_names) # Make figure - if fig is None: - _ = plt.figure() - nplots = len(parnames) + _, axs = plt.subplots(nplots, 2, + width_ratios=[0.8, 0.2], + **subplots_kw) + plt.subplots_adjust(wspace=0.05) - # Definitions for the axes - l_margin, width1, width2 = 0.08, 0.72, 0.15 - b_margin, t_margin = 0.1, 0.04 - xspacing = 0.015 - yspacing = 0.04 - height = (1. 
- b_margin - t_margin - (nplots - 1) * yspacing) / nplots - nbins = 25 - # Actual plotting ax_histy_list = [] histy_list = [] - for i in range(nplots): - bottom1 = b_margin + (nplots - 1 - i) * (yspacing + height) - rect_scatter = [l_margin, bottom1, width1, height] - rect_histy = [l_margin + width1 + xspacing, bottom1, width2, height] - + for i in range(nplots): + + # Draw scatter plot + ax_scatter = axs[i][0] + ax_scatter.grid(color='lightgray', linestyle='dotted') h = df[parnames[i]] - ax_scatter = plt.axes(rect_scatter) - plt.grid(color='lightgray', linestyle='dotted') - plt.plot(xvalues, h, 'o') + ax_scatter.plot(xvalues, h, 'o') + + # Draw selection if df_select is not None: xvalues_cut = list(df_select.index) if xname is not None: xvalues_cut = df_select[xname] h_cut = df_select[parnames[i]] - plt.plot(xvalues_cut, h_cut, 'o') + ax_scatter.plot(xvalues_cut, h_cut, 'o', + label='select') + # Draw top evaluations if df_top is not None: for df_t in df_top: if xname is not None: @@ -643,9 +642,11 @@ def plot_history( else: xvalues_top = list(df_t.index) h_top = df_t[parnames[i]] - plt.plot(xvalues_top, h_top, 'o') + label='top %i' % top + ax_scatter.plot(xvalues_top, h_top, 'o', + label=label) - # Plot trace only when proceeds + # Draw the trace only for `objective` parameters if (parnames[i] in objective_names) and (not sort) and (xname is None): for o in self.objectives: if o.name == parnames[i]: @@ -654,50 +655,66 @@ def plot_history( cum = df[parnames[i]].cummin().values else: cum = df[parnames[i]].cummax().values - plt.step(xvalues, cum, where='post', ls='-', c='black') + ax_scatter.step(xvalues, cum, zorder=-1, where='post', c='black') - plt.title(parnames[i].replace('_', ' '), fontdict={'fontsize': 'x-small'}, loc='right', pad=2) - ax_histy = plt.axes(rect_histy) - plt.grid(color='lightgray', linestyle='dotted') + # Draw projected histogram + ax_histy = axs[i][1] + ax_histy.grid(color='lightgray', linestyle='dotted') ymin, ymax = ax_scatter.get_ylim() + 
nbins = 25 binwidth = (ymax - ymin) / nbins bins = np.arange(ymin, ymax + binwidth, binwidth) - histy, _, _ = ax_histy.hist(h, bins=bins, - weights=100. * np.ones(len(h)) / len(h), orientation='horizontal') + histy, *_ = ax_histy.hist(h, bins=bins, + weights=100. * np.ones(len(h)) / len(h), + orientation='horizontal') + # Draw selection if df_select is not None: h_cut = df_select[parnames[i]] ax_histy.hist(h_cut, bins=bins, weights=100. * np.ones(len(h_cut)) / len(h), - orientation='horizontal') + orientation='horizontal', + label='selection') + # Draw top evaluations if df_top is not None: for df_t in df_top: h_top = df_t[parnames[i]] + label = 'top %i' % top ax_histy.hist(h_top, bins=bins, weights=100. * np.ones(len(h_top)) / len(h), - orientation='horizontal') + orientation='horizontal', + label=label) ax_histy.set_ylim(ax_scatter.get_ylim()) - ax_histy_list.append(ax_histy) - histy_list.append(histy) + # Tuning axes and labels + ax_scatter.set_title(parnames[i].replace('_', ' '), + fontdict={'fontsize': 'x-small'}, + loc='right', pad=2) if i != nplots - 1: ax_scatter.tick_params(labelbottom=False) ax_histy.tick_params(labelbottom=False, labelleft=False) - ax_histy.set_xticks([]) else: - ax_histy.tick_params(labelbottom=False, labelleft=False) - ax_histy.set_xticks([]) ax_scatter.set_xlabel('Evaluation number') if xname is not None: ax_scatter.set_xlabel(xname.replace('_', ' ')) - - histmax = 1.1 * max([h.max() for h in histy_list]) - for i, ax_h in enumerate(ax_histy_list): - ax_h.set_xlim(-1, histmax) + ax_histy.set_xlabel('%') + ax_histy.tick_params(labelbottom=True, labelleft=False) + if show_legend: + ax_histy.legend(fontsize='xx-small') + + # Make loist of histograms and axes for further manipulation + # outside the loop + ax_histy_list.append(ax_histy) + histy_list.append(histy) + + # Set the range of the histograms axes + histmax = 1.1 * max([h.max() for h in histy_list]) + for i, ax_h in enumerate(ax_histy_list): + ax_h.set_xlim(-1, histmax) def 
_check_pareto_objectives( self, diff --git a/optimas/utils/other.py b/optimas/utils/other.py index 6f3def80..617e0d73 100644 --- a/optimas/utils/other.py +++ b/optimas/utils/other.py @@ -91,5 +91,5 @@ def get_df_with_selection( if condition != '': condition += ' and ' condition += '%s < %f' % (key, select[key][1]) - print('Selecting according to the condition: ', condition) + return df.query(condition) \ No newline at end of file diff --git a/tests/test_exploration_diagnostics.py b/tests/test_exploration_diagnostics.py index b2365136..2f8b0b0b 100644 --- a/tests/test_exploration_diagnostics.py +++ b/tests/test_exploration_diagnostics.py @@ -89,7 +89,7 @@ def test_exploration_diagnostics(): diags.plot_worker_timeline() plt.savefig(os.path.join(exploration_dir_path, "timeline.png")) - diags.plot_history(top=3) + diags.plot_history(top=5, show_legend=True) plt.savefig(os.path.join(exploration_dir_path, "history.png")) # Check the simulation paths. From 3f1bc49e9147764c25c100e844a9198b8cfbdb65 Mon Sep 17 00:00:00 2001 From: delaossa Date: Fri, 26 Jan 2024 11:47:20 +0100 Subject: [PATCH 04/15] Add `**subplots_kw` to other ploting functions. --- .../diagnostics/exploration_diagnostics.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index 40eccbc5..fdddd299 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -160,6 +160,7 @@ def plot_objective( show_trace: Optional[bool] = False, use_time_axis: Optional[bool] = False, relative_start_time: Optional[bool] = True, + **subplots_kw, ) -> None: """Plot the values that where reached during the optimization. @@ -179,7 +180,8 @@ def plot_objective( relative_start_time : bool, optional Whether the time axis should be relative to the start time of the exploration. By default, True. 
- + **subplots_kw + All additional keyword arguments are passed to the `pyplot.subplots` call. """ if fidelity_parameter is not None: fidelity = self.history[fidelity_parameter] @@ -206,7 +208,7 @@ def plot_objective( else: x = history.trial_index xlabel = "Number of evaluations" - _, ax = plt.subplots() + _, ax = plt.subplots(**subplots_kw) ax.scatter(x, history[objective.name], c=fidelity) ax.set_ylabel(objective.name) ax.set_xlabel(xlabel) @@ -224,6 +226,7 @@ def plot_pareto_front( self, objectives: Optional[List[Union[str, Objective]]] = None, show_best_evaluation_indices: Optional[bool] = False, + **subplots_kw, ) -> None: """Plot Pareto front of two optimization objectives. @@ -235,6 +238,8 @@ def plot_pareto_front( show_best_evaluation_indices : bool, optional Whether to show the indices of the best evaluations. By default ``False``. + **subplots_kw + All additional keyword arguments are passed to the `pyplot.subplots` call. """ objectives = self._check_pareto_objectives(objectives) pareto_evals = self.get_pareto_front_evaluations(objectives) @@ -244,7 +249,7 @@ def plot_pareto_front( y_pareto = pareto_evals[objectives[1].name].to_numpy() # Create figure - _, axes = plt.subplots() + _, axes = plt.subplots(**subplots_kw) # Plot all evaluations axes.scatter( @@ -481,6 +486,7 @@ def plot_worker_timeline( self, fidelity_parameter: Optional[str] = None, relative_start_time: Optional[bool] = True, + **subplots_kw, ) -> None: """Plot the timeline of worker utilization. @@ -492,6 +498,8 @@ def plot_worker_timeline( relative_start_time : bool, optional Whether the time axis should be relative to the start time of the exploration. By default, True. + **subplots_kw + All additional keyword arguments are passed to the `pyplot.subplots` call. 
""" df = self.history if fidelity_parameter is not None: @@ -503,7 +511,7 @@ def plot_worker_timeline( if relative_start_time: sim_started_time = sim_started_time - df["gen_started_time"].min() sim_ended_time = sim_ended_time - df["gen_started_time"].min() - _, ax = plt.subplots() + _, ax = plt.subplots(**subplots_kw) for i in range(len(df)): start = sim_started_time.iloc[i] duration = sim_ended_time.iloc[i] - start @@ -542,27 +550,21 @@ def plot_history( ---------- parnames: list of strings, optional List with the names of the parameters to show. - xname: string, optional Name of the parameter to plot in the x axis. By default is the index of the history DataFrame. - select: dict, optional Contains a set of rules to filter the dataframe, e.g. 'f' : [None, -10.] (get data with f < -10) - sort: dict, optional A dict containing as keys the names of the parameres to sort by and, as values, a Bool indicating if ordering ascendingly (True) or descendingly (False) e.g. {'f': False} sort simulations according to f descendingly. - top: int, optional Highight the top n simulations of every objective. - show_legend : bool, optional Whether to show the legend. - **subplots_kw All additional keyword arguments are passed to the `pyplot.subplots` call. 
""" From 684febaf2b02f481b6a5946106b5bcdb0536eabd Mon Sep 17 00:00:00 2001 From: delaossa Date: Fri, 26 Jan 2024 12:49:18 +0100 Subject: [PATCH 05/15] Filter out non-simulated evaluations when doing plots vs running time --- optimas/diagnostics/exploration_diagnostics.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index fdddd299..c799c6bc 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -201,6 +201,7 @@ def plot_objective( history = self.history history = history[history.sim_ended] if use_time_axis: + history = history[history.sim_id >= 0] x = history.sim_ended_time xlabel = "Time (s)" if relative_start_time: @@ -502,6 +503,8 @@ def plot_worker_timeline( All additional keyword arguments are passed to the `pyplot.subplots` call. """ df = self.history + df = df[df.sim_id >= 0] + if fidelity_parameter is not None: min_fidelity = df[fidelity_parameter].min() max_fidelity = df[fidelity_parameter].max() From dac9a932a39b517b60fa26265ffe8d2eb101deb0 Mon Sep 17 00:00:00 2001 From: delaossa Date: Fri, 26 Jan 2024 13:09:30 +0100 Subject: [PATCH 06/15] Minor tweaks --- .../diagnostics/exploration_diagnostics.py | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index c799c6bc..09482eab 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -587,7 +587,7 @@ def plot_history( if xname is not None: xvalues = df[xname] else: - xvalues = list(df.index) + xvalues = df.index # Apply selection to the history DataFrame if select is not None: @@ -627,16 +627,16 @@ def plot_history( # Draw scatter plot ax_scatter = axs[i][0] ax_scatter.grid(color='lightgray', linestyle='dotted') - h = df[parnames[i]] - ax_scatter.plot(xvalues, 
h, 'o') + yvalues = df[parnames[i]] + ax_scatter.plot(xvalues, yvalues, 'o') # Draw selection if df_select is not None: - xvalues_cut = list(df_select.index) + xvalues_cut = df_select.index if xname is not None: xvalues_cut = df_select[xname] - h_cut = df_select[parnames[i]] - ax_scatter.plot(xvalues_cut, h_cut, 'o', + yvalues_cut = df_select[parnames[i]] + ax_scatter.plot(xvalues_cut, yvalues_cut, 'o', label='select') # Draw top evaluations @@ -645,10 +645,10 @@ def plot_history( if xname is not None: xvalues_top = df_t[xname] else: - xvalues_top = list(df_t.index) - h_top = df_t[parnames[i]] + xvalues_top = df_t.index + yvalues_top = df_t[parnames[i]] label='top %i' % top - ax_scatter.plot(xvalues_top, h_top, 'o', + ax_scatter.plot(xvalues_top, yvalues_top, 'o', label=label) # Draw the trace only for `objective` parameters @@ -670,25 +670,25 @@ def plot_history( nbins = 25 binwidth = (ymax - ymin) / nbins bins = np.arange(ymin, ymax + binwidth, binwidth) - histy, *_ = ax_histy.hist(h, bins=bins, - weights=100. * np.ones(len(h)) / len(h), + histy, *_ = ax_histy.hist(yvalues, bins=bins, + weights=100. * np.ones(len(yvalues)) / len(yvalues), orientation='horizontal') # Draw selection if df_select is not None: - h_cut = df_select[parnames[i]] - ax_histy.hist(h_cut, bins=bins, - weights=100. * np.ones(len(h_cut)) / len(h), + yvalues_cut = df_select[parnames[i]] + ax_histy.hist(yvalues_cut, bins=bins, + weights=100. * np.ones(len(yvalues_cut)) / len(yvalues), orientation='horizontal', label='selection') # Draw top evaluations if df_top is not None: for df_t in df_top: - h_top = df_t[parnames[i]] + yvalues_top = df_t[parnames[i]] label = 'top %i' % top - ax_histy.hist(h_top, bins=bins, - weights=100. * np.ones(len(h_top)) / len(h), + ax_histy.hist(yvalues_top, bins=bins, + weights=100. 
* np.ones(len(yvalues_top)) / len(yvalues), orientation='horizontal', label=label) From 5d17cf4fa8629113f6711f114e77b98702ad5b07 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 13:23:22 +0000 Subject: [PATCH 07/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../diagnostics/exploration_diagnostics.py | 126 ++++++++++-------- optimas/utils/other.py | 21 ++- 2 files changed, 81 insertions(+), 66 deletions(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index 09482eab..29d5727d 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -181,7 +181,7 @@ def plot_objective( Whether the time axis should be relative to the start time of the exploration. By default, True. **subplots_kw - All additional keyword arguments are passed to the `pyplot.subplots` call. + All additional keyword arguments are passed to the `pyplot.subplots` call. """ if fidelity_parameter is not None: fidelity = self.history[fidelity_parameter] @@ -240,7 +240,7 @@ def plot_pareto_front( Whether to show the indices of the best evaluations. By default ``False``. **subplots_kw - All additional keyword arguments are passed to the `pyplot.subplots` call. + All additional keyword arguments are passed to the `pyplot.subplots` call. """ objectives = self._check_pareto_objectives(objectives) pareto_evals = self.get_pareto_front_evaluations(objectives) @@ -500,7 +500,7 @@ def plot_worker_timeline( Whether the time axis should be relative to the start time of the exploration. By default, True. **subplots_kw - All additional keyword arguments are passed to the `pyplot.subplots` call. + All additional keyword arguments are passed to the `pyplot.subplots` call. 
""" df = self.history df = df[df.sim_id >= 0] @@ -538,14 +538,14 @@ def plot_worker_timeline( ax.set_xlabel("Time (s)") def plot_history( - self, + self, parnames: Optional[list] = None, - xname: Optional[str] = None, - select: Optional[dict] = None, - sort: Optional[dict] = None, + xname: Optional[str] = None, + select: Optional[dict] = None, + sort: Optional[dict] = None, top: Optional[dict] = None, show_legend: Optional[bool] = False, - **subplots_kw + **subplots_kw, ) -> None: """Print selected parameters versus evaluation index. @@ -560,7 +560,7 @@ def plot_history( Contains a set of rules to filter the dataframe, e.g. 'f' : [None, -10.] (get data with f < -10) sort: dict, optional - A dict containing as keys the names of the parameres to sort by + A dict containing as keys the names of the parameres to sort by and, as values, a Bool indicating if ordering ascendingly (True) or descendingly (False) e.g. {'f': False} sort simulations according to f descendingly. @@ -569,19 +569,21 @@ def plot_history( show_legend : bool, optional Whether to show the legend. **subplots_kw - All additional keyword arguments are passed to the `pyplot.subplots` call. + All additional keyword arguments are passed to the `pyplot.subplots` call. 
""" # Copy the history DataFrame df = self.history.copy() - # Get lists of variable names + # Get lists of variable names objective_names = [obj.name for obj in self.objectives] varpar_names = [var.name for var in self.varying_parameters] # Order list of simulations and re-index if sort is not None: - df = df.sort_values(by=list(sort.keys()), ascending=tuple(sort.values())).reset_index(drop=True) + df = df.sort_values( + by=list(sort.keys()), ascending=tuple(sort.values()) + ).reset_index(drop=True) # Define the quantity to plot in the x axis if xname is not None: @@ -602,7 +604,9 @@ def plot_history( for o in self.objectives: if o.name == obj_name: ascending = o.minimize - index_list = list(df.sort_values(by=obj_name, ascending=ascending).index) + index_list = list( + df.sort_values(by=obj_name, ascending=ascending).index + ) df_top.append(df.loc[index_list[:top]]) else: df_top = None @@ -611,24 +615,21 @@ def plot_history( if parnames is None: parnames = objective_names.copy() parnames.extend(varpar_names) - + # Make figure nplots = len(parnames) - _, axs = plt.subplots(nplots, 2, - width_ratios=[0.8, 0.2], - **subplots_kw) + _, axs = plt.subplots(nplots, 2, width_ratios=[0.8, 0.2], **subplots_kw) plt.subplots_adjust(wspace=0.05) # Actual plotting ax_histy_list = [] histy_list = [] - for i in range(nplots): - + for i in range(nplots): # Draw scatter plot ax_scatter = axs[i][0] - ax_scatter.grid(color='lightgray', linestyle='dotted') + ax_scatter.grid(color="lightgray", linestyle="dotted") yvalues = df[parnames[i]] - ax_scatter.plot(xvalues, yvalues, 'o') + ax_scatter.plot(xvalues, yvalues, "o") # Draw selection if df_select is not None: @@ -636,8 +637,7 @@ def plot_history( if xname is not None: xvalues_cut = df_select[xname] yvalues_cut = df_select[parnames[i]] - ax_scatter.plot(xvalues_cut, yvalues_cut, 'o', - label='select') + ax_scatter.plot(xvalues_cut, yvalues_cut, "o", label="select") # Draw top evaluations if df_top is not None: @@ -647,12 +647,15 @@ def 
plot_history( else: xvalues_top = df_t.index yvalues_top = df_t[parnames[i]] - label='top %i' % top - ax_scatter.plot(xvalues_top, yvalues_top, 'o', - label=label) - - # Draw the trace only for `objective` parameters - if (parnames[i] in objective_names) and (not sort) and (xname is None): + label = "top %i" % top + ax_scatter.plot(xvalues_top, yvalues_top, "o", label=label) + + # Draw the trace only for `objective` parameters + if ( + (parnames[i] in objective_names) + and (not sort) + and (xname is None) + ): for o in self.objectives: if o.name == parnames[i]: minimize = o.minimize @@ -660,58 +663,73 @@ def plot_history( cum = df[parnames[i]].cummin().values else: cum = df[parnames[i]].cummax().values - ax_scatter.step(xvalues, cum, zorder=-1, where='post', c='black') - - + ax_scatter.step( + xvalues, cum, zorder=-1, where="post", c="black" + ) + # Draw projected histogram ax_histy = axs[i][1] - ax_histy.grid(color='lightgray', linestyle='dotted') + ax_histy.grid(color="lightgray", linestyle="dotted") ymin, ymax = ax_scatter.get_ylim() nbins = 25 binwidth = (ymax - ymin) / nbins bins = np.arange(ymin, ymax + binwidth, binwidth) - histy, *_ = ax_histy.hist(yvalues, bins=bins, - weights=100. * np.ones(len(yvalues)) / len(yvalues), - orientation='horizontal') + histy, *_ = ax_histy.hist( + yvalues, + bins=bins, + weights=100.0 * np.ones(len(yvalues)) / len(yvalues), + orientation="horizontal", + ) # Draw selection if df_select is not None: yvalues_cut = df_select[parnames[i]] - ax_histy.hist(yvalues_cut, bins=bins, - weights=100. * np.ones(len(yvalues_cut)) / len(yvalues), - orientation='horizontal', - label='selection') + ax_histy.hist( + yvalues_cut, + bins=bins, + weights=100.0 * np.ones(len(yvalues_cut)) / len(yvalues), + orientation="horizontal", + label="selection", + ) # Draw top evaluations if df_top is not None: for df_t in df_top: yvalues_top = df_t[parnames[i]] - label = 'top %i' % top - ax_histy.hist(yvalues_top, bins=bins, - weights=100. 
* np.ones(len(yvalues_top)) / len(yvalues), - orientation='horizontal', - label=label) - + label = "top %i" % top + ax_histy.hist( + yvalues_top, + bins=bins, + weights=100.0 + * np.ones(len(yvalues_top)) + / len(yvalues), + orientation="horizontal", + label=label, + ) + ax_histy.set_ylim(ax_scatter.get_ylim()) # Tuning axes and labels - ax_scatter.set_title(parnames[i].replace('_', ' '), - fontdict={'fontsize': 'x-small'}, - loc='right', pad=2) + ax_scatter.set_title( + parnames[i].replace("_", " "), + fontdict={"fontsize": "x-small"}, + loc="right", + pad=2, + ) if i != nplots - 1: ax_scatter.tick_params(labelbottom=False) ax_histy.tick_params(labelbottom=False, labelleft=False) else: - ax_scatter.set_xlabel('Evaluation number') + ax_scatter.set_xlabel("Evaluation number") if xname is not None: - ax_scatter.set_xlabel(xname.replace('_', ' ')) - ax_histy.set_xlabel('%') + ax_scatter.set_xlabel(xname.replace("_", " ")) + ax_histy.set_xlabel("%") ax_histy.tick_params(labelbottom=True, labelleft=False) if show_legend: - ax_histy.legend(fontsize='xx-small') + ax_histy.legend(fontsize="xx-small") - # Make loist of histograms and axes for further manipulation + # Make loist of histograms and axes for further manipulation # outside the loop ax_histy_list.append(ax_histy) histy_list.append(histy) diff --git a/optimas/utils/other.py b/optimas/utils/other.py index 617e0d73..0747b03e 100644 --- a/optimas/utils/other.py +++ b/optimas/utils/other.py @@ -67,10 +67,7 @@ def convert_to_dataframe( raise ValueError(f"Cannot convert {type(data)} to a pandas dataframe.") -def get_df_with_selection( - df: pd.DataFrame, - select: dict -) -> pd.DataFrame: +def get_df_with_selection(df: pd.DataFrame, select: dict) -> pd.DataFrame: """Return the DataFrame after applying selection criterium. Parameters @@ -81,15 +78,15 @@ def get_df_with_selection( A dictionary containing the selection criteria to apply. e.g. 
{'f' : [None, -10.]} (get data with f < -10) """ - condition = '' + condition = "" for key in select: if select[key][0] is not None: - if condition != '': - condition += ' and ' - condition += '%s > %f' % (key, select[key][0]) + if condition != "": + condition += " and " + condition += "%s > %f" % (key, select[key][0]) if select[key][1] is not None: - if condition != '': - condition += ' and ' - condition += '%s < %f' % (key, select[key][1]) + if condition != "": + condition += " and " + condition += "%s < %f" % (key, select[key][1]) - return df.query(condition) \ No newline at end of file + return df.query(condition) From 7847b68e6ee4f09bd860b9a207b6c1bb1b38f454 Mon Sep 17 00:00:00 2001 From: delaossa Date: Fri, 26 Jan 2024 15:00:37 +0100 Subject: [PATCH 08/15] Remove a blank line causing pydocstyle test to fail --- optimas/diagnostics/exploration_diagnostics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index 09482eab..b227f722 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -571,7 +571,6 @@ def plot_history( **subplots_kw All additional keyword arguments are passed to the `pyplot.subplots` call. 
""" - # Copy the history DataFrame df = self.history.copy() From 430501ec944178447a5daf00eb2e53d4bb3373bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Ferran=20Pousa?= Date: Fri, 26 Jan 2024 17:00:16 +0100 Subject: [PATCH 09/15] Add missing data to history --- optimas/explorations/base.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/optimas/explorations/base.py b/optimas/explorations/base.py index 7edcea89..bc7a636b 100644 --- a/optimas/explorations/base.py +++ b/optimas/explorations/base.py @@ -3,6 +3,7 @@ import os import glob import json +import time from typing import Optional, Union, Dict, List, Literal import numpy as np @@ -396,6 +397,14 @@ def attach_evaluations( for field in fields: if field in history_new.dtype.names: history_new[field] = evaluation_data[field] + + current_time = time.time() + history_new["gen_started_time"] = current_time + history_new["gen_ended_time"] = current_time + history_new["gen_informed_time"] = current_time + history_new["sim_started_time"] = current_time + history_new["sim_ended_time"] = current_time + history_new["gen_informed"] = True history_new["sim_started"] = True history_new["sim_ended"] = True history_new["trial_index"] = np.arange( From 8dbbdff25c9816981121774c2137e6ff337fd9be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:00:35 +0000 Subject: [PATCH 10/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- optimas/explorations/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimas/explorations/base.py b/optimas/explorations/base.py index bc7a636b..b1ea4850 100644 --- a/optimas/explorations/base.py +++ b/optimas/explorations/base.py @@ -397,7 +397,7 @@ def attach_evaluations( for field in fields: if field in history_new.dtype.names: history_new[field] = evaluation_data[field] - + current_time = time.time() 
history_new["gen_started_time"] = current_time history_new["gen_ended_time"] = current_time From ab495b19e9325b9d9ea50894930118c765d48755 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Ferran=20Pousa?= Date: Fri, 26 Jan 2024 17:03:59 +0100 Subject: [PATCH 11/15] Remove unused import --- optimas/diagnostics/exploration_diagnostics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index f1f04a6a..d17e49ac 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -10,7 +10,6 @@ import numpy.typing as npt import pandas as pd import matplotlib.pyplot as plt -from matplotlib.figure import Figure from optimas.core import VaryingParameter, Objective, Parameter from optimas.generators.base import Generator From 926c67ea9bbccd2cbcede5145586c343d15af9ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Ferran=20Pousa?= Date: Fri, 26 Jan 2024 17:41:48 +0100 Subject: [PATCH 12/15] Fix bug when loading history --- optimas/explorations/base.py | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/optimas/explorations/base.py b/optimas/explorations/base.py index bc7a636b..e4fb851f 100644 --- a/optimas/explorations/base.py +++ b/optimas/explorations/base.py @@ -360,6 +360,13 @@ def attach_evaluations( a problem in the data. If set to `True`, the error will be ignored. """ evaluation_data = convert_to_dataframe(evaluation_data) + + # Determine if evaluations come from past history array and, if so, + # keep only those that finished. 
+ is_history = "sim_ended" in evaluation_data + if is_history: + evaluation_data = evaluation_data[evaluation_data["sim_ended"]] + n_evals = len(evaluation_data) if n_evals == 0: return @@ -398,20 +405,21 @@ def attach_evaluations( if field in history_new.dtype.names: history_new[field] = evaluation_data[field] - current_time = time.time() - history_new["gen_started_time"] = current_time - history_new["gen_ended_time"] = current_time - history_new["gen_informed_time"] = current_time - history_new["sim_started_time"] = current_time - history_new["sim_ended_time"] = current_time - history_new["gen_informed"] = True - history_new["sim_started"] = True - history_new["sim_ended"] = True - history_new["trial_index"] = np.arange( - self.generator._trial_count, - self.generator._trial_count + n_evals, - dtype=int, - ) + if not is_history: + current_time = time.time() + history_new["gen_started_time"] = current_time + history_new["gen_ended_time"] = current_time + history_new["gen_informed_time"] = current_time + history_new["sim_started_time"] = current_time + history_new["sim_ended_time"] = current_time + history_new["gen_informed"] = True + history_new["sim_started"] = True + history_new["sim_ended"] = True + history_new["trial_index"] = np.arange( + self.generator._trial_count, + self.generator._trial_count + n_evals, + dtype=int, + ) # Incorporate new history into generator. 
self.generator.incorporate_history(history_new) From abac990ab9d018635d27362b6420bf2b419b1667 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Ferran=20Pousa?= Date: Fri, 26 Jan 2024 17:49:01 +0100 Subject: [PATCH 13/15] Plot also attached evaluations --- optimas/diagnostics/exploration_diagnostics.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index d17e49ac..d9018959 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -200,7 +200,6 @@ def plot_objective( history = self.history history = history[history.sim_ended] if use_time_axis: - history = history[history.sim_id >= 0] x = history.sim_ended_time xlabel = "Time (s)" if relative_start_time: From f76c27f570c4504ec8722e0f541c6268af30e804 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:53:24 +0000 Subject: [PATCH 14/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- optimas/explorations/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimas/explorations/base.py b/optimas/explorations/base.py index e4fb851f..d52d28ad 100644 --- a/optimas/explorations/base.py +++ b/optimas/explorations/base.py @@ -404,7 +404,7 @@ def attach_evaluations( for field in fields: if field in history_new.dtype.names: history_new[field] = evaluation_data[field] - + if not is_history: current_time = time.time() history_new["gen_started_time"] = current_time From f423a19d9ec55ba490b9ad7a0a3270378428fab0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Ferran=20Pousa?= Date: Fri, 26 Jan 2024 17:58:07 +0100 Subject: [PATCH 15/15] `dict` -> `Dict` --- optimas/diagnostics/exploration_diagnostics.py | 8 ++++---- optimas/utils/other.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git 
a/optimas/diagnostics/exploration_diagnostics.py b/optimas/diagnostics/exploration_diagnostics.py index d9018959..204248a4 100644 --- a/optimas/diagnostics/exploration_diagnostics.py +++ b/optimas/diagnostics/exploration_diagnostics.py @@ -4,7 +4,7 @@ from warnings import warn import pathlib import json -from typing import Optional, List, Tuple, Union +from typing import Optional, List, Dict, Tuple, Union import numpy as np import numpy.typing as npt @@ -539,9 +539,9 @@ def plot_history( self, parnames: Optional[list] = None, xname: Optional[str] = None, - select: Optional[dict] = None, - sort: Optional[dict] = None, - top: Optional[dict] = None, + select: Optional[Dict] = None, + sort: Optional[Dict] = None, + top: Optional[Dict] = None, show_legend: Optional[bool] = False, **subplots_kw, ) -> None: diff --git a/optimas/utils/other.py b/optimas/utils/other.py index 0747b03e..29e56206 100644 --- a/optimas/utils/other.py +++ b/optimas/utils/other.py @@ -67,7 +67,7 @@ def convert_to_dataframe( raise ValueError(f"Cannot convert {type(data)} to a pandas dataframe.") -def get_df_with_selection(df: pd.DataFrame, select: dict) -> pd.DataFrame: +def get_df_with_selection(df: pd.DataFrame, select: Dict) -> pd.DataFrame: """Return the DataFrame after applying selection criterium. Parameters