Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds a new plot to ExplorationDiagnostics #168

Merged
merged 18 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 218 additions & 6 deletions optimas/diagnostics/exploration_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from warnings import warn
import pathlib
import json
from typing import Optional, List, Tuple, Union
from typing import Optional, List, Dict, Tuple, Union

import numpy as np
import numpy.typing as npt
Expand All @@ -15,6 +15,7 @@
from optimas.generators.base import Generator
from optimas.evaluators.base import Evaluator
from optimas.explorations import Exploration
from optimas.utils.other import get_df_with_selection


class ExplorationDiagnostics:
Expand Down Expand Up @@ -158,6 +159,7 @@ def plot_objective(
show_trace: Optional[bool] = False,
use_time_axis: Optional[bool] = False,
relative_start_time: Optional[bool] = True,
**subplots_kw,
) -> None:
"""Plot the values that where reached during the optimization.

Expand All @@ -177,7 +179,8 @@ def plot_objective(
relative_start_time : bool, optional
Whether the time axis should be relative to the start time
of the exploration. By default, True.

**subplots_kw
All additional keyword arguments are passed to the `pyplot.subplots` call.
"""
if fidelity_parameter is not None:
fidelity = self.history[fidelity_parameter]
Expand All @@ -204,7 +207,7 @@ def plot_objective(
else:
x = history.trial_index
xlabel = "Number of evaluations"
_, ax = plt.subplots()
_, ax = plt.subplots(**subplots_kw)
ax.scatter(x, history[objective.name], c=fidelity)
ax.set_ylabel(objective.name)
ax.set_xlabel(xlabel)
Expand All @@ -222,6 +225,7 @@ def plot_pareto_front(
self,
objectives: Optional[List[Union[str, Objective]]] = None,
show_best_evaluation_indices: Optional[bool] = False,
**subplots_kw,
) -> None:
"""Plot Pareto front of two optimization objectives.

Expand All @@ -233,6 +237,8 @@ def plot_pareto_front(
show_best_evaluation_indices : bool, optional
Whether to show the indices of the best evaluations. By default
``False``.
**subplots_kw
All additional keyword arguments are passed to the `pyplot.subplots` call.
"""
objectives = self._check_pareto_objectives(objectives)
pareto_evals = self.get_pareto_front_evaluations(objectives)
Expand All @@ -242,7 +248,7 @@ def plot_pareto_front(
y_pareto = pareto_evals[objectives[1].name].to_numpy()

# Create figure
_, axes = plt.subplots()
_, axes = plt.subplots(**subplots_kw)

# Plot all evaluations
axes.scatter(
Expand Down Expand Up @@ -284,7 +290,7 @@ def plot_pareto_front(
str(id),
(x_pareto[i], y_pareto[i]),
(2, -2),
fontsize=6,
fontsize="xx-small",
va="top",
textcoords="offset points",
)
Expand Down Expand Up @@ -479,6 +485,7 @@ def plot_worker_timeline(
self,
fidelity_parameter: Optional[str] = None,
relative_start_time: Optional[bool] = True,
**subplots_kw,
) -> None:
"""Plot the timeline of worker utilization.

Expand All @@ -490,8 +497,12 @@ def plot_worker_timeline(
relative_start_time : bool, optional
Whether the time axis should be relative to the start time
of the exploration. By default, True.
**subplots_kw
All additional keyword arguments are passed to the `pyplot.subplots` call.
"""
df = self.history
df = df[df.sim_id >= 0]

if fidelity_parameter is not None:
min_fidelity = df[fidelity_parameter].min()
max_fidelity = df[fidelity_parameter].max()
Expand All @@ -501,7 +512,7 @@ def plot_worker_timeline(
if relative_start_time:
sim_started_time = sim_started_time - df["gen_started_time"].min()
sim_ended_time = sim_ended_time - df["gen_started_time"].min()
_, ax = plt.subplots()
_, ax = plt.subplots(**subplots_kw)
for i in range(len(df)):
start = sim_started_time.iloc[i]
duration = sim_ended_time.iloc[i] - start
Expand All @@ -524,6 +535,207 @@ def plot_worker_timeline(
ax.set_ylabel("Worker")
ax.set_xlabel("Time (s)")

def plot_history(
self,
parnames: Optional[list] = None,
xname: Optional[str] = None,
select: Optional[Dict] = None,
sort: Optional[Dict] = None,
top: Optional[Dict] = None,
show_legend: Optional[bool] = False,
**subplots_kw,
) -> None:
"""Print selected parameters versus evaluation index.

Parameters
----------
parnames: list of strings, optional
List with the names of the parameters to show.
xname: string, optional
Name of the parameter to plot in the x axis.
By default is the index of the history DataFrame.
select: dict, optional
Contains a set of rules to filter the dataframe, e.g.
'f' : [None, -10.] (get data with f < -10)
sort: dict, optional
A dict containing as keys the names of the parameres to sort by
and, as values, a Bool indicating if ordering ascendingly (True)
or descendingly (False)
e.g. {'f': False} sort simulations according to f descendingly.
top: int, optional
Highight the top n simulations of every objective.
show_legend : bool, optional
Whether to show the legend.
**subplots_kw
All additional keyword arguments are passed to the `pyplot.subplots` call.
"""
# Copy the history DataFrame
df = self.history.copy()

# Get lists of variable names
objective_names = [obj.name for obj in self.objectives]
varpar_names = [var.name for var in self.varying_parameters]

# Order list of simulations and re-index
if sort is not None:
df = df.sort_values(
by=list(sort.keys()), ascending=tuple(sort.values())
).reset_index(drop=True)

# Define the quantity to plot in the x axis
if xname is not None:
xvalues = df[xname]
else:
xvalues = df.index

# Apply selection to the history DataFrame
if select is not None:
df_select = get_df_with_selection(df, select)
else:
df_select = None

# Select top cases in separate DataFrames
if top is not None:
df_top = []
for obj_name in objective_names:
for o in self.objectives:
if o.name == obj_name:
ascending = o.minimize
index_list = list(
df.sort_values(by=obj_name, ascending=ascending).index
)
df_top.append(df.loc[index_list[:top]])
else:
df_top = None

# Default list of parameters to show
if parnames is None:
parnames = objective_names.copy()
parnames.extend(varpar_names)

# Make figure
nplots = len(parnames)
_, axs = plt.subplots(nplots, 2, width_ratios=[0.8, 0.2], **subplots_kw)
plt.subplots_adjust(wspace=0.05)

# Actual plotting
ax_histy_list = []
histy_list = []
for i in range(nplots):
# Draw scatter plot
ax_scatter = axs[i][0]
ax_scatter.grid(color="lightgray", linestyle="dotted")
yvalues = df[parnames[i]]
ax_scatter.plot(xvalues, yvalues, "o")

# Draw selection
if df_select is not None:
xvalues_cut = df_select.index
if xname is not None:
xvalues_cut = df_select[xname]
yvalues_cut = df_select[parnames[i]]
ax_scatter.plot(xvalues_cut, yvalues_cut, "o", label="select")

# Draw top evaluations
if df_top is not None:
for df_t in df_top:
if xname is not None:
xvalues_top = df_t[xname]
else:
xvalues_top = df_t.index
yvalues_top = df_t[parnames[i]]
label = "top %i" % top
ax_scatter.plot(xvalues_top, yvalues_top, "o", label=label)

# Draw the trace only for `objective` parameters
if (
(parnames[i] in objective_names)
and (not sort)
and (xname is None)
):
for o in self.objectives:
if o.name == parnames[i]:
minimize = o.minimize
if minimize:
cum = df[parnames[i]].cummin().values
else:
cum = df[parnames[i]].cummax().values
ax_scatter.step(
xvalues, cum, zorder=-1, where="post", c="black"
)

# Draw projected histogram
ax_histy = axs[i][1]
ax_histy.grid(color="lightgray", linestyle="dotted")
ymin, ymax = ax_scatter.get_ylim()
nbins = 25
binwidth = (ymax - ymin) / nbins
bins = np.arange(ymin, ymax + binwidth, binwidth)
histy, *_ = ax_histy.hist(
yvalues,
bins=bins,
weights=100.0 * np.ones(len(yvalues)) / len(yvalues),
orientation="horizontal",
)

# Draw selection
if df_select is not None:
yvalues_cut = df_select[parnames[i]]
ax_histy.hist(
yvalues_cut,
bins=bins,
weights=100.0 * np.ones(len(yvalues_cut)) / len(yvalues),
orientation="horizontal",
label="selection",
)

# Draw top evaluations
if df_top is not None:
for df_t in df_top:
yvalues_top = df_t[parnames[i]]
label = "top %i" % top
ax_histy.hist(
yvalues_top,
bins=bins,
weights=100.0
* np.ones(len(yvalues_top))
/ len(yvalues),
orientation="horizontal",
label=label,
)

ax_histy.set_ylim(ax_scatter.get_ylim())

# Tuning axes and labels
ax_scatter.set_title(
parnames[i].replace("_", " "),
fontdict={"fontsize": "x-small"},
loc="right",
pad=2,
)

if i != nplots - 1:
ax_scatter.tick_params(labelbottom=False)
ax_histy.tick_params(labelbottom=False, labelleft=False)
else:
ax_scatter.set_xlabel("Evaluation number")
if xname is not None:
ax_scatter.set_xlabel(xname.replace("_", " "))
ax_histy.set_xlabel("%")
ax_histy.tick_params(labelbottom=True, labelleft=False)
if show_legend:
ax_histy.legend(fontsize="xx-small")

# Make loist of histograms and axes for further manipulation
# outside the loop
ax_histy_list.append(ax_histy)
histy_list.append(histy)

# Set the range of the histograms axes
histmax = 1.1 * max([h.max() for h in histy_list])
for i, ax_h in enumerate(ax_histy_list):
ax_h.set_xlim(-1, histmax)

def _check_pareto_objectives(
self,
objectives: Optional[List[Union[str, Objective]]] = None,
Expand Down
31 changes: 24 additions & 7 deletions optimas/explorations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import glob
import json
import time
from typing import Optional, Union, Dict, List, Literal

import numpy as np
Expand Down Expand Up @@ -359,6 +360,13 @@ def attach_evaluations(
a problem in the data. If set to `True`, the error will be ignored.
"""
evaluation_data = convert_to_dataframe(evaluation_data)

# Determine if evaluations come from past history array and, if so,
# keep only those that finished.
is_history = "sim_ended" in evaluation_data
if is_history:
evaluation_data = evaluation_data[evaluation_data["sim_ended"]]

n_evals = len(evaluation_data)
if n_evals == 0:
return
Expand Down Expand Up @@ -396,13 +404,22 @@ def attach_evaluations(
for field in fields:
if field in history_new.dtype.names:
history_new[field] = evaluation_data[field]
history_new["sim_started"] = True
history_new["sim_ended"] = True
history_new["trial_index"] = np.arange(
self.generator._trial_count,
self.generator._trial_count + n_evals,
dtype=int,
)

if not is_history:
current_time = time.time()
history_new["gen_started_time"] = current_time
history_new["gen_ended_time"] = current_time
history_new["gen_informed_time"] = current_time
history_new["sim_started_time"] = current_time
history_new["sim_ended_time"] = current_time
history_new["gen_informed"] = True
history_new["sim_started"] = True
history_new["sim_ended"] = True
history_new["trial_index"] = np.arange(
self.generator._trial_count,
self.generator._trial_count + n_evals,
dtype=int,
)

# Incorporate new history into generator.
self.generator.incorporate_history(history_new)
Expand Down
25 changes: 25 additions & 0 deletions optimas/utils/other.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,28 @@ def convert_to_dataframe(
return pd.DataFrame(data)
else:
raise ValueError(f"Cannot convert {type(data)} to a pandas dataframe.")


def get_df_with_selection(df: pd.DataFrame, select: Dict) -> pd.DataFrame:
    """Return the DataFrame after applying a selection criterion.

    Parameters
    ----------
    df : DataFrame
        The DataFrame object
    select: dict
        A dictionary containing the selection criteria to apply.
        Each key is a column name and each value a ``[lower, upper]``
        pair; a ``None`` bound is ignored. Bounds are strict (``<``/``>``).
        e.g. {'f' : [None, -10.]} (get data with f < -10)
    """
    # Filter with a boolean mask instead of a `df.query` string. This keeps
    # full float precision (the "%f" formatting used previously truncated
    # thresholds to 6 decimal places), supports column names that `query`
    # cannot parse, and returns the unfiltered frame when every bound is
    # None (an empty query string would raise a ValueError).
    mask = pd.Series(True, index=df.index)
    for key in select:
        lower, upper = select[key][0], select[key][1]
        if lower is not None:
            mask &= df[key] > lower
        if upper is not None:
            mask &= df[key] < upper
    return df[mask]
Loading
Loading