Skip to content

Commit

Permalink
fixup 2
Browse files Browse the repository at this point in the history
  • Loading branch information
knelli2 committed Mar 5, 2024
1 parent 6fd0ea5 commit 47372cb
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 104 deletions.
185 changes: 83 additions & 102 deletions src/Visualization/Python/PlotMemoryMonitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import click
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from spectre.Visualization.ReadH5 import available_subfiles, to_dataframe
Expand All @@ -36,16 +37,9 @@
# Plotting options
@click.option(
"--use-mb/--use-gb",
default=True,
show_default=True,
help="Plot the y-axis in Megabytes or Gigabytes",
)
@click.option(
"--color-components",
is_flag=True,
default=False,
show_default=True,
help="Plot individual parallel components in color instead of grey",
help="Plot the y-axis in Megabytes or Gigabytes",
)
@click.option(
"--x-label",
Expand Down Expand Up @@ -74,7 +68,6 @@ def plot_memory_monitors_command(
reduction_files: Sequence[str],
output: Optional[str],
use_mb: bool,
color_components: Optional[bool],
x_label: Optional[str],
x_bounds: Optional[Sequence[float]],
stylesheet,
Expand Down Expand Up @@ -107,15 +100,11 @@ def check_memory_monitor_dir(h5_filename: str):

return list(memory_monitor_dir.keys())

# Get a list of all components that we have monitored from the first
# reductions file
memory_filenames = check_memory_monitor_dir(reduction_files[0])
df_dict = {}

# Given an h5 filename (for error messages), an already-open h5 file, and a
# subfile name, make sure that the subfile exists inside the h5 file. Then
# return the subfile's contents as a DataFrame.
def check_memory_monitor_file(
    h5_filename: str, h5file: h5py.File, subfile_name: str
):
    subfile_path = f"/MemoryMonitors/{subfile_name}"
    subfile = h5file.get(subfile_path)
    # BUG FIX: the original tested ``subfile_path is None``, which can never
    # be true (it is a freshly built f-string). The intent is to test the
    # result of ``h5file.get``, which returns None when the path is absent.
    if subfile is None:
        # NOTE(review): the original error branch is not visible in this
        # view — message text reconstructed; confirm against the file.
        raise click.UsageError(
            f"Unable to open memory monitor subfile '{subfile_path}' in h5"
            f" file {h5_filename}."
        )

    return to_dataframe(subfile)

# First just concat all individual memory monitors. This will also check
# that all components from the first reductions file are also in all
# subsequent reductions files
for memory_filename in memory_filenames:
df_dict[memory_filename] = pd.concat(
check_memory_monitor_file(reduction_file, memory_filename)
for reduction_file in reduction_files
)

# Restrict to x-bounds if there are any
if x_bounds is not None:
df_dict[memory_filename] = df_dict[memory_filename][
(df_dict[memory_filename]["Time"] >= x_bounds[0])
& (df_dict[memory_filename]["Time"] <= x_bounds[1])
]

# Given a DataFrame, sum all the columns that list totals for each node
def compute_individual_total(df: pd.DataFrame):
def total_over_nodes(df: pd.DataFrame):
total = None
for column in df.columns:
# "Size (MB)" is for a singleton. "Size on node" is for groups,
Expand All @@ -160,46 +133,78 @@ def compute_individual_total(df: pd.DataFrame):

return total

# Get a list of all components that we have monitored from the first
# reductions file
memory_filenames = check_memory_monitor_dir(reduction_files[0])

# Open every h5file. For each h5file, turn each subfile into a DataFrame.
# Then concat all DataFrames together into one that's indexed by the
# subfile/component name.
totals_df = pd.DataFrame()
for reduction_file in reduction_files:
h5file = h5py.File(reduction_file)
local_totals_df = pd.DataFrame()

for subfile_name in memory_filenames:
df = check_memory_monitor_file(reduction_file, h5file, subfile_name)
if "Time" not in local_totals_df:
local_totals_df["Time"] = df["Time"]
local_totals_df[subfile_name] = total_over_nodes(df)

totals_df = pd.concat([totals_df, local_totals_df])

# Restrict to x-bounds if there are any. Filter the whole DataFrame
# row-wise in one step: masking each component column individually (as the
# per-column loop did) assigns a shorter Series back into the frame, which
# pandas index-aligns — leaving NaNs in the components while "Time" keeps
# every row, so the plotted x-axis would not actually be restricted.
if x_bounds is not None:
    totals_df = totals_df[
        (totals_df["Time"] >= x_bounds[0])
        & (totals_df["Time"] <= x_bounds[1])
    ]

# Need .dat because all other components have that extension
the_rest_str = "The Rest.dat"

# Sum the per-component totals in ``df`` into one grand total, and lump
# every component whose peak usage is below 1% of the mean total into a
# single "The Rest" series. Returns (total, the_rest, components_to_plot),
# where ``the_rest`` is None if no component was small enough to be lumped
# and ``components_to_plot`` lists the column names worth plotting
# individually (including the "The Rest" entry when present).
def compute_overall_total_and_the_rest(df: pd.DataFrame):
    # Accumulate the grand total across all monitored components
    total = None
    for name in memory_filenames:
        total = df[name].copy() if total is None else total + df[name]

    average_max = total.mean()
    # The total itself is always plotted, so it never appears in this list
    components_to_plot = []

    # Components peaking below 1% of the average total are merged into a
    # single "The Rest" series instead of being plotted on their own
    the_rest = None
    for name in memory_filenames:
        series = df[name]
        if series.max() < 0.01 * average_max:
            the_rest = (
                series.copy() if the_rest is None else the_rest + series
            )
        else:
            components_to_plot.append(name)

    if the_rest is not None:
        components_to_plot.append(the_rest_str)

    return total, the_rest, components_to_plot

# Now make a DataFrame for the total memory across all components
totals_df, components_to_plot = make_totals_df(df_dict)
# Now add the total and the rest to the totals DataFrame
total_series, the_rest_series, components_to_plot = (
compute_overall_total_and_the_rest(totals_df)
)
totals_df["Total"] = total_series
if the_rest_series is not None:
totals_df[the_rest_str] = the_rest_series

# For plotting in MB vs GB
divisor = 1.0 if use_mb else 1000.0
Expand All @@ -211,64 +216,40 @@ def make_totals_df(component_df_dict: dict):
fig = plt.figure()
ax = fig.add_subplot(111)

# If we are coloring components, add this to the legend
if color_components:
ax.plot(
totals_df["Time"],
totals_df["Total"] / divisor,
color="black",
label="Total",
)

# Determine plotting order
maxes = [totals_df[component].max() for component in components_to_plot]
permutation = np.argsort(maxes)[::-1]

# Plot the individual components
for idx in permutation:
component = components_to_plot[idx]
ax.plot(
totals_df["Time"],
totals_df["Total"] / divisor,
color="black",
label="Total",
totals_df[component] / divisor,
linewidth=0.2,
# Remove .dat extension
label=component[:-4],
)
else:
ax.plot(totals_df["Time"], totals_df["Total"] / divisor, color="black")

for component in components_to_plot:
# Plot in color (with a legend)
if color_components:
ax.plot(
totals_df["Time"],
totals_df[component] / divisor,
linewidth=0.2,
# Remove .dat extension
label=component[:-4],
)
else:
ax.plot(
totals_df["Time"],
totals_df[component] / divisor,
color="grey",
linewidth=0.2,
)

gb_or_mb = "MB" if use_mb else "GB"
plt.title(f"Total Memory Usage ({gb_or_mb})")
if x_label is not None:
ax.set_xlabel(x_label)
if color_components:
# Total is always first
maxes = [inf]
for component in components_to_plot:
maxes.append(totals_df[component].max())
# Now that we have all the maxes, we need to sort them and get the
# permutation of the sort for the legend so it's in order of largest at
# the top, smallest at the bottom
maxes_tuple = [(maxes[i], i) for i in range(len(maxes))]
maxes_tuple.sort()
# Reverse because of the order of plotting
maxes_tuple.reverse()
_, permutation = zip(*maxes_tuple)

handles, labels = plt.gca().get_legend_handles_labels()
leg = plt.legend(
[handles[idx] for idx in permutation],
[labels[idx] for idx in permutation],
loc="center left",
bbox_to_anchor=(1, 0.5),
)
# The lines in the legend are a bit small because of the plot linewidth,
# so make the legend lines a bit bigger
for line in leg.get_lines():
line.set_linewidth(1.0)
# The lines in the legend are a bit small because of the plot linewidth,
# so make the legend lines a bit bigger
leg = plt.legend(
loc="center left",
bbox_to_anchor=(1, 0.5),
)
for line in leg.get_lines():
line.set_linewidth(1.0)

if output is not None:
output = output.split(".pdf")[0]
Expand Down
2 changes: 0 additions & 2 deletions tests/Unit/Visualization/Python/Test_PlotMemoryMonitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ def test_plot_size(self):
os.path.join(self.work_dir, self.reductions_file_names[1]),
"-o",
os.path.join(self.work_dir, "MultiFile"),
"--use-gb",
"--color-components",
"--x-label",
"Time (M)",
],
Expand Down

0 comments on commit 47372cb

Please sign in to comment.