Skip to content

Commit

Permalink
fixup 2
Browse files Browse the repository at this point in the history
  • Loading branch information
knelli2 committed Mar 5, 2024
1 parent 6fd0ea5 commit 47372cb
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 104 deletions.
185 changes: 83 additions & 102 deletions src/Visualization/Python/PlotMemoryMonitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import click
import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from spectre.Visualization.ReadH5 import available_subfiles, to_dataframe
Expand All @@ -36,16 +37,9 @@
# Plotting options
@click.option(
"--use-mb/--use-gb",
default=True,
show_default=True,
help="Plot the y-axis in Megabytes or Gigabytes",
)
@click.option(
"--color-components",
is_flag=True,
default=False,
show_default=True,
help="Plot individual parallel components in color instead of grey",
help="Plot the y-axis in Megabytes or Gigabytes",
)
@click.option(
"--x-label",
Expand Down Expand Up @@ -74,7 +68,6 @@ def plot_memory_monitors_command(
reduction_files: Sequence[str],
output: Optional[str],
use_mb: bool,
color_components: Optional[bool],
x_label: Optional[str],
x_bounds: Optional[Sequence[float]],
stylesheet,
Expand Down Expand Up @@ -107,15 +100,11 @@ def check_memory_monitor_dir(h5_filename: str):

return list(memory_monitor_dir.keys())

# Get a list of all components that we have monitored from the first
# reductions file
memory_filenames = check_memory_monitor_dir(reduction_files[0])
df_dict = {}

# Given an h5 filename (for error messages), an already-open h5 file, and a
# subfile name, make sure that the subfile exists inside the h5 file. Then
# return the subfile's contents as a DataFrame.
def check_memory_monitor_file(
    h5_filename: str, h5file: h5py.File, subfile_name: str
):
    subfile_path = f"/MemoryMonitors/{subfile_name}"
    subfile = h5file.get(subfile_path)
    # BUG FIX: the original tested ``subfile_path is None``, which can never
    # be true (it is a freshly built f-string). The intent is to test the
    # result of ``h5file.get``, which returns None when the path is absent.
    if subfile is None:
        # NOTE(review): the original error branch is not visible in this
        # view — message text reconstructed; confirm against the file.
        raise click.UsageError(
            f"Unable to open memory monitor subfile '{subfile_path}' in h5"
            f" file {h5_filename}."
        )

    return to_dataframe(subfile)

# First just concat all individual memory monitors. This will also check
# that all components from the first reductions file are also in all
# subsequent reductions files
for memory_filename in memory_filenames:
df_dict[memory_filename] = pd.concat(
check_memory_monitor_file(reduction_file, memory_filename)
for reduction_file in reduction_files
)

# Restrict to x-bounds if there are any
if x_bounds is not None:
df_dict[memory_filename] = df_dict[memory_filename][
(df_dict[memory_filename]["Time"] >= x_bounds[0])
& (df_dict[memory_filename]["Time"] <= x_bounds[1])
]

# Given a DataFrame, sum all the columns that list totals for each node
def compute_individual_total(df: pd.DataFrame):
def total_over_nodes(df: pd.DataFrame):
total = None
for column in df.columns:
# "Size (MB)" is for a singleton. "Size on node" is for groups,
Expand All @@ -160,46 +133,78 @@ def compute_individual_total(df: pd.DataFrame):

return total

# Get a list of all components that we have monitored from the first
# reductions file
memory_filenames = check_memory_monitor_dir(reduction_files[0])

# Open every h5file. For each h5file, turn each subfile into a DataFrame.
# Then concat all DataFrames together into one that's indexed by the
# subfile/component name.
totals_df = pd.DataFrame()
for reduction_file in reduction_files:
h5file = h5py.File(reduction_file)
local_totals_df = pd.DataFrame()

for subfile_name in memory_filenames:
df = check_memory_monitor_file(reduction_file, h5file, subfile_name)
if "Time" not in local_totals_df:
local_totals_df["Time"] = df["Time"]
local_totals_df[subfile_name] = total_over_nodes(df)

totals_df = pd.concat([totals_df, local_totals_df])

# Restrict to x-bounds if there are any. Filter the whole DataFrame
# row-wise in one step: masking each component column individually (as the
# per-column loop did) assigns a shorter Series back into the frame, which
# pandas index-aligns — leaving NaNs in the components while "Time" keeps
# every row, so the plotted x-axis would not actually be restricted.
if x_bounds is not None:
    totals_df = totals_df[
        (totals_df["Time"] >= x_bounds[0])
        & (totals_df["Time"] <= x_bounds[1])
    ]

# Need .dat because all other components have that extension
the_rest_str = "The Rest.dat"

# Sum the per-component totals in ``df`` into one grand total, and lump
# every component whose peak usage is below 1% of the mean total into a
# single "The Rest" series. Returns (total, the_rest, components_to_plot),
# where ``the_rest`` is None if no component was small enough to be lumped
# and ``components_to_plot`` lists the column names worth plotting
# individually (including the "The Rest" entry when present).
def compute_overall_total_and_the_rest(df: pd.DataFrame):
    # Accumulate the grand total across all monitored components
    total = None
    for name in memory_filenames:
        total = df[name].copy() if total is None else total + df[name]

    average_max = total.mean()
    # The total itself is always plotted, so it never appears in this list
    components_to_plot = []

    # Components peaking below 1% of the average total are merged into a
    # single "The Rest" series instead of being plotted on their own
    the_rest = None
    for name in memory_filenames:
        series = df[name]
        if series.max() < 0.01 * average_max:
            the_rest = (
                series.copy() if the_rest is None else the_rest + series
            )
        else:
            components_to_plot.append(name)

    if the_rest is not None:
        components_to_plot.append(the_rest_str)

    return total, the_rest, components_to_plot

# Now make a DataFrame for the total memory across all components
totals_df, components_to_plot = make_totals_df(df_dict)
# Now add the total and the rest to the totals DataFrame
total_series, the_rest_series, components_to_plot = (
compute_overall_total_and_the_rest(totals_df)
)
totals_df["Total"] = total_series
if the_rest_series is not None:
totals_df[the_rest_str] = the_rest_series

# For plotting in MB vs GB
divisor = 1.0 if use_mb else 1000.0
Expand All @@ -211,64 +216,40 @@ def make_totals_df(component_df_dict: dict):
fig = plt.figure()
ax = fig.add_subplot(111)

# If we are coloring components, add this to the legend
if color_components:
ax.plot(
totals_df["Time"],
totals_df["Total"] / divisor,
color="black",
label="Total",
)

# Determine plotting order
maxes = [totals_df[component].max() for component in components_to_plot]
permutation = np.argsort(maxes)[::-1]

# Plot the individual components
for idx in permutation:
component = components_to_plot[idx]
ax.plot(
totals_df["Time"],
totals_df["Total"] / divisor,
color="black",
label="Total",
totals_df[component] / divisor,
linewidth=0.2,
# Remove .dat extension
label=component[:-4],
)
else:
ax.plot(totals_df["Time"], totals_df["Total"] / divisor, color="black")

for component in components_to_plot:
# Plot in color (with a legend)
if color_components:
ax.plot(
totals_df["Time"],
totals_df[component] / divisor,
linewidth=0.2,
# Remove .dat extension
label=component[:-4],
)
else:
ax.plot(
totals_df["Time"],
totals_df[component] / divisor,
color="grey",
linewidth=0.2,
)

gb_or_mb = "MB" if use_mb else "GB"
plt.title(f"Total Memory Usage ({gb_or_mb})")
if x_label is not None:
ax.set_xlabel(x_label)
if color_components:
# Total is always first
maxes = [inf]
for component in components_to_plot:
maxes.append(totals_df[component].max())
# Now that we have all the maxes, we need to sort them and get the
# permutation of the sort for the legend so it's in order of largest at
# the top, smallest at the bottom
maxes_tuple = [(maxes[i], i) for i in range(len(maxes))]
maxes_tuple.sort()
# Reverse because of the order of plotting
maxes_tuple.reverse()
_, permutation = zip(*maxes_tuple)

handles, labels = plt.gca().get_legend_handles_labels()
leg = plt.legend(
[handles[idx] for idx in permutation],
[labels[idx] for idx in permutation],
loc="center left",
bbox_to_anchor=(1, 0.5),
)
# The lines in the legend are a bit small because of the plot linewidth,
# so make the legend lines a bit bigger
for line in leg.get_lines():
line.set_linewidth(1.0)
# The lines in the legend are a bit small because of the plot linewidth,
# so make the legend lines a bit bigger
leg = plt.legend(
loc="center left",
bbox_to_anchor=(1, 0.5),
)
for line in leg.get_lines():
line.set_linewidth(1.0)

if output is not None:
output = output.split(".pdf")[0]
Expand Down
2 changes: 0 additions & 2 deletions tests/Unit/Visualization/Python/Test_PlotMemoryMonitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,6 @@ def test_plot_size(self):
os.path.join(self.work_dir, self.reductions_file_names[1]),
"-o",
os.path.join(self.work_dir, "MultiFile"),
"--use-gb",
"--color-components",
"--x-label",
"Time (M)",
],
Expand Down

0 comments on commit 47372cb

Please sign in to comment.