feat: add benchmark section to report (#20)
lkstrp authored Sep 10, 2024
1 parent daaacf2 commit 4a3d2dd
Showing 6 changed files with 410 additions and 41 deletions.
22 changes: 19 additions & 3 deletions action.yml
@@ -119,10 +119,10 @@ runs:
--config_file ${{ inputs.snakemake_config }} \
--main_command "${{ inputs.main_command }}" \
--pre_command "${{ inputs.pre_command }}"
shell: bash

- name: Upload artifacts (logs)
if: ${{ inputs.step == 'run-self-hosted-validation' }}
uses: actions/upload-artifact@v4
@@ -300,7 +300,6 @@ runs:
mkdir -p _validation-images/main
# Copy plots
echo "Plots: ${plots_array[@]}"
for plotpath in "${plots_array[@]}"
do
subpath="${plotpath%/*}"
@@ -315,6 +314,23 @@
cp "$HOME/artifacts/results/feature/results/${PREFIX_FEATURE}/${subpath}/${plot}" "_validation-images/feature/${subpath}/" || true # ignore if run failed
done
# Get benchmark plot list (from benchmark script)
read -a plots_array_benchmark <<< "$(python scripts/plot_benchmarks.py)"
mkdir -p _validation-images/benchmarks
# Copy benchmark plots
for plot in "${plots_array_benchmark[@]}"
do
echo "Copying benchmark plot: ${plot}
# Create directories
mkdir -p "_validation-images/benchmarks
cp "${plot}" "_validation-images/benchmarks" || true # ignore if run failed
cp "${plot}" "_validation-images/benchmarks" || true # ignore if run failed
done
# Add plots to repo branch
echo "Adding plots to repo branch"
git add _validation-images
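For context: the `read -a` line expects `scripts/plot_benchmarks.py` to print a space-separated list of the plot files it produced (the three file names reappear later in `draft_comment.py`). A minimal sketch of that output contract; the output directory and structure here are assumptions, not the actual script:

```python
# Sketch only -- the real scripts/plot_benchmarks.py also renders the figures.
from pathlib import Path


def main() -> None:
    out_dir = Path("benchmarks")  # assumed output location
    out_dir.mkdir(parents=True, exist_ok=True)
    plots = [
        out_dir / "execution_time.png",
        out_dir / "memory_peak.png",
        out_dir / "memory_scatter.png",
    ]
    # ... matplotlib/seaborn code would write the files here ...
    # Print space-separated paths so bash can `read -a` them into an array.
    print(" ".join(str(p) for p in plots))


if __name__ == "__main__":
    main()
```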
4 changes: 3 additions & 1 deletion requirements.txt
@@ -1,3 +1,5 @@
numpy
pandas
openpyxl
openpyxl
matplotlib
seaborn
128 changes: 94 additions & 34 deletions scripts/draft_comment.py
@@ -9,12 +9,12 @@
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any

import numpy as np
import pandas as pd
from metrics import min_max_normalized_mae, normalized_root_mean_square_error
from numpy.typing import ArrayLike
from utils import get_env_var


def create_numeric_mask(arr: ArrayLike) -> np.ndarray:
@@ -35,17 +35,6 @@ def create_numeric_mask(arr: ArrayLike) -> np.ndarray:
return np.vectorize(lambda x: isinstance(x, (int, float)) and np.isfinite(x))(arr)

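A quick sketch of what `create_numeric_mask`, as defined above, yields on mixed input (toy data):

```python
import numpy as np

arr = np.array([1, 2.5, np.nan, "text", np.inf], dtype=object)
create_numeric_mask(arr)
# array([ True,  True, False, False, False])
```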

def get_env_var(var_name: str, default: Any = None) -> Any:
"""Get environment variable or raise an error if not set and no default provided."""
value = os.getenv(var_name, default)
if value == "" and default is None:
msg = f"The environment variable '{var_name}' is not set."
raise OSError(msg)
if str(value).lower() in ["true", "false"]:
return str(value).lower() == "true"
return value


@dataclass
class CommentData:
"""Class to store data for comment generation."""
@@ -69,6 +58,13 @@ class CommentData:

_sucessfull_run = None

def __init__(self):
"""Initialize comment data class."""
self.plots_base_url = (
f"https://raw.githubusercontent.com/lkstrp/"
f"pypsa-validator/{self.plots_hash}/_validation-images/"
)

def errors(self, branch_type: str) -> list:
"""Return errors for branch type."""
if branch_type not in ["main", "feature"]:
@@ -115,6 +111,7 @@ def sucessfull_run(self) -> bool:


def get_deviation_df(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
"""Calculate deviation dataframe between two dataframes."""
nrmse_series = df1.apply(
lambda row: normalized_root_mean_square_error(
row.values,
@@ -138,11 +135,27 @@ def get_deviation_df(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
return deviation_df

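To make the comparison concrete, a toy invocation of `get_deviation_df` (illustrative data; the exact columns of the returned frame follow the implementation above):

```python
import pandas as pd

# Two result tables with matching index/columns, standing in for
# main-branch vs. feature-branch outputs.
df_main = pd.DataFrame({"a": [1.0, 2.0], "b": [2.0, 4.0]}, index=["wind", "solar"])
df_feature = pd.DataFrame({"a": [1.1, 2.0], "b": [2.0, 3.6]}, index=["wind", "solar"])

deviation = get_deviation_df(df_main, df_feature)
print(deviation)  # one deviation row (NRMSE, min-max normalized MAE) per index entry
```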

def create_details_block(summary: str, content: str) -> str:
"""Wrap content in a details block (if content is not empty)."""
if content:
return (
f"<details>\n"
f" <summary>{summary}</summary>\n"
f"{content}"
f"</details>\n"
f"\n"
f"\n"
)
else:
return ""

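For illustration, the collapsible block this helper emits for a GitHub comment:

```python
print(create_details_block("Benchmarks", "<img src='plot.png'>\n"))
# <details>
#  <summary>Benchmarks</summary>
# <img src='plot.png'>
# </details>
```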

class RunSuccessfull(CommentData):
"""Class to generate successfull run component."""

def __init__(self):
"""Initialize class."""
"""Initialize successfull run component."""
super().__init__()
self.dir_main = [
file
for file in (self.dir_artifacts / "results/main/results").iterdir()
@@ -173,8 +186,6 @@ def __init__(self):

self._variables_deviation_df = None

self.plots_base_url = f"https://raw.githubusercontent.com/lkstrp/pypsa-validator/{self.plots_hash}/_validation-images/"

# Status strings for file comparison table
STATUS_FILE_MISSING = " :warning: Missing"
STATUS_EQUAL = ":white_check_mark: Equal"
@@ -191,6 +202,7 @@ def __init__(self):

@property
def variables_deviation_df(self):
"""Get the deviation dataframe for variables."""
if self._variables_deviation_df is not None:
return self._variables_deviation_df
vars1 = pd.read_excel(self.dir_main / self.VARIABLES_FILE)
@@ -216,6 +228,7 @@ def variables_deviation_df(self):

@property
def variables_plot_strings(self):
"""Return list of variable plot strings."""
plots = (
self.variables_deviation_df.index.to_series()
.apply(lambda x: re.sub(r"[ |/]", "-", x))
@@ -226,6 +239,7 @@ def variables_plot_strings(self):

@property
def variables_comparison(self) -> str:
"""Return variables comparison table."""
if (
not (self.dir_main / self.VARIABLES_FILE).exists()
or not (self.dir_feature / self.VARIABLES_FILE).exists()
@@ -247,6 +261,7 @@ def variables_comparison(self) -> str:

@property
def changed_variables_plots(self) -> str:
"""Return plots for variables that have changed significantly."""
if (
not (self.dir_main / self.VARIABLES_FILE).exists()
or not (self.dir_feature / self.VARIABLES_FILE).exists()
@@ -279,8 +294,8 @@ def plots_table(self) -> str:
url_b = self.plots_base_url + "feature/" + plot
rows.append(
[
f'<img src="{url_a}" alt="Image not found in results">',
f'<img src="{url_b}" alt="Image not found in results">',
f'<img src="{url_a}" alt="Image not available">',
f'<img src="{url_b}" alt="Image not available">',
]
)

@@ -460,20 +475,6 @@ def files_table(self) -> str:
@property
def body(self) -> str:
"""Body text for successfull run."""

def create_details_block(summary: str, content: str) -> str:
if content:
return (
f"<details>\n"
f" <summary>{summary}</summary>\n"
f"{content}"
f"</details>\n"
f"\n"
f"\n"
)
else:
return ""

if self.variables_comparison and self.changed_variables_plots:
if self.variables_deviation_df.empty:
variables_txt = (
@@ -489,7 +490,8 @@ def create_details_block(summary: str, content: str) -> str:
)
elif self.variables_comparison or self.changed_variables_plots:
raise ValueError(
"Both variables_comparison and changed_variables_plots must be set or unset."
"Both variables_comparison and changed_variables_plots must be set or "
"unset."
)
else:
variables_txt = ""
@@ -508,6 +510,10 @@ def __call__(self) -> str:
class RunFailed(CommentData):
"""Class to generate failed run component."""

def __init__(self):
"""Initialize failed run component."""
super().__init__()

def body(self) -> str:
"""Body text for failed run."""
main_errors = self.errors("main")
@@ -539,9 +545,45 @@ def __call__(self) -> str:
return self.body()


class ModelMetrics(CommentData):
"""Class to generate model metrics component."""

def __init__(self):
"""Initialize model metrics component."""
super().__init__()

@property
def benchmark_plots(self) -> str:
"""Benchmark plots."""
"execution_time.png", "memory_peak.png", "memory_scatter.png"
return (
f'<img src="{self.plots_base_url}benchmarks/execution_time.png" '
'alt="Image not available">\n'
f'<img src="{self.plots_base_url}benchmarks/memory_peak.png" '
'alt="Image not available">\n'
f'<img src="{self.plots_base_url}benchmarks/memory_scatter.png" '
'alt="Image not available">\n'
)

def body(self) -> str:
"""Body text for Model Metrics."""
return (
f"**Model Metrics**\n"
f"{create_details_block('Benchmarks', self.benchmark_plots)}\n"
)

def __call__(self) -> str:
"""Return text for model metrics component."""
return self.body()

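Taken together, `ModelMetrics` renders the new benchmark section of the PR comment. A sketch of the output, assuming the environment `CommentData` reads (artifact paths, plot hash) is configured; URLs abbreviated here:

```python
metrics = ModelMetrics()
print(metrics())
# **Model Metrics**
# <details>
#  <summary>Benchmarks</summary>
# <img src=".../benchmarks/execution_time.png" alt="Image not available">
# <img src=".../benchmarks/memory_peak.png" alt="Image not available">
# <img src=".../benchmarks/memory_scatter.png" alt="Image not available">
# </details>
```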

class Comment(CommentData):
"""Class to generate pypsa validator comment for GitHub PRs."""

def __init__(self) -> None:
"""Initialize comment class. It will put all text components together."""
super().__init__()

@property
def header(self) -> str:
"""
@@ -603,7 +645,15 @@ def subtext(self) -> str:
f"Last updated on `{time}`."
)

def needed_plots(self):
def dynamic_plots(self) -> str:
"""
Return a list of dynamic results plots needed for the comment.

Returns
-------
str: Space-separated list of dynamic plots.
"""
if self.sucessfull_run:
body_sucessfull = RunSuccessfull()
plots_string = " ".join(body_sucessfull.variables_plot_strings)
@@ -613,13 +663,15 @@ def needed_plots(self):

def __repr__(self) -> str:
"""Return full formatted comment."""
body_benchmarks = ModelMetrics()
if self.sucessfull_run:
body_sucessfull = RunSuccessfull()

return (
f"{self.header}"
f"{self.config_diff if self.git_diff_config else ''}"
f"{body_sucessfull()}"
f"{body_benchmarks()}"
f"{self.subtext}"
)

@@ -630,11 +682,19 @@ def __repr__(self) -> str:
f"{self.header}"
f"{body_failed()}"
f"{self.config_diff if self.git_diff_config else ''}"
f"{body_benchmarks()}"
f"{self.subtext}"
)


def main():
"""
Run draft comment script.

Command line interface for the draft comment script. Use no arguments to print the
comment, or use the "plots" argument to print the dynamic plots which will be needed
for the comment.
"""
parser = argparse.ArgumentParser(description="Process some comments.")
parser.add_argument(
"command", nargs="?", default="", help='Command to run, e.g., "plots".'
@@ -644,7 +704,7 @@ def main():
comment = Comment()

if args.command == "plots":
print(comment.needed_plots())
print(comment.dynamic_plots())

else:
print(comment) # noqa T201
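As wired up in `main`, the script has two modes: passing `plots` prints the space-separated list of dynamic plot paths for the workflow to collect, while running `python scripts/draft_comment.py` with no argument prints the full comment body.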
10 changes: 7 additions & 3 deletions scripts/metrics.py
@@ -1,6 +1,4 @@
"""
Helper module for calculating evaluation metrics.
"""
"""Helper module for calculating evaluation metrics."""

import numpy as np
from numpy.typing import ArrayLike
@@ -64,6 +62,10 @@ def mean_absolute_percentage_error(
Predicted values
epsilon : float, optional (default=1e-9)
Small value to avoid division by zero
aggregate : bool, optional (default=True)
If True, return the mean MAPE. Otherwise, return an array of individual MAPEs
ignore_inf : bool, optional (default=True)
If True, ignore infinite values in the calculation

Returns
-------
@@ -111,6 +113,8 @@ def normalized_root_mean_square_error(
If True, ignore infinite values in the calculation
normalization : str, optional (default='min-max')
Method of normalization. Options: 'mean', 'range', 'iqr', 'min-max'
fill_na : float, optional (default=0)
Value to replace NaN values
epsilon : float, optional (default=1e-9)
Small value to add to normalization factor to avoid division by zero
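For orientation, a sketch of the min-max normalized RMSE these docstrings describe, under the stated defaults (`fill_na=0`, small `epsilon`); this is an assumption-flagged rework, not the module's exact code:

```python
import numpy as np


def nrmse_min_max_sketch(y_true, y_pred, epsilon=1e-9, fill_na=0.0):
    """Sketch: RMSE normalized by the range of y_true (min-max)."""
    y_true = np.nan_to_num(np.asarray(y_true, dtype=float), nan=fill_na)
    y_pred = np.nan_to_num(np.asarray(y_pred, dtype=float), nan=fill_na)
    rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
    return rmse / (y_true.max() - y_true.min() + epsilon)


print(nrmse_min_max_sketch([1.0, 2.0, 3.0], [1.1, 2.0, 2.7]))  # ~0.09
```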
