diff --git a/example_solutions/deep_reinforcement_learning.ipynb b/example_solutions/deep_reinforcement_learning.ipynb deleted file mode 100644 index 912e994..0000000 --- a/example_solutions/deep_reinforcement_learning.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import gymnasium\n", - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "\n", - "from stable_baselines3 import PPO, SAC\n", - "from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n", - "from stable_baselines3.common.monitor import Monitor\n", - "\n", - "from building_energy_storage_simulation import BuildingSimulation, Environment\n", - "\n", - "from observation_wrapper import ObservationWrapper\n", - "from helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_CAPACITY, BATTERY_POWER" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Applying Reiforcement Learning Using Stable Baselines 3\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "NUM_FORECAST_STEPS = 8\n", - "RESULT_PATH = 'rl_example/'\n", - "\n", - "os.makedirs(RESULT_PATH, exist_ok=True)\n", - "\n", - "load, price, generation = read_data()\n", - "load_train = load[:TEST_INDEX_START]\n", - "price_train = price[:TEST_INDEX_START]\n", - "generation_train = generation[:TEST_INDEX_START]\n", - "\n", - "# Create Training Environment\n", - "sim = BuildingSimulation(electricity_load_profile=load_train,\n", - " solar_generation_profile=generation_train,\n", - " electricity_price=price_train,\n", - " max_battery_charge_per_timestep=BATTERY_POWER,\n", - " battery_capacity=BATTERY_CAPACITY)\n", - "\n", - "env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n", - "# ObservationWrapper combines forecast of load and generation to one residual load forecast\n", - "env = ObservationWrapper(env, NUM_FORECAST_STEPS)\n", - "initial_obs, info = env.reset()\n", - "print(initial_obs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap with Monitor() so a log of the training is saved \n", - "env = Monitor(env, filename=RESULT_PATH)\n", - "# Warp with DummyVecEnc() so the observations and reward can be normalized using VecNormalize()\n", - "env = DummyVecEnv([lambda: env])\n", - "env = VecNormalize(env, norm_obs=True, norm_reward=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Train :-)\n", - "model = SAC(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", - "model.learn(total_timesteps=200000)\n", - "# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n", - "model.save(RESULT_PATH + 'model')\n", - "env.save(RESULT_PATH + 'env.pkl')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.save(RESULT_PATH + 'env.pkl')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Plot the training process\n", - "training_log = pd.read_csv(RESULT_PATH + 
'monitor.csv', skiprows=1)\n", - "training_log['r'].plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "load, price, generation = read_data()\n", - "load_eval = load[TEST_INDEX_START:]\n", - "price_eval = price[TEST_INDEX_START:]\n", - "generation_eval = generation[TEST_INDEX_START:]\n", - "\n", - "num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START\n", - "\n", - "eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n", - " solar_generation_profile=generation_eval,\n", - " electricity_price=price_eval,\n", - " max_battery_charge_per_timestep=BATTERY_POWER, \n", - " battery_capacity=BATTERY_CAPACITY)\n", - "\n", - "eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n", - "eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n", - "eval_env = DummyVecEnv([lambda: eval_env])\n", - "# It is important to load the environmental statistics here as we use a rolling mean calculation !\n", - "eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "actions, observations, electricity_consumption, price, rewards = ([], [], [], [], [])\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = eval_env.step([action[0][0]])\n", - "\n", - " actions.append(action[0][0][0])\n", - " original_reward = eval_env.get_original_reward()[0]\n", - " original_obs = eval_env.get_original_obs()[0]\n", - " observations.append(original_obs)\n", - " electricity_consumption.append(info[0]['electricity_consumption'])\n", - " price.append(info[0]['electricity_price'])\n", - " rewards.append(r)\n", - " \n", - "trajectory = pd.DataFrame({\n", - " 'action': actions,\n", - " 'observations': observations,\n", - " 'electricity_consumption': electricity_consumption,\n", - " 'electricity_price': price,\n", - " 'reward': rewards\n", - " }) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_data = trajectory[200:500]\n", - "observation_df = plot_data['observations'].apply(pd.Series)\n", - "augmented_load = observation_df[1] + plot_data['action'] * BATTERY_POWER\n", - "plt.rcParams[\"figure.figsize\"] = (16,10)\n", - "\n", - "fig1 = plt.figure()\n", - "ax = plt.subplot()\n", - "ax.plot(observation_df[1], label='Residual Load')\n", - "ax.plot(augmented_load, label='Augmented Load')\n", - "ax.plot(plot_data['electricity_price'], '--', label='Price')\n", - "ax.plot(plot_data['action']*50, label='Battery Power')\n", - "plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)')\n", - "plt.xlabel('Time Step')\n", - "ax.legend()\n", - "ax.grid()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compare to Baseline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "cost = []\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = eval_env.step([action[0][0]])\n", - " cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "cost = sum(cost)" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "baseline_cost = []\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " # Always taking noop as action. This is the electricity demand if there would be no battery\n", - " action = [0]\n", - " obs, r, done, info = eval_env.step(action)\n", - " baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "baseline_cost = sum(baseline_cost)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# how much energy did we save by utilizing the battery?\n", - "1 - (cost / baseline_cost)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/example_solutions/deep_reinforcement_learning/evaluate.py b/example_solutions/deep_reinforcement_learning/evaluate.py new file mode 100644 index 0000000..696fb74 --- /dev/null +++ b/example_solutions/deep_reinforcement_learning/evaluate.py @@ -0,0 +1,87 @@ +# Plot the training process +import pandas as pd +from stable_baselines3 import SAC +from stable_baselines3.common.results_plotter import plot_results +from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize + +from building_energy_storage_simulation import BuildingSimulation, Environment +from example_solutions.deep_reinforcement_learning.train import RESULT_PATH, NUM_FORECAST_STEPS +from example_solutions.helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_POWER, BATTERY_CAPACITY, \ + plot_control_trajectory +from example_solutions.observation_wrapper import ObservationWrapper + + +def evaluate(env, agent=None): + # Do the evaluation + actions, observations, electricity_consumption, price, rewards = ([], [], [], [], []) + done = False + obs = env.reset() + while not done: + if agent is None: + action = [[0]] + else: + action = [agent.predict(obs, deterministic=True)[0][0]] + + obs, r, done, info = env.step([action[0][0]]) + + actions.append(action[0][0]) + original_obs = env.get_original_obs()[0] + observations.append(original_obs) + electricity_consumption.append(info[0]['electricity_consumption']) + price.append(info[0]['electricity_price']) + rewards.append(r) + + return pd.DataFrame({ + 'action': actions, + 'observations': observations, + 'electricity_consumption': electricity_consumption, + 'electricity_price': price, + 'reward': rewards + }) + + +if __name__ == "__main__": + # Plot evolution of reward during training + try: + plot_results(RESULT_PATH, x_axis='timesteps', task_name='title', num_timesteps=None) + except: + print('Training Reward Plot could not be created') + + load, price, generation = read_data() + load_eval = load[TEST_INDEX_START:] + price_eval = price[TEST_INDEX_START:] + generation_eval = generation[TEST_INDEX_START:] + + num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START + + eval_sim = BuildingSimulation(electricity_load_profile=load_eval, + solar_generation_profile=generation_eval, + electricity_price=price_eval, + max_battery_charge_per_timestep=BATTERY_POWER, + 
battery_capacity=BATTERY_CAPACITY) + + eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps) + eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS) + eval_env = DummyVecEnv([lambda: eval_env]) + # It is important to load the saved VecNormalize statistics here, because the observations are normalized with the running mean and std collected during training + eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) + eval_env.training = False + + model = SAC.load(RESULT_PATH + 'model') + + trajectory = evaluate(eval_env, model) + baseline_trajectory = evaluate(eval_env, None) + + cost = sum(trajectory['electricity_price'] * trajectory['electricity_consumption']) + baseline_cost = sum(baseline_trajectory['electricity_price'] * baseline_trajectory['electricity_consumption']) + + print('baseline cost: ' + str(baseline_cost)) + print('cost: ' + str(cost)) + print('savings in %: ' + str(1 - cost / baseline_cost)) + + observation_df = trajectory['observations'].apply(pd.Series) + augmented_load = observation_df[1] + trajectory['action'] * BATTERY_POWER + plot_control_trajectory(residual_load=observation_df[1], + augmented_load=augmented_load, + price=trajectory['electricity_price'], + battery_power=trajectory['action'] * BATTERY_POWER) diff --git a/example_solutions/deep_reinforcement_learning/train.py b/example_solutions/deep_reinforcement_learning/train.py new file mode 100644 index 0000000..6591cd7 --- /dev/null +++ b/example_solutions/deep_reinforcement_learning/train.py @@ -0,0 +1,46 @@ +import os + +from stable_baselines3 import SAC +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize + +from building_energy_storage_simulation import BuildingSimulation, Environment +from example_solutions.helper import read_data, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER +from example_solutions.observation_wrapper import ObservationWrapper + +NUM_FORECAST_STEPS = 8 +RESULT_PATH = 'rl_example/' + +if __name__ == "__main__": + os.makedirs(RESULT_PATH, exist_ok=True) + + load, price, generation = read_data() + load_train = load[:TEST_INDEX_START] + price_train = price[:TEST_INDEX_START] + generation_train = generation[:TEST_INDEX_START] + + # Create Training Environment + sim = BuildingSimulation(electricity_load_profile=load_train, + solar_generation_profile=generation_train, + electricity_price=price_train, + max_battery_charge_per_timestep=BATTERY_POWER, + battery_capacity=BATTERY_CAPACITY) + + env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train) - NUM_FORECAST_STEPS) + # ObservationWrapper combines the forecasts of load and generation into one residual load forecast + env = ObservationWrapper(env, NUM_FORECAST_STEPS) + initial_obs, info = env.reset() + print(initial_obs) + + # Wrap with Monitor() so a log of the training is saved + env = Monitor(env, filename=RESULT_PATH) + # Wrap with DummyVecEnv() so the observations and reward can be normalized using VecNormalize() + env = DummyVecEnv([lambda: env]) + env = VecNormalize(env, norm_obs=True, norm_reward=True) + + # Train :-) + model = SAC("MlpPolicy", env, verbose=1, gamma=0.95) + model.learn(total_timesteps=200_000) + # Store the trained model and the environment stats (which are needed because we are standardizing the observations and reward using VecNormalize()) + model.save(RESULT_PATH + 'model') + env.save(RESULT_PATH + 'env.pkl') \ No newline at end of file diff --git a/example_solutions/helper.py b/example_solutions/helper.py index
173dde3..45929b6 100644 --- a/example_solutions/helper.py +++ b/example_solutions/helper.py @@ -1,7 +1,11 @@ +from pathlib import Path + import pandas as pd import numpy as np from typing import Tuple +from matplotlib import pyplot as plt + # Start and end Index of data used for testing TEST_INDEX_START = 4380 TEST_INDEX_END = 8500 @@ -11,12 +15,26 @@ def read_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - load = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_load_profile.csv')[ + base_path = Path(__file__).parent + folder_path = (base_path / "../building_energy_storage_simulation/data/preprocessed/").resolve() + + load = pd.read_csv(folder_path / 'electricity_load_profile.csv')[ 'Load [kWh]'] - price = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_price_profile.csv')[ + price = pd.read_csv(folder_path / 'electricity_price_profile.csv')[ 'Day Ahead Auction'] - generation = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/solar_generation_profile.csv')[ + generation = pd.read_csv(folder_path / 'solar_generation_profile.csv')[ 'Generation [kWh]'] return np.array(load), np.array(price), np.array(generation) +def plot_control_trajectory(residual_load, augmented_load, price, battery_power) -> None: + ax = plt.subplot() + ax.plot(residual_load, label='Residual Load') + ax.plot(augmented_load, label='Augmented Load') + ax.plot(price, '--', label='Price') + ax.plot(battery_power, label='Battery Power') + plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') + plt.xlabel('Time Step') + ax.legend() + ax.grid() + plt.show() diff --git a/example_solutions/model_predictive_control.py b/example_solutions/model_predictive_control.py index 91b4f5f..6408fc0 100644 --- a/example_solutions/model_predictive_control.py +++ b/example_solutions/model_predictive_control.py @@ -1,10 +1,9 @@ import pyomo.environ as pyo import numpy as np -import matplotlib.pyplot as plt from building_energy_storage_simulation import BuildingSimulation, Environment from optimal_control_problem import build_optimization_problem -from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_POWER, BATTERY_CAPACITY +from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_POWER, BATTERY_CAPACITY, plot_control_trajectory FORECAST_LENGTH = 24 @@ -69,16 +68,8 @@ def normalize_to_minus_one_to_one(x, min_value, max_value): print('cost: ' + str(cost)) print('savings in %: ' + str(1 - cost/baseline_cost)) -time = range(len(actions)) - -fig1 = plt.figure() -ax = plt.subplot() -ax.plot(residual_loads, label='Residual Load') -ax.plot(residual_loads + actions, label='Augmented Load') -ax.plot(actions, label='Battery Power Applied') -ax.plot(prices, '--', label='Price') -plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') -plt.xlabel('Time Step') -ax.legend() -ax.grid() -plt.show() +plot_control_trajectory(residual_load=residual_loads, + augmented_load=residual_loads + actions, + price=prices, + battery_power=actions) + diff --git a/example_solutions/optimal_control_problem.py b/example_solutions/optimal_control_problem.py index e4be513..cdcb243 100644 --- a/example_solutions/optimal_control_problem.py +++ b/example_solutions/optimal_control_problem.py @@ -1,8 +1,7 @@ import pyomo.environ as pyo import numpy as np -import matplotlib.pyplot as plt -from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER +from helper import read_data, TEST_INDEX_END, 
TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER, plot_control_trajectory def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity, delta_time_hours=1): @@ -67,14 +66,8 @@ def soc_constraint_rule(m, i): print('cost: ' + str(cost)) print('savings in %: ' + str(1 - cost/baseline_cost)) - fig1 = plt.figure() - ax = plt.subplot() - ax.plot([(residual_fixed_load_eval[i]) for i in time], label='Residual Load') - ax.plot(augmented_load, label='Augmented Load') - ax.plot(price_eval, '--', label='Price') - ax.plot([(pyo.value(m.power[i])) for i in time], label='Battery Power') - plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') - plt.xlabel('Time Step') - ax.legend() - ax.grid() - plt.show() + plot_control_trajectory(residual_load=[(residual_fixed_load_eval[i]) for i in time], + augmented_load=augmented_load, + price=price_eval, + battery_power=[(pyo.value(m.power[i])) for i in time] + )
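Note on the training-reward plot in evaluate.py: plot_results() reads the rl_example/monitor.csv file written by the Monitor() wrapper during training, and the call is wrapped in a try/except because the log may be missing. If it fails, the reward curve can also be plotted straight from the Monitor log, as the removed notebook did. A minimal sketch, assuming train.py has already run and RESULT_PATH is still 'rl_example/':

import pandas as pd
from matplotlib import pyplot as plt

# The first line of monitor.csv is a JSON header written by Monitor(), hence skiprows=1.
training_log = pd.read_csv('rl_example/monitor.csv', skiprows=1)
training_log['r'].plot()  # 'r' holds the per-episode reward
plt.xlabel('Episode')
plt.ylabel('Episode Reward')
plt.show()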