diff --git a/example_solutions/deep_reinforcement_learning.ipynb b/example_solutions/deep_reinforcement_learning.ipynb deleted file mode 100644 index 912e994..0000000 --- a/example_solutions/deep_reinforcement_learning.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import gymnasium\n", - "import os\n", - "import pandas as pd\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "\n", - "from stable_baselines3 import PPO, SAC\n", - "from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize\n", - "from stable_baselines3.common.monitor import Monitor\n", - "\n", - "from building_energy_storage_simulation import BuildingSimulation, Environment\n", - "\n", - "from observation_wrapper import ObservationWrapper\n", - "from helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_CAPACITY, BATTERY_POWER" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Applying Reiforcement Learning Using Stable Baselines 3\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "NUM_FORECAST_STEPS = 8\n", - "RESULT_PATH = 'rl_example/'\n", - "\n", - "os.makedirs(RESULT_PATH, exist_ok=True)\n", - "\n", - "load, price, generation = read_data()\n", - "load_train = load[:TEST_INDEX_START]\n", - "price_train = price[:TEST_INDEX_START]\n", - "generation_train = generation[:TEST_INDEX_START]\n", - "\n", - "# Create Training Environment\n", - "sim = BuildingSimulation(electricity_load_profile=load_train,\n", - " solar_generation_profile=generation_train,\n", - " electricity_price=price_train,\n", - " max_battery_charge_per_timestep=BATTERY_POWER,\n", - " battery_capacity=BATTERY_CAPACITY)\n", - "\n", - "env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train)-NUM_FORECAST_STEPS)\n", - "# ObservationWrapper combines forecast of load and generation to one residual load forecast\n", - "env = ObservationWrapper(env, NUM_FORECAST_STEPS)\n", - "initial_obs, info = env.reset()\n", - "print(initial_obs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Wrap with Monitor() so a log of the training is saved \n", - "env = Monitor(env, filename=RESULT_PATH)\n", - "# Warp with DummyVecEnc() so the observations and reward can be normalized using VecNormalize()\n", - "env = DummyVecEnv([lambda: env])\n", - "env = VecNormalize(env, norm_obs=True, norm_reward=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Train :-)\n", - "model = SAC(\"MlpPolicy\", env, verbose=1, gamma=0.95)\n", - "model.learn(total_timesteps=200000)\n", - "# Store the trained Model and environment stats (which are needed as we are standardizing the observations and reward using VecNormalize())\n", - "model.save(RESULT_PATH + 'model')\n", - "env.save(RESULT_PATH + 'env.pkl')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "env.save(RESULT_PATH + 'env.pkl')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "# Plot the training process\n", - "training_log = pd.read_csv(RESULT_PATH + 
'monitor.csv', skiprows=1)\n", - "training_log['r'].plot()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "load, price, generation = read_data()\n", - "load_eval = load[TEST_INDEX_START:]\n", - "price_eval = price[TEST_INDEX_START:]\n", - "generation_eval = generation[TEST_INDEX_START:]\n", - "\n", - "num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START\n", - "\n", - "eval_sim = BuildingSimulation(electricity_load_profile=load_eval,\n", - " solar_generation_profile=generation_eval,\n", - " electricity_price=price_eval,\n", - " max_battery_charge_per_timestep=BATTERY_POWER, \n", - " battery_capacity=BATTERY_CAPACITY)\n", - "\n", - "eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps)\n", - "eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS)\n", - "eval_env = DummyVecEnv([lambda: eval_env])\n", - "# It is important to load the environmental statistics here as we use a rolling mean calculation !\n", - "eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "actions, observations, electricity_consumption, price, rewards = ([], [], [], [], [])\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = eval_env.step([action[0][0]])\n", - "\n", - " actions.append(action[0][0][0])\n", - " original_reward = eval_env.get_original_reward()[0]\n", - " original_obs = eval_env.get_original_obs()[0]\n", - " observations.append(original_obs)\n", - " electricity_consumption.append(info[0]['electricity_consumption'])\n", - " price.append(info[0]['electricity_price'])\n", - " rewards.append(r)\n", - " \n", - "trajectory = pd.DataFrame({\n", - " 'action': actions,\n", - " 'observations': observations,\n", - " 'electricity_consumption': electricity_consumption,\n", - " 'electricity_price': price,\n", - " 'reward': rewards\n", - " }) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_data = trajectory[200:500]\n", - "observation_df = plot_data['observations'].apply(pd.Series)\n", - "augmented_load = observation_df[1] + plot_data['action'] * BATTERY_POWER\n", - "plt.rcParams[\"figure.figsize\"] = (16,10)\n", - "\n", - "fig1 = plt.figure()\n", - "ax = plt.subplot()\n", - "ax.plot(observation_df[1], label='Residual Load')\n", - "ax.plot(augmented_load, label='Augmented Load')\n", - "ax.plot(plot_data['electricity_price'], '--', label='Price')\n", - "ax.plot(plot_data['action']*50, label='Battery Power')\n", - "plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)')\n", - "plt.xlabel('Time Step')\n", - "ax.legend()\n", - "ax.grid()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Compare to Baseline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "cost = []\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " action = model.predict(obs, deterministic=True)\n", - " obs, r, done, info = eval_env.step([action[0][0]])\n", - " cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "cost = sum(cost)" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "eval_env.training = False\n", - "\n", - "baseline_cost = []\n", - "done = False\n", - "obs = eval_env.reset()\n", - "while not done:\n", - " # Always taking noop as action. This is the electricity demand if there would be no battery\n", - " action = [0]\n", - " obs, r, done, info = eval_env.step(action)\n", - " baseline_cost.append(info[0]['electricity_consumption'] * info[0]['electricity_price'])\n", - "\n", - "baseline_cost = sum(baseline_cost)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# how much energy did we save by utilizing the battery?\n", - "1 - (cost / baseline_cost)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/example_solutions/deep_reinforcement_learning/evaluate.py b/example_solutions/deep_reinforcement_learning/evaluate.py new file mode 100644 index 0000000..696fb74 --- /dev/null +++ b/example_solutions/deep_reinforcement_learning/evaluate.py @@ -0,0 +1,87 @@ +# Plot the training process +import pandas as pd +from stable_baselines3 import SAC +from stable_baselines3.common.results_plotter import plot_results +from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize + +from building_energy_storage_simulation import BuildingSimulation, Environment +from example_solutions.deep_reinforcement_learning.train import RESULT_PATH, NUM_FORECAST_STEPS +from example_solutions.helper import read_data, TEST_INDEX_START, TEST_INDEX_END, BATTERY_POWER, BATTERY_CAPACITY, \ + plot_control_trajectory +from example_solutions.observation_wrapper import ObservationWrapper + + +def evaluate(env, agent=None): + # Do the evaluation + actions, observations, electricity_consumption, price, rewards = ([], [], [], [], []) + done = False + obs = env.reset() + while not done: + if agent is None: + action = [[0]] + else: + action = [agent.predict(obs, deterministic=True)[0][0]] + + obs, r, done, info = env.step([action[0][0]]) + + actions.append(action[0][0]) + original_obs = env.get_original_obs()[0] + observations.append(original_obs) + electricity_consumption.append(info[0]['electricity_consumption']) + price.append(info[0]['electricity_price']) + rewards.append(r) + + return pd.DataFrame({ + 'action': actions, + 'observations': observations, + 'electricity_consumption': electricity_consumption, + 'electricity_price': price, + 'reward': rewards + }) + + +if __name__ == "__main__": + # Plot evolution of reward during training + try: + plot_results(RESULT_PATH, x_axis='timesteps', task_name='title', num_timesteps=None) + except: + print('Training Reward Plot could not be created') + + load, price, generation = read_data() + load_eval = load[TEST_INDEX_START:] + price_eval = price[TEST_INDEX_START:] + generation_eval = generation[TEST_INDEX_START:] + + num_eval_timesteps = TEST_INDEX_END - TEST_INDEX_START + + eval_sim = BuildingSimulation(electricity_load_profile=load_eval, + solar_generation_profile=generation_eval, + electricity_price=price_eval, + max_battery_charge_per_timestep=BATTERY_POWER, + 
battery_capacity=BATTERY_CAPACITY) + + eval_env = Environment(eval_sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=num_eval_timesteps) + eval_env = ObservationWrapper(eval_env, NUM_FORECAST_STEPS) + eval_env = DummyVecEnv([lambda: eval_env]) + # It is important to load the saved VecNormalize statistics here, because the observations are normalized with the running mean and std collected during training + eval_env = VecNormalize.load(RESULT_PATH + 'env.pkl', eval_env) + eval_env.training = False + + model = SAC.load(RESULT_PATH + 'model') + + trajectory = evaluate(eval_env, model) + baseline_trajectory = evaluate(eval_env, None) + + cost = sum(trajectory['electricity_price'] * trajectory['electricity_consumption']) + baseline_cost = sum(baseline_trajectory['electricity_price'] * baseline_trajectory['electricity_consumption']) + + print('baseline cost: ' + str(baseline_cost)) + print('cost: ' + str(cost)) + print('savings in %: ' + str(1 - cost / baseline_cost)) + + observation_df = trajectory['observations'].apply(pd.Series) + augmented_load = observation_df[1] + trajectory['action'] * BATTERY_POWER + plot_control_trajectory(residual_load=observation_df[1], + augmented_load=augmented_load, + price=trajectory['electricity_price'], + battery_power=trajectory['action'] * BATTERY_POWER) diff --git a/example_solutions/deep_reinforcement_learning/train.py b/example_solutions/deep_reinforcement_learning/train.py new file mode 100644 index 0000000..6591cd7 --- /dev/null +++ b/example_solutions/deep_reinforcement_learning/train.py @@ -0,0 +1,46 @@ +import os + +from stable_baselines3 import SAC +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize + +from building_energy_storage_simulation import BuildingSimulation, Environment +from example_solutions.helper import read_data, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER +from example_solutions.observation_wrapper import ObservationWrapper + +NUM_FORECAST_STEPS = 8 +RESULT_PATH = 'rl_example/' + +if __name__ == "__main__": + os.makedirs(RESULT_PATH, exist_ok=True) + + load, price, generation = read_data() + load_train = load[:TEST_INDEX_START] + price_train = price[:TEST_INDEX_START] + generation_train = generation[:TEST_INDEX_START] + + # Create Training Environment + sim = BuildingSimulation(electricity_load_profile=load_train, + solar_generation_profile=generation_train, + electricity_price=price_train, + max_battery_charge_per_timestep=BATTERY_POWER, + battery_capacity=BATTERY_CAPACITY) + + env = Environment(sim, num_forecasting_steps=NUM_FORECAST_STEPS, max_timesteps=len(load_train) - NUM_FORECAST_STEPS) + # ObservationWrapper combines the forecasts of load and generation into one residual load forecast + env = ObservationWrapper(env, NUM_FORECAST_STEPS) + initial_obs, info = env.reset() + print(initial_obs) + + # Wrap with Monitor() so a log of the training is saved + env = Monitor(env, filename=RESULT_PATH) + # Wrap with DummyVecEnv() so the observations and reward can be normalized using VecNormalize() + env = DummyVecEnv([lambda: env]) + env = VecNormalize(env, norm_obs=True, norm_reward=True) + + # Train :-) + model = SAC("MlpPolicy", env, verbose=1, gamma=0.95) + model.learn(total_timesteps=200_000) + # Store the trained model and the environment stats (which are needed because we are standardizing the observations and reward using VecNormalize()) + model.save(RESULT_PATH + 'model') + env.save(RESULT_PATH + 'env.pkl') \ No newline at end of file diff --git a/example_solutions/helper.py b/example_solutions/helper.py index
173dde3..45929b6 100644 --- a/example_solutions/helper.py +++ b/example_solutions/helper.py @@ -1,7 +1,11 @@ +from pathlib import Path + import pandas as pd import numpy as np from typing import Tuple +from matplotlib import pyplot as plt + # Start and end Index of data used for testing TEST_INDEX_START = 4380 TEST_INDEX_END = 8500 @@ -11,12 +15,26 @@ def read_data() -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - load = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_load_profile.csv')[ + base_path = Path(__file__).parent + folder_path = (base_path / "../building_energy_storage_simulation/data/preprocessed/").resolve() + + load = pd.read_csv(folder_path / 'electricity_load_profile.csv')[ 'Load [kWh]'] - price = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/electricity_price_profile.csv')[ + price = pd.read_csv(folder_path / 'electricity_price_profile.csv')[ 'Day Ahead Auction'] - generation = pd.read_csv('../building_energy_storage_simulation/data/preprocessed/solar_generation_profile.csv')[ + generation = pd.read_csv(folder_path / 'solar_generation_profile.csv')[ 'Generation [kWh]'] return np.array(load), np.array(price), np.array(generation) +def plot_control_trajectory(residual_load, augmented_load, price, battery_power) -> None: + ax = plt.subplot() + ax.plot(residual_load, label='Residual Load') + ax.plot(augmented_load, label='Augmented Load') + ax.plot(price, '--', label='Price') + ax.plot(battery_power, label='Battery Power') + plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') + plt.xlabel('Time Step') + ax.legend() + ax.grid() + plt.show() diff --git a/example_solutions/model_predictive_control.py b/example_solutions/model_predictive_control.py index 91b4f5f..6408fc0 100644 --- a/example_solutions/model_predictive_control.py +++ b/example_solutions/model_predictive_control.py @@ -1,10 +1,9 @@ import pyomo.environ as pyo import numpy as np -import matplotlib.pyplot as plt from building_energy_storage_simulation import BuildingSimulation, Environment from optimal_control_problem import build_optimization_problem -from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_POWER, BATTERY_CAPACITY +from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_POWER, BATTERY_CAPACITY, plot_control_trajectory FORECAST_LENGTH = 24 @@ -69,16 +68,8 @@ def normalize_to_minus_one_to_one(x, min_value, max_value): print('cost: ' + str(cost)) print('savings in %: ' + str(1 - cost/baseline_cost)) -time = range(len(actions)) - -fig1 = plt.figure() -ax = plt.subplot() -ax.plot(residual_loads, label='Residual Load') -ax.plot(residual_loads + actions, label='Augmented Load') -ax.plot(actions, label='Battery Power Applied') -ax.plot(prices, '--', label='Price') -plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') -plt.xlabel('Time Step') -ax.legend() -ax.grid() -plt.show() +plot_control_trajectory(residual_load=residual_loads, + augmented_load=residual_loads + actions, + price=prices, + battery_power=actions) + diff --git a/example_solutions/optimal_control_problem.py b/example_solutions/optimal_control_problem.py index e4be513..cdcb243 100644 --- a/example_solutions/optimal_control_problem.py +++ b/example_solutions/optimal_control_problem.py @@ -1,8 +1,7 @@ import pyomo.environ as pyo import numpy as np -import matplotlib.pyplot as plt -from helper import read_data, TEST_INDEX_END, TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER +from helper import read_data, TEST_INDEX_END, 
TEST_INDEX_START, BATTERY_CAPACITY, BATTERY_POWER, plot_control_trajectory def build_optimization_problem(residual_fixed_load, price, soc, battery_power, battery_capacity, delta_time_hours=1): @@ -67,14 +66,8 @@ def soc_constraint_rule(m, i): print('cost: ' + str(cost)) print('savings in %: ' + str(1 - cost/baseline_cost)) - fig1 = plt.figure() - ax = plt.subplot() - ax.plot([(residual_fixed_load_eval[i]) for i in time], label='Residual Load') - ax.plot(augmented_load, label='Augmented Load') - ax.plot(price_eval, '--', label='Price') - ax.plot([(pyo.value(m.power[i])) for i in time], label='Battery Power') - plt.ylabel('Load and Battery Power Applied (kW) & Price (Cent per kWh)') - plt.xlabel('Time Step') - ax.legend() - ax.grid() - plt.show() + plot_control_trajectory(residual_load=[(residual_fixed_load_eval[i]) for i in time], + augmented_load=augmented_load, + price=price_eval, + battery_power=[(pyo.value(m.power[i])) for i in time] + )
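Note on the training-reward plot in evaluate.py: plot_results() reads the rl_example/monitor.csv file written by the Monitor() wrapper during training, and the call is wrapped in a try/except because the log may be missing. If it fails, the reward curve can also be plotted straight from the Monitor log, as the removed notebook did. A minimal sketch, assuming train.py has already run and RESULT_PATH is still 'rl_example/':

import pandas as pd
from matplotlib import pyplot as plt

# The first line of monitor.csv is a JSON header written by Monitor(), hence skiprows=1.
training_log = pd.read_csv('rl_example/monitor.csv', skiprows=1)
training_log['r'].plot()  # 'r' holds the per-episode reward
plt.xlabel('Episode')
plt.ylabel('Episode Reward')
plt.show()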