diff --git a/src/pymgrid/algos/rbc/rbc.py b/src/pymgrid/algos/rbc/rbc.py
index bb916d8b..c49bbf9c 100644
--- a/src/pymgrid/algos/rbc/rbc.py
+++ b/src/pymgrid/algos/rbc/rbc.py
@@ -1,6 +1,7 @@
 from copy import deepcopy
 from tqdm import tqdm
 
+from pymgrid import Microgrid
 from pymgrid.algos.priority_list import PriorityListAlgo
 
 
@@ -23,9 +24,13 @@ class RuleBasedControl(PriorityListAlgo):
         cost to the highest.
 
     """
+
+    microgrid : Microgrid
+    'Microgrid on which to run rule-based control.'
+
     def __init__(self, microgrid, priority_list=None, remove_redundant_gensets=True):
         super().__init__()
-        self._microgrid = microgrid
+        self.microgrid = microgrid
         self._priority_list = self._get_priority_list(priority_list, remove_redundant_gensets)
 
     def _get_priority_list(self, priority_list, remove_redundant_gensets):
@@ -43,23 +48,20 @@ def _get_priority_list(self, priority_list, remove_redundant_gensets):
 
         return priority_list
 
-    def _get_action(self):
-        """
-        Given the priority list, define an action.
-        """
-        return self._populate_action(self._priority_list)
-
     def reset(self):
         """
         Reset the underlying microgrid.
 
+        If already reset (i.e. current step is initial step), do nothing and return None.
+
         Returns
         -------
         obs : dict[str, list[float]]
             Observations from resetting the modules as well as the flushed balance log.
 
         """
-        return self._microgrid.reset()
+        if self.microgrid.current_step != self.microgrid.initial_step:
+            return self.microgrid.reset()
 
     def run(self, max_steps=None, verbose=False):
         """
@@ -82,51 +84,44 @@ def run(self, max_steps=None, verbose=False):
         self.reset()
 
         for _ in tqdm(range(self._get_num_iter(max_steps)), desc="RBC Progress", disable=(not verbose)):
-            action = self._get_action()
-            _, _, done, _ = self._microgrid.run(action, normalized=False)
+            action = self.get_action()
+            _, _, done, _ = self.microgrid.run(action, normalized=False)
             if done:
                 break
 
-        return self._microgrid.get_log(as_frame=True)
+        return self.microgrid.get_log(as_frame=True)
 
     def _get_num_iter(self, max_steps):
         if max_steps is not None:
             return max_steps
         return self.microgrid.final_step - self.microgrid.initial_step
 
-    def get_empty_action(self):
+    def get_action(self):
         """
-        :meta private:
+        Given the priority list, define an action.
         """
-        return self._microgrid.get_empty_action()
+        return self._populate_action(self._priority_list)
 
-    @property
-    def microgrid(self):
+    def get_empty_action(self):
         """
-        View of the microgrid.
-
-        Returns
-        -------
-        microgrid : :class:`pymgrid.Microgrid`
-            The microgrid that RBC is being run on.
-
+        :meta private:
         """
-        return self._microgrid
+        return self.microgrid.get_empty_action()
 
     @property
     def fixed(self):
         """:meta private:"""
-        return self._microgrid.fixed
+        return self.microgrid.fixed
 
     @property
     def flex(self):
         """:meta private:"""
-        return self._microgrid.flex
+        return self.microgrid.flex
 
     @property
     def modules(self):
         """:meta private:"""
-        return self._microgrid.modules
+        return self.microgrid.modules
 
     @property
     def priority_list(self):
diff --git a/src/pymgrid/envs/base/base.py b/src/pymgrid/envs/base/base.py
index 72104a99..e7884a32 100644
--- a/src/pymgrid/envs/base/base.py
+++ b/src/pymgrid/envs/base/base.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 
 from gym import Env
@@ -177,6 +178,8 @@ def potential_observation_keys(self):
 
     def reset(self):
         obs = super().reset()
+        obs.pop('balance')
+        obs.pop('other')
         self.reset_callback()
         return self._get_obs(obs)
 
@@ -191,7 +194,7 @@ def step(self, action, normalized=True):
 
         Parameters
         ----------
-        action : dict[str, list[float]]
+        action : int or np.ndarray
             An action provided by the agent.
 
         normalized : bool, default True
@@ -251,7 +254,7 @@ def _get_obs(self, obs):
                 obs = obs.to_frame().unstack(level=1).T.droplevel(level=1, axis=1).to_dict(orient='list')
 
         elif self._flat_spaces:
-            obs = flatten(self._nested_observation_space, obs)
+            obs = self.flatten_obs(self._nested_observation_space, obs)
 
         return obs
 
@@ -268,6 +271,10 @@ def render(self, mode="human"):
         """:meta private:"""
         raise RuntimeError('rendering is not possible in Microgrid environments.')
 
+    @staticmethod
+    def flatten_obs(observation_space, obs):
+        return np.concatenate([flatten(observation_space[k], v) for k, v in obs.items()])
+
     @property
     def unwrapped(self):
         """:meta private:"""
diff --git a/src/pymgrid/envs/continuous/continuous.py b/src/pymgrid/envs/continuous/continuous.py
index c7889f5d..2c9f1770 100644
--- a/src/pymgrid/envs/continuous/continuous.py
+++ b/src/pymgrid/envs/continuous/continuous.py
@@ -1,4 +1,4 @@
-from gym.spaces import Dict, Tuple, flatten_space, unflatten
+from gym.spaces import Dict, Tuple, flatten_space, flatten, unflatten
 
 from pymgrid.envs.base import BaseMicrogridEnv
 
@@ -14,6 +14,18 @@ def _get_action_space(self, remove_redundant_actions=False):
         self._nested_action_space = self._get_nested_action_space()
         return flatten_space(self._nested_action_space) if self._flat_spaces else self._nested_action_space
 
-    def convert_action(self, action):
-        return unflatten(self._nested_action_space, action)
+    def convert_action(self, action, to_microgrid=True, normalize=False):
+        if to_microgrid:
+            converted = unflatten(self._nested_action_space, action)
+            if normalize:
+                converted = self.microgrid_action_space.normalize(converted)
+
+            return converted
+
+        if normalize:
+            action = self.microgrid_action_space.normalize(action)
+
+        assert action in self._nested_action_space
+
+        return flatten(self._nested_action_space, action)
 
diff --git a/src/pymgrid/microgrid/microgrid.py b/src/pymgrid/microgrid/microgrid.py
index 3b45b091..b4ca42e8 100644
--- a/src/pymgrid/microgrid/microgrid.py
+++ b/src/pymgrid/microgrid/microgrid.py
@@ -482,7 +482,8 @@ def get_log(self, as_frame=True, drop_singleton_key=False, drop_forecasts=False)
 
         col_names = ['module_name', 'module_number', 'field']
 
-        df = pd.DataFrame(_log_dict, index=pd.RangeIndex(start=self.initial_step, stop=self.current_step))
+        initial_step = self._modules.get_attrs('initial_step', unique=True).item()
+        df = pd.DataFrame(_log_dict, index=pd.RangeIndex(start=initial_step, stop=self.current_step))
         df.columns = pd.MultiIndex.from_tuples(df.columns.to_list(), names=col_names)
 
         if drop_forecasts:
@@ -714,7 +715,7 @@ def modules(self):
         """
         return self._modules
 
-    def state_dict(self, normalized=False):
+    def state_dict(self, normalized=False, as_run_output=False):
         """
         State of the microgrid as a dict.
 
@@ -725,14 +726,24 @@ def state_dict(self, normalized=False):
         normalized : bool, default False
             Whether to return a dict of normalized values.
 
+        as_run_output : bool, default False
+            Whether to return output in the same format as the output of :meth:`Microgrid.run`.
+            Inner values are numpy arrays rather than dicts in this case.
+
+
         Returns
         -------
-        state_dict : dict[str, list[dict]]
+        state_dict : dict[str, list[dict]] or dict[str, list[np.ndarray]]
             State of the microgrid as a nested dict.
 
         """
+        def as_run_output_f(state_dict):
+            if as_run_output:
+                return np.array(list(state_dict.values()))
+            return state_dict
+
         return {name: [
-            module.state_dict(normalized=normalized) for module in modules
+            as_run_output_f(module.state_dict(normalized=normalized)) for module in modules
         ] for name, modules in self._modules.iterdict()}
 
     @property