diff --git a/.gitignore b/.gitignore index 0019092a..912a57e8 100644 --- a/.gitignore +++ b/.gitignore @@ -90,6 +90,7 @@ target/ # Jupyter Notebook .ipynb_checkpoints +.virtual_documents # IPython profile_default/ diff --git a/examples/notebooks/Brax_Experiments_with_PGPE.ipynb b/examples/notebooks/Brax_Experiments_with_PGPE.ipynb index 96f421f7..e24e2406 100644 --- a/examples/notebooks/Brax_Experiments_with_PGPE.ipynb +++ b/examples/notebooks/Brax_Experiments_with_PGPE.ipynb @@ -175,8 +175,11 @@ "metadata": {}, "outputs": [], "source": [ + "ENV_NAME = \"brax::humanoid\" # solve the brax task named \"humanoid\"\n", + "# ENV_NAME = \"brax::old::humanoid\" # solve the \"humanoid\" task defined within 'brax.v1`\n", + "\n", "problem = VecGymNE(\n", - " env=\"brax::humanoid\", # solve the brax task named \"humanoid\"\n", + " env=ENV_NAME,\n", " network=policy,\n", " #\n", " # Collect observation stats, and use those stats to normalize incoming observations\n", @@ -202,6 +205,19 @@ "problem, problem.solution_length" ] }, + { + "cell_type": "markdown", + "id": "bce02d7c-400c-4c22-9bb8-aa70fa4b1da2", + "metadata": {}, + "source": [ + "---\n", + "\n", + "**Note.**\n", + "At the time of writing this (15 June 2023), the [arXiv paper of EvoTorch](https://arxiv.org/abs/2302.12600v3) reports results based on the old implementations of the brax tasks (which were the default until brax v0.1.2). In brax version v0.9.0, these old task implementations moved into the namespace `brax.v1`. If you wish to reproduce the results reported in the arXiv paper of EvoTorch, you might want to specify the environment name as `\"brax::old::humanoid\"` (where the substring `\"old::\"` causes `VecGymNE` to instantiate the environment using the namespace `brax.v1`), so that you will observe scores and execution times compatible with the ones reported in that arXiv paper.\n", + "\n", + "---" + ] + }, { "cell_type": "markdown", "id": "95417793-3835-47b1-b10a-7f36e78fa3ad", @@ -343,11 +359,21 @@ "import jax\n", "\n", "import brax\n", - "import brax.envs\n", - "import brax.jumpy as jp\n", "\n", - "from brax.io import html\n", - "from brax.io import image\n", + "if ENV_NAME.startswith(\"brax::old::\"):\n", + " import brax.v1\n", + " import brax.v1.envs\n", + " import brax.v1.jumpy as jp\n", + " from brax.v1.io import html\n", + " from brax.v1.io import image\n", + "else:\n", + " try:\n", + " import jumpy as jp\n", + " except ImportError:\n", + " import brax.jumpy as jp\n", + " import brax.envs\n", + " from brax.io import html\n", + " from brax.io import image\n", "\n", "from IPython.display import HTML, Image\n", "\n", @@ -417,7 +443,11 @@ "metadata": {}, "outputs": [], "source": [ - "env = brax.envs.create(env_name=\"humanoid\")\n", + "if ENV_NAME.startswith(\"brax::old::\"):\n", + " env = brax.v1.envs.create(env_name=ENV_NAME[11:])\n", + "else:\n", + " env = brax.envs.create(env_name=ENV_NAME[6:])\n", + "\n", "reset = jax.jit(env.reset)\n", "step = jax.jit(env.step)" ] @@ -438,7 +468,11 @@ "outputs": [], "source": [ "seed = random.randint(0, (2 ** 32) - 1)\n", - "state = reset(rng=jp.random_prngkey(seed=seed))\n", + "\n", + "if hasattr(jp, \"random_prngkey\"):\n", + " state = reset(rng=jp.random_prngkey(seed=seed))\n", + "else:\n", + " state = reset(rng=jax.random.PRNGKey(seed=seed))\n", "\n", "h = None\n", "states = []\n", @@ -482,11 +516,26 @@ { "cell_type": "code", "execution_count": null, - "id": "29b9c3aa-8068-40bc-9ec2-e5ff759ed22c", + "id": "7ec60419-1ad0-4f19-bc1e-f2048577ea29", + "metadata": {}, + "outputs": [], + 
"source": [ + "if ENV_NAME.startswith(\"brax::old::\"):\n", + " env_sys = env.sys\n", + " states_to_render = [state.qp for state in states]\n", + "else:\n", + " env_sys = env.sys.replace(dt=env.dt)\n", + " states_to_render = [state.pipeline_state for state in states]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a07c70f6-2c93-43a1-b4c3-edd3f395302a", "metadata": {}, "outputs": [], "source": [ - "HTML(html.render(env.sys, [state.qp for state in states]))" + "HTML(html.render(env_sys, states_to_render))" ] } ], @@ -506,7 +555,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb b/examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb index 2db0d374..74233144 100644 --- a/examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb +++ b/examples/notebooks/Gym_Experiments_with_PGPE_and_CoSyNE.ipynb @@ -7,10 +7,10 @@ "source": [ "## Training Policies for Gym using PGPE and CoSyNE\n", "\n", - "This example demonstrates how you can train policies using EvoTorch and Gym. To execute this example, you will need to install Gym's subpackages with:\n", + "This example demonstrates how you can train policies using EvoTorch and Gym. To execute this example, you will need to install the subpackages of `gymnasium` via:\n", "\n", "```bash\n", - " pip install 'gym[box2d,mujoco]'\n", + "pip install 'gymnasium[box2d,mujoco]'\n", "```\n", "\n", "This example is based on our paper [1] where we describe the ClipUp optimiser and compare it to the Adam optimiser. In particular, we will re-implement the experiment for the \"LunarLanderContinuous-v2\" environment. " @@ -279,7 +279,7 @@ "source": [ "#### References\n", "\n", - "[1] Toklu, et. al. \"Clipup: a simple and powerful optimizer for distribution-based policy evolution.\" [International Conference on Parallel Problem Solving from Nature](https://dl.acm.org/doi/abs/10.1007/978-3-030-58115-2_36). Springer, Cham, 2020.\n", + "[1] Toklu, et. al. \"ClipUp: a simple and powerful optimizer for distribution-based policy evolution.\" [International Conference on Parallel Problem Solving from Nature](https://dl.acm.org/doi/abs/10.1007/978-3-030-58115-2_36). Springer, Cham, 2020.\n", "\n", "[2] Gomez, Faustino, et al. [\"Accelerated Neural Evolution through Cooperatively Coevolved Synapses.\"](https://www.jmlr.org/papers/volume9/gomez08a/gomez08a.pdf) Journal of Machine Learning Research 9.5 (2008)." ] @@ -301,7 +301,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.9.16" } }, "nbformat": 4, diff --git a/examples/notebooks/Model_Predictive_Control_with_CEM/reacher_mpc.ipynb b/examples/notebooks/Model_Predictive_Control_with_CEM/reacher_mpc.ipynb index 9a2e8afd..5f8c8986 100644 --- a/examples/notebooks/Model_Predictive_Control_with_CEM/reacher_mpc.ipynb +++ b/examples/notebooks/Model_Predictive_Control_with_CEM/reacher_mpc.ipynb @@ -50,7 +50,7 @@ "Because this example focuses on the `Reacher-v4` reinforcement learning environment, `gym` with `mujoco` support is required. 
One can install the `mujoco` support for `gym` via:\n", "\n", "```bash\n", - "pip install 'gym[mujoco]'\n", + "pip install 'gymnasium[mujoco]'\n", "```" ] }, @@ -84,7 +84,7 @@ "\n", "from typing import Iterable\n", "\n", - "import gym" + "import gymnasium as gym" ] }, { @@ -124,7 +124,7 @@ "source": [ "## Definitions\n", "\n", - "We begin our definitions with a helper function, $\\text{reacher_state}(\\text{observation})$ which extracts the state ($s_t$) of the robotic arm from the observation vector returned by the environment." + "We begin our definitions with a helper function, $\\text{reacher\\_state}(\\text{observation})$ which extracts the state ($s_t$) of the robotic arm from the observation vector returned by the environment." ] }, { @@ -147,7 +147,7 @@ "id": "4909a2aa-1f07-4c6e-b757-ddcbe7496a78", "metadata": {}, "source": [ - "We now define the function $\\text{predict_next_state}(s_t, a_t)$ which, given a state $s_t$ and an action $a_t$ ($t$ being the current timestep), returns the predicted next state $\\tilde{s}_{t+1}$.\n", + "We now define the function $\\text{predict\\_next\\_state}(s_t, a_t)$ which, given a state $s_t$ and an action $a_t$ ($t$ being the current timestep), returns the predicted next state $\\tilde{s}_{t+1}$.\n", "\n", "Within itself, this function uses the neural network $\\pi$ to make its predictions." ] @@ -173,9 +173,9 @@ "source": [ "Let us now define a _plan_ $p_t$ as a series of actions planned for future timesteps, i.e.: $p_t = (a_t, a_{t+1}, a_{t+2}, ..., a_{t+(H-1)})$ where $H$ is the horizon, determining how far into the future we are planning.\n", "\n", - "With this, we define the function $\\text{predict_plan_outcome}(s_t, p_t)$ which receives the current state $s_t$ and a plan $p_t$ and returns a predicted future state $\\tilde{s}_{t+H}$, which represents the predicted outcome of following the plan. Within $\\text{predict_plan_outcome}(\\cdot)$, the predictions are made with the help of $\\text{predict_next_state}(\\cdot)$ which in turn uses the neural network $\\pi$.\n", + "With this, we define the function $\\text{predict\\_plan\\_outcome}(s_t, p_t)$ which receives the current state $s_t$ and a plan $p_t$ and returns a predicted future state $\\tilde{s}_{t+H}$, which represents the predicted outcome of following the plan. Within $\\text{predict\\_plan\\_outcome}(\\cdot)$, the predictions are made with the help of $\\text{predict\\_next\\_state}(\\cdot)$ which in turn uses the neural network $\\pi$.\n", "\n", - "An implementation detail to be noted here is that, $\\text{predict_plan_outcome}(\\cdot)$ expects not a single plan, but a batch of plans, and uses PyTorch's vectorization capabilities to make predictions for all those plans in a performant manner." + "An implementation detail to be noted here is that, $\\text{predict\\_plan\\_outcome}(\\cdot)$ expects not a single plan, but a batch of plans, and uses PyTorch's vectorization capabilities to make predictions for all those plans in a performant manner." 
] }, { @@ -213,7 +213,7 @@ "\\begin{array}{c c l}\n", " p_t =\n", " & \\text{arg min} & ||(\\tilde{s}_{t+H}^x,\\tilde{s}_{t+H}^y)-(g^x, g^y)|| \\\\\n", - " & \\text{subject to} & \\tilde{s}_{t+H} = \\text{predict_plan_outcome}(s_t, p_t)\n", + " & \\text{subject to} & \\tilde{s}_{t+H} = \\text{predict\\_plan\\_outcome}(s_t, p_t)\n", "\\end{array}\n", "$$\n", "\n", @@ -304,19 +304,7 @@ "metadata": {}, "outputs": [], "source": [ - "from packaging.version import Version\n", - "old_render_api = Version(gym.__version__) < Version(\"0.26\")\n", - "\n", - "if old_render_api:\n", - " # For gym versions older than 0.26, we do not have to specify additional\n", - " # keyword arguments for human-mode rendering.\n", - " env = gym.make(\"Reacher-v4\")\n", - "else:\n", - " # For gym versions beginning with 0.26, we have to explicitly specify\n", - " # that the rendering mode is \"human\" if we wish to do the rendering on\n", - " # the screen.\n", - " env = gym.make(\"Reacher-v4\", render_mode=\"human\")\n", - "\n", + "env = gym.make(\"Reacher-v4\", render_mode=\"human\")\n", "env" ] }, @@ -337,15 +325,7 @@ "outputs": [], "source": [ "def run_episode(visualize: bool = False):\n", - " reset_result = env.reset()\n", - " if isinstance(reset_result, tuple):\n", - " # If the result of the `reset()` method is a tuple, then we assume\n", - " # that it returned a tuple in the form (observation, info).\n", - " observation, _ = reset_result\n", - " else:\n", - " # If the result of the `reset()` method is not a tuple, then we\n", - " # assume that it returned the observation by itself.\n", - " observation = reset_result\n", + " observation, _ = env.reset()\n", "\n", " if visualize:\n", " env.render()\n", @@ -354,14 +334,8 @@ " action = do_planning(observation)\n", " action = np.clip(action, -1.0, 1.0)\n", " \n", - " step_result = env.step(action)\n", - " if len(step_result) == 5:\n", - " observation, reward, terminated, truncated, info = step_result\n", - " done = terminated | truncated\n", - " elif len(step_result) == 4:\n", - " observation, reward, done, info = step_result\n", - " else:\n", - " assert False, \"Unexpected number of items returned by `.step(...)`\"\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + " done = terminated | truncated\n", " \n", " if visualize:\n", " env.render()\n", @@ -419,7 +393,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/examples/notebooks/Model_Predictive_Control_with_CEM/train_forward_model/reacher_train.ipynb b/examples/notebooks/Model_Predictive_Control_with_CEM/train_forward_model/reacher_train.ipynb index b8906c95..4cf414f5 100644 --- a/examples/notebooks/Model_Predictive_Control_with_CEM/train_forward_model/reacher_train.ipynb +++ b/examples/notebooks/Model_Predictive_Control_with_CEM/train_forward_model/reacher_train.ipynb @@ -45,7 +45,7 @@ "source": [ "import torch\n", "import numpy as np\n", - "import gym\n", + "import gymnasium as gym\n", "from typing import Iterable\n", "import multiprocessing as mp\n", "import math\n", @@ -118,7 +118,7 @@ "\n", "Here, we define helper functions and utilities for the training of our model.\n", "\n", - "We begin by defining the function $\\text{reacher_state}(\\cdot)$ which, given an observation from the reinforcement learning environment `Reacher-v4`, extracts and returns the state vector of the simulated robotic arm." 
+ "We begin by defining the function $\\text{reacher\\_state}(\\cdot)$ which, given an observation from the reinforcement learning environment `Reacher-v4`, extracts and returns the state vector of the simulated robotic arm." ] }, { @@ -141,7 +141,7 @@ "id": "d8fbb94c-881b-4cc9-adca-b487209f8dc4", "metadata": {}, "source": [ - "We now define a function $\\text{collect_data}(\\cdot)$ which collects data from multiple episodes, number of these episodes being specified via the argument `num_episodes`.\n", + "We now define a function $\\text{collect\\_data}(\\cdot)$ which collects data from multiple episodes, number of these episodes being specified via the argument `num_episodes`.\n", "Within each episode, the data we collect is:\n", "\n", "- current state\n", @@ -149,7 +149,7 @@ "- next state (i.e. the state obtained after applying the action)\n", "\n", "The forward model that we wish to train should be able to answer this question: _given the current state and the action, what is the prediction for the next state?_ Therefore, among the data we collect, the current states and the actions are categorized as the inputs, while the next states are categorized as the targets.\n", - "The function $\\text{collect_data}(\\cdot)$ organizes its data into inputs and targets, and finally returns them." + "The function $\\text{collect\\_data}(\\cdot)$ organizes its data into inputs and targets, and finally returns them." ] }, { @@ -165,28 +165,14 @@ "\n", " env = gym.make(ENV_NAME)\n", " for _ in range(num_episodes):\n", - " reset_result = env.reset()\n", - " if isinstance(reset_result, tuple):\n", - " # If the result of the `reset()` method is a tuple, then we assume that it returned\n", - " # us a tuple of the form (observation, info). We get the observation from it.\n", - " observation, _ = reset_result\n", - " else:\n", - " # If the result of the `reset()` method is not a tuple, then we asusme that it returned\n", - " # the observation by itself.\n", - " observation = reset_result\n", + " observation, _ = env.reset()\n", "\n", " while True:\n", " action = np.clip(np.asarray(env.action_space.sample(), dtype=\"float32\"), -1.0, 1.0)\n", " state = reacher_state(observation)\n", " \n", - " step_result = env.step(action)\n", - " if len(step_result) == 5:\n", - " observation, reward, terminated, truncated, info = env.step(action)\n", - " done = terminated | truncated\n", - " elif len(step_result) == 4:\n", - " observation, reward, done, info = env.step(action)\n", - " else:\n", - " assert False, \"Unexpected number of items returned by `.step(...)`\"\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + " done = terminated | truncated\n", "\n", " next_state = reacher_state(observation)\n", "\n", @@ -486,11 +472,7 @@ "metadata": {}, "outputs": [], "source": [ - "reset_result = env.reset()\n", - "if isinstance(reset_result, tuple):\n", - " observation, _ = reset_result\n", - "else:\n", - " observation = reset_result\n", + "observation, _ = env.reset()\n", "observation = np.asarray(observation, dtype=\"float32\")\n", "\n", "actual_state = reacher_state(observation)\n", @@ -513,14 +495,8 @@ "while True:\n", " action = np.asarray(env.action_space.sample(), dtype=\"float32\")\n", " \n", - " step_result = env.step(action)\n", - " if len(step_result) == 5:\n", - " observation, reward, terminated, truncated, info = env.step(action)\n", - " done = terminated | truncated\n", - " elif len(step_result) == 4:\n", - " observation, reward, done, info = env.step(action)\n", - " else:\n", - " 
assert False, \"Unexpected number of items returned by `.step(...)`\"\n", + " observation, reward, terminated, truncated, info = env.step(action)\n", + " done = terminated | truncated\n", "\n", " actual_state = reacher_state(observation)\n", " \n", @@ -598,14 +574,6 @@ " f\n", " )" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2d6e33d1-6a79-4f9f-bfc9-70b14c5f9697", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -624,7 +592,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.13" + "version": "3.8.17" } }, "nbformat": 4, diff --git a/examples/scripts/README.md b/examples/scripts/README.md index 27942622..46b080f9 100644 --- a/examples/scripts/README.md +++ b/examples/scripts/README.md @@ -11,7 +11,7 @@ The scripts provided here demonstrate key features of EvoTorch for new users and --- ## Reinforcement Learning -- [rl_gym.py](./rl_gym.py): demonstrates how to solve a simple [Gym](https://www.gymlibrary.ml/) problem using the PGPE algorithm and ClipUp optimizer. +- [rl_gym.py](./rl_gym.py): demonstrates how to solve a simple [Gymnasium](https://gymnasium.farama.org/) problem using the PGPE algorithm and ClipUp optimizer. --- ## Paper re-implementation (RL) @@ -23,7 +23,7 @@ Nihat Engin Toklu, Paweł Liskowski, and Rupesh Kumar Srivastava. International Conference on Parallel Problem Solving from Nature. Springer, Cham, 2020. https://arxiv.org/abs/2008.02387 ``` -It allows you to train policies for the Lunar Lander, Walker-2D and Humanoid environments from [Gym](https://www.gymlibrary.ml/) as well as the [PyBullet](https://pybullet.org/) Humanoid. +It allows you to train policies for the Lunar Lander, Walker-2D and Humanoid environments from [Gymnasium](https://gymnasium.farama.org/) as well as the [PyBullet](https://pybullet.org/) Humanoid. ### Requirements @@ -32,9 +32,9 @@ It allows you to train policies for the Lunar Lander, Walker-2D and Humanoid env # Necessary: Used for configs and logging results to files or databases pip install sacred # Optional: box2d for Lunar Lander, mujoco for Walker-2D and Humanoid -pip install 'gym[box2d,mujoco]' +pip install 'gymnasium[box2d,mujoco]' # Optional: For PyBullet Humanoid -pip install pybullet +pip install pybullet 'gym<0.26' ``` ### Running experiments @@ -88,7 +88,7 @@ Also, the following command: python rl_clipup.py -F RESULTS_DIR with pybullet_humanoid save_interval=50 ``` -...starts an evolutionary computation run for solving `pybullet_envs:HumanoidBulletEnv-v0`, and saves the policy at every 50 generations. Such an explicit `save_interval` value is recommended for pybullet humanoid, since the computational experiments for this environment last long, and one might want to look at how the current agent is behaving without having to wait until the end of the run. +...starts an evolutionary computation run for solving [`wrapped_humanoid_bullet:WrappedHumanoidBulletEnv-v0`](./wrapped_humanoid_bullet.py) (which is a wrapper around the classical gym environment `pybullet_envs:HumanoidBulletEnv-v0` to make the environment available to gymnasium), and saves the policy at every 50 generations. Such an explicit `save_interval` value is recommended for pybullet humanoid, since the computational experiments for this environment last long, and one might want to look at how the current agent is behaving without having to wait until the end of the run. 
Other available pre-configurations are `walker` (for the MuJoCo environment `Walker-v4`) and `humanoid` (for the MuJoCo environment `Humanoid-v4`). diff --git a/examples/scripts/rl_clipup.py b/examples/scripts/rl_clipup.py index 3f9cc478..acb372d6 100644 --- a/examples/scripts/rl_clipup.py +++ b/examples/scripts/rl_clipup.py @@ -196,8 +196,8 @@ def humanoid(): @ex.named_config def pybullet_humanoid(): - env_name = "tinytraj_humanoid_bullet:TinyTrajHumanoidBulletEnv-v0" - actual_env_name = "pybullet_envs:HumanoidBulletEnv-v0" + env_name = "wrapped_humanoid_bullet:TinyTrajHumanoidBulletEnv-v0" + actual_env_name = "wrapped_humanoid_bullet:WrappedHumanoidBulletEnv-v0" policy = "Linear(obs_length, 64) >> Tanh() >> Linear(64, act_length)" max_speed = 0.15 adam_alpha = 6e-4 # No experiment was ever done with adam for this problem diff --git a/examples/scripts/rl_enjoy.py b/examples/scripts/rl_enjoy.py index a74e13c8..cc664dea 100644 --- a/examples/scripts/rl_enjoy.py +++ b/examples/scripts/rl_enjoy.py @@ -20,24 +20,16 @@ from time import sleep from typing import Optional, Union -import gym +import gymnasium as gym import numpy as np import torch -from packaging.version import Version from evotorch.neuroevolution.net.rl import reset_env, take_step_in_env -new_render_api = Version(gym.__version__) >= Version("0.26") - def make_env_for_rendering(*args, **kwargs): """ - Initialize a new gym environment with human-mode rendering. - - Beginning with gym 0.26, it is required to specify the rendering mode - while initializing the environment. If the gym version is newer than - or equal to 0.26, this function passes the keyword argument - `render_mode="human"` to `gym.make(...)`. + Initialize a new gymnasium environment with human-mode rendering. Args: args: Expected in the form of positional arguments. These are @@ -47,23 +39,13 @@ def make_env_for_rendering(*args, **kwargs): Returns: The newly made gym environment. """ - if new_render_api: - env_config = {"render_mode": "human"} - else: - env_config = {} - - env_config.update(kwargs) + env_config = {**kwargs, "render_mode": "human"} return gym.make(*args, **env_config) def make_env_for_recording(*args, **kwargs): """ - Initialize a new gym environment with human-mode rendering. - - Beginning with gym 0.26, it is required to specify the rendering mode - while initializing the environment. If the gym version is newer than - or equal to 0.26, this function passes the keyword argument - `render_mode="rgb_array"` to `gym.make(...)`. + Initialize a new gymnasium environment for recording. Args: args: Expected in the form of positional arguments. These are @@ -73,28 +55,10 @@ def make_env_for_recording(*args, **kwargs): Returns: The newly made gym environment. """ - if new_render_api: - env_config = {"render_mode": "rgb_array"} - else: - env_config = {} - - env_config.update(kwargs) + env_config = {**kwargs, "render_mode": "rgb_array"} return gym.make(*args, **env_config) -def rgb_array_from_env(env: gym.Env) -> np.ndarray: - """ - Render the current state of the environment into numpy array. - - Returns: - The newly made numpy array containing the rendering result. 
- """ - if new_render_api: - return env.render() - else: - return env.render(mode="rgb_array") - - def str_if_non_empty(s: Optional[str]) -> Optional[str]: if (s is None) or (isinstance(s, str) and (s == "")): return None @@ -158,8 +122,6 @@ def main( env_name = loaded["env_name"] policy = loaded["policy"] kwargs = {} - if ("BulletEnv" in env_name) and (record_prefix is None) and (not new_render_api): - kwargs["render"] = True if config is not None: kwargs.update(config) @@ -195,7 +157,7 @@ def render(): from matplotlib import pyplot as plt if scene_index % record_period == 0: - img = rgb_array_from_env(env) + img = env.render() if extract is not None: extract_parts = extract.split(",") x1 = int(extract_parts[0]) @@ -294,7 +256,9 @@ def render(): help=( "When provided, this is expected as a Python dictionary." " Each item in this Python dictionary will be set as an attribute of the gym environment." - " For example, for a PyBullet environment (e.g. bullet_envs:HumanoidBulletEnv-v0)," + " For example, for a PyBullet environment" + " (like wrapped_humanoid_bullet:WrappedHumanoidBulletEnv-v0" + " or wrapped_humanoid_bullet:TinyTrajHumanoidBulletEnv-v0)" ' one might want to give the argument --set \'{"_render_width": 640, "_render_height": 480}\'' " to set the attributes _render_width and _render_height as 640 and 480 respectively," " therefore configuring the render size to 640x480 while recording." diff --git a/examples/scripts/tinytraj_humanoid_bullet.py b/examples/scripts/wrapped_humanoid_bullet.py similarity index 56% rename from examples/scripts/tinytraj_humanoid_bullet.py rename to examples/scripts/wrapped_humanoid_bullet.py index a33466c1..d5252bb2 100644 --- a/examples/scripts/tinytraj_humanoid_bullet.py +++ b/examples/scripts/wrapped_humanoid_bullet.py @@ -12,25 +12,45 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import gym -from gym.envs.registration import register +from collections.abc import Mapping +from typing import Optional +import gym as classical_gym +import gymnasium as gym +from gymnasium.envs.registration import register +from gymnasium.spaces import Box -class TinyTrajHumanoidBulletEnv(gym.Env): - ID = "TinyTrajHumanoidBulletEnv-v0" - ENTRY_POINT = __name__ + ":TinyTrajHumanoidBulletEnv" - MAX_EPISODE_STEPS = 200 - def __init__(self, trajectory_length=200, **kwargs): +class WrappedHumanoidBulletEnv(gym.Env): + ID = "WrappedHumanoidBulletEnv-v0" + ENTRY_POINT = __name__ + ":WrappedHumanoidBulletEnv" + MAX_EPISODE_STEPS = 1000 + + def __init__(self, trajectory_length: Optional[int] = None, render_mode: Optional[str] = None, **kwargs): gym.Env.__init__(self) + if trajectory_length is None: + trajectory_length = self.MAX_EPISODE_STEPS + self.__tlimit = trajectory_length self.__done = True self.__t = 0 - self.__contained_env = gym.make("pybullet_envs:HumanoidBulletEnv-v0", **kwargs) - - self.observation_space = self.__contained_env.observation_space - self.action_space = self.__contained_env.action_space + self.__contained_env = classical_gym.make( + "pybullet_envs:HumanoidBulletEnv-v0", render=(render_mode == "human"), **kwargs + ) + + self.__render_mode = None if render_mode is None else str(render_mode) + + self.observation_space = Box( + low=self.__contained_env.observation_space.low, + high=self.__contained_env.observation_space.high, + dtype=self.__contained_env.observation_space.dtype, + ) + self.action_space = Box( + low=self.__contained_env.action_space.low, + high=self.__contained_env.action_space.high, + dtype=self.__contained_env.action_space.dtype, + ) self.reward_range = (float("-inf"), float("inf")) def step(self, action): @@ -41,6 +61,12 @@ def step(self, action): if num_step_results == 4: observation, reward, done, info = step_results + if isinstance(info, Mapping) and ("TimeLimit.truncated" in info): + truncated = info["TimeLimit.truncated"] + terminated = done and (not truncated) + else: + truncated = False + terminated = done elif num_step_results == 5: observation, reward, terminated, truncated, info = step_results done = terminated or truncated @@ -56,18 +82,22 @@ def step(self, action): reward = sum(self.__contained_env.rewards[1:]) - if num_step_results == 4: - return observation, reward, done, info - elif num_step_results == 5: - return observation, reward, terminated, truncated, info + return observation, reward, terminated, truncated, info def reset(self, **kwargs): self.__done = False self.__t = 0 - return self.__contained_env.reset(**kwargs) + contained_obs = self.__contained_env.reset(**kwargs) + if isinstance(contained_obs, tuple): + result = contained_obs + else: + result = contained_obs, {} + return result - def render(self, mode="human", **kwargs): - self.__contained_env.render(mode=mode, **kwargs) + def render(self, mode: Optional[str] = None, **kwargs): + if mode is None: + mode = self.__render_mode + return self.__contained_env.render(mode=mode, **kwargs) def close(self): return self.__contained_env.close() @@ -111,6 +141,19 @@ def robot(self): return self.__contained_env +class TinyTrajHumanoidBulletEnv(WrappedHumanoidBulletEnv): + ID = "TinyTrajHumanoidBulletEnv-v0" + ENTRY_POINT = __name__ + ":TinyTrajHumanoidBulletEnv" + MAX_EPISODE_STEPS = 200 + + +register( + id=WrappedHumanoidBulletEnv.ID, + entry_point=WrappedHumanoidBulletEnv.ENTRY_POINT, + max_episode_steps=WrappedHumanoidBulletEnv.MAX_EPISODE_STEPS, +) + + register( 
id=TinyTrajHumanoidBulletEnv.ID, entry_point=TinyTrajHumanoidBulletEnv.ENTRY_POINT, diff --git a/setup.cfg b/setup.cfg index 7a63d4ea..3bdc7623 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,8 +35,7 @@ tests_require = evotorch[test] install_requires = cma - functorch - gym + gymnasium matplotlib numpy packaging @@ -53,7 +52,7 @@ where = src [options.extras_require] # Logging dependencies mlflow = mlflow -neptune = neptune-client +neptune = neptune sacred = sacred wandb = wandb logging = diff --git a/src/evotorch/algorithms/mapelites.py b/src/evotorch/algorithms/mapelites.py index d7feb7c0..7709c20b 100644 --- a/src/evotorch/algorithms/mapelites.py +++ b/src/evotorch/algorithms/mapelites.py @@ -2,7 +2,11 @@ from typing import Callable, Iterable, Optional, Union import torch -from functorch import vmap + +try: + from torch.func import vmap +except ImportError: + from functorch import vmap from ..core import Problem, SolutionBatch from ..operators import CosynePermutation, CrossOver, GaussianMutation, OnePointCrossOver, SimulatedBinaryCrossOver diff --git a/src/evotorch/core.py b/src/evotorch/core.py index a38b7fc2..6ca1dce6 100644 --- a/src/evotorch/core.py +++ b/src/evotorch/core.py @@ -92,6 +92,7 @@ def warn(cls, operation_name: Optional[str] = None): rank, rowwise_sum, split_workload, + storage_ptr, to_torch_dtype, ) from .tools.cloning import Serializable, deep_clone @@ -3422,7 +3423,7 @@ def safe_slice(t: torch.Tensor, slice_info): self._slice = slice_info self._descending = source._descending - shares_storage = self._data.storage().data_ptr() == source._data.storage().data_ptr() + shares_storage = storage_ptr(self._data) == storage_ptr(source._data) if not shares_storage: self._descending = deepcopy(self._descending) diff --git a/src/evotorch/logging.py b/src/evotorch/logging.py index 4cab6868..3fa64a68 100644 --- a/src/evotorch/logging.py +++ b/src/evotorch/logging.py @@ -23,6 +23,7 @@ from typing import Any, Iterable, Optional, Union import torch +from packaging.version import Version from torch import nn from .algorithms.searchalgorithm import SearchAlgorithm @@ -49,7 +50,10 @@ try: - import neptune.new as neptune + import neptune + + if hasattr(neptune, "__version__") and (Version(neptune.__version__) < Version("1.0")): + import neptune.new as neptune except ImportError: neptune = None diff --git a/src/evotorch/neuroevolution/gymne.py b/src/evotorch/neuroevolution/gymne.py index 09f93a07..74d1ba9b 100644 --- a/src/evotorch/neuroevolution/gymne.py +++ b/src/evotorch/neuroevolution/gymne.py @@ -21,10 +21,9 @@ from pathlib import Path from typing import Any, Callable, Iterable, List, Optional, Union -import gym +import gymnasium as gym import numpy as np import torch -from packaging.version import Version from torch import nn from ..core import BoundsPairLike, Solution, SolutionBatch @@ -41,11 +40,6 @@ ) from .net.statefulmodule import ensure_stateful -# Determine the gym version without failing when __version__ has an unexpected value. -# Perhaps such unexpected values could be encountered when using a custom/modified version -# of gym, and we do not wish this module to fail in those scenarios. 
-_gym_older_than_0_26 = Version(gym.__version__) < Version("0.26") - def ensure_space_types(env: gym.Env) -> None: if not isinstance(env.observation_space, gym.spaces.Box): @@ -381,14 +375,13 @@ def _rollout( policy = ensure_stateful(policy) policy.reset() - if visualize and (not _gym_older_than_0_26): - # Beginning with gym 0.26, we need to specify the render mode when instantiating the environment. + if visualize: env = self._instantiate_new_env(render_mode="human") else: env = self._get_env() observation = self._normalize_observation(reset_env(env), update_stats=update_stats) - if visualize and _gym_older_than_0_26: + if visualize: env.render() t = 0 diff --git a/src/evotorch/neuroevolution/net/functional.py b/src/evotorch/neuroevolution/net/functional.py index 7e3e9159..4c912dbd 100644 --- a/src/evotorch/neuroevolution/net/functional.py +++ b/src/evotorch/neuroevolution/net/functional.py @@ -12,12 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. +from copy import deepcopy from typing import Any import torch -from functorch import make_functional_with_buffers from torch import nn +try: + from torch.func import functional_call +except ImportError: + from torch.nn.utils.stateless import functional_call + +from contextlib import nullcontext + def _shape_length(shape: tuple) -> int: """ @@ -40,9 +47,6 @@ class ModuleExpectingFlatParameters: """ A wrapper which brings a functional interface around a torch module. - For obtaining the functional interface, this class internally uses - the `functorch` library. - Similar to `functorch.FunctionalModule`, `ModuleExpectingFlatParameters` turns a `torch.nn.Module` instance to a function which expects a new leftmost argument representing the parameters of the network. @@ -87,18 +91,33 @@ class ModuleExpectingFlatParameters: ``` """ - def __init__(self, module: nn.Module, disable_autograd_tracking: bool = False): + @torch.no_grad() + def __init__(self, net: nn.Module, *, disable_autograd_tracking: bool = False): + """ + `__init__(...)`: Initialize the `ModuleExpectingFlatParameters` instance. + + Args: + net: The module that is to be wrapped by a functional interface. + disable_autograd_tracking: If given as True, all operations + regarding the wrapped module will be performed in the context + `torch.no_grad()`, forcefully disabling the autograd. + If given as False, autograd will not be affected. + The default is False. + """ + # Declare the variables which will store information regarding the parameters of the module. + self.__param_names = [] self.__param_shapes = [] self.__param_length = 0 self.__param_slices = [] self.__num_params = 0 - self.__buffers = [] # Iterate over the parameters of the module and fill the related information. 
i = 0 j = 0 - for p in module.parameters(): + for pname, p in net.named_parameters(): + self.__param_names.append(pname) + shape = p.shape self.__param_shapes.append(shape) @@ -111,11 +130,11 @@ def __init__(self, module: nn.Module, disable_autograd_tracking: bool = False): self.__num_params += 1 - self.__fmodel, _, self.__buffers = make_functional_with_buffers( - module, disable_autograd_tracking=bool(disable_autograd_tracking) - ) + self.__buffer_dict = {bname: b.clone() for bname, b in net.named_buffers()} - self.__buffers = list(self.__buffers) + self.__net = deepcopy(net) + self.__net.to("meta") + self.__disable_autograd_tracking = bool(disable_autograd_tracking) def __transfer_buffers(self, x: torch.Tensor): """ @@ -124,14 +143,13 @@ def __transfer_buffers(self, x: torch.Tensor): Args: x: The tensor whose device will also store the buffer tensors. """ - n = len(self.__buffers) - for i in range(n): - self.__buffers[i] = torch.as_tensor(self.__buffers[i], device=x.device) + for bname in self.__buffer_dict.keys(): + self.__buffer_dict[bname] = torch.as_tensor(self.__buffer_dict[bname], device=x.device) @property def buffers(self) -> tuple: """Get the stored buffers""" - return self.__buffers + return tuple(self.__buffer_dict) @property def parameter_length(self) -> int: @@ -161,27 +179,31 @@ def __call__(self, parameter_vector: torch.Tensor, x: torch.Tensor, h: Any = Non ) state_args = [] if h is None else [h] - params = [] - for i in range(self.__num_params): + params_and_buffers = {} + for i, pname in enumerate(self.__param_names): param_slice = self.__param_slices[i] param_shape = self.__param_shapes[i] param = parameter_vector[param_slice].reshape(param_shape) - params.append(param) + params_and_buffers[pname] = param - # Make sure that the tensors are in the same device with x + # Make sure that the buffer tensors are in the same device with x self.__transfer_buffers(x) - # Run the functional module and return the results - return self.__fmodel(params, self.__buffers, x, *state_args) + # Add the buffer tensors to the dictionary `params_and_buffers` + params_and_buffers.update(self.__buffer_dict) + # Prepare the no-gradient context if gradient tracking is disabled + context = torch.no_grad() if self.__disable_autograd_tracking else nullcontext() -def make_functional_module(net: nn.Module) -> ModuleExpectingFlatParameters: + # Run the module and return the results + with context: + return functional_call(self.__net, params_and_buffers, tuple([x, *state_args])) + + +def make_functional_module(net: nn.Module, *, disable_autograd_tracking: bool = False) -> ModuleExpectingFlatParameters: """ Wrap a torch module so that it has a functional interface. - For obtaining a functional interface, this function internally uses the - `functorch` library. - Similar to `functorch.make_functional(...)`, this function turns a `torch.nn.Module` instance to a function which expects a new leftmost argument representing the parameters of the network. @@ -225,8 +247,13 @@ def make_functional_module(net: nn.Module) -> ModuleExpectingFlatParameters: Args: net: The `torch.nn.Module` instance to be wrapped by a functional interface. + disable_autograd_tracking: If given as True, all operations + regarding the wrapped module will be performed in the context + `torch.no_grad()`, forcefully disabling the autograd. + If given as False, autograd will not be affected. + The default is False. Returns: The functional wrapper, as an instance of `evotorch.neuroevolution.net.ModuleExpectingFlatParameters`. 
""" - return ModuleExpectingFlatParameters(net) + return ModuleExpectingFlatParameters(net, disable_autograd_tracking=disable_autograd_tracking) diff --git a/src/evotorch/neuroevolution/net/rl.py b/src/evotorch/neuroevolution/net/rl.py index 5d375540..09def39a 100644 --- a/src/evotorch/neuroevolution/net/rl.py +++ b/src/evotorch/neuroevolution/net/rl.py @@ -17,9 +17,9 @@ from copy import deepcopy from typing import Any, Iterable, Optional, Union -import gym +import gymnasium as gym import torch -from gym.spaces import Box +from gymnasium.spaces import Box from torch import nn from .misc import device_of_module @@ -62,27 +62,15 @@ def _accumulate_all_across_dicts(dicts: Iterable[dict], keys_and_methods: dict): def reset_env(env: gym.Env) -> Iterable: """ - Reset a gym environment. - - For gym 1.0, the plan is to have a `reset(...)` method which returns - a two-element tuple `(observation, info)` where `info` is an object - providing any additional information regarding the initial state of - the agent. However, the old (pre 1.0) gym API (and some environments - which were written with old gym compatibility in mind) has (or have) - a `reset(...)` method which returns a single object that is the - initial observation. - With the assumption that the observation space of the environment - is NOT tuple, this function can work with both pre-1.0 and (hopefully) - after-1.0 versions of gym, and always returns the initial observation. - - Please do not use this function on environments whose observation - spaces or tuples, because then this function cannot distinguish between - environments whose `reset(...)` methods return a tuple and environments - whose `reset(...)` methods return a single observation object but that - observation object is a tuple. + Reset a gymnasium environment. + + Even though the `gymnasium` library switched to a new API where the + `reset()` method returns a tuple `(observation, info)`, this function + follows the conventions of the classical `gym` library and returns + only the observation of the newly reset environment. Args: - env: The gym environment which will be reset. + env: The gymnasium environment which will be reset. Returns: The initial observation """ @@ -94,25 +82,18 @@ def reset_env(env: gym.Env) -> Iterable: def take_step_in_env(env: gym.Env, action: Iterable) -> tuple: """ - Take a step in the gym environment. + Take a step in the gymnasium environment. Taking a step means performing the action provided via the arguments. - For gym 1.0, the plan is to have a `step(...)` method which returns a - 5-elements tuple containing `observation`, `reward`, `terminated`, - `truncated`, `info` where `terminated` is a boolean indicating whether - or not the episode is terminated because of the actions taken within the - environment, and `truncated` is a boolean indicating whether or not the - episode is finished because the time limit is reached. - However, the old (pre 1.0) gym API (and some environments which were - written with old gym compatibility in mind) has (or have) a `step(...)` - method which returns 4 elements: `observation`, `reward`, `done`, `info` - where `done` is a boolean indicating whether or not the episode is - "done", either because of termination or because of truncation. - This function can work with both pre-1.0 and (hopefully) after-1.0 - versions of gym, and always returns the 4-element tuple as its result. 
+ Even though the `gymnasium` library switched to a new API where the + `step()` method returns a 5-element tuple of the form + `(observation, reward, terminated, truncated, info)`, this function + follows the conventions of the classical `gym` library and returns + a 4-element tuple `(observation, reward, done, info)`. Args: - env: The gym environment in which the given action will be performed. + env: The gymnasium environment in which the action will be performed. + action: The action to be performed. Returns: A tuple in the form `(observation, reward, done, info)` where `observation` is the observation received after performing the action, diff --git a/src/evotorch/neuroevolution/net/vecrl.py b/src/evotorch/neuroevolution/net/vecrl.py index 19d1882f..d87ae0f9 100644 --- a/src/evotorch/neuroevolution/net/vecrl.py +++ b/src/evotorch/neuroevolution/net/vecrl.py @@ -16,17 +16,22 @@ This namespace provides various vectorized reinforcement learning utilities. """ - +import random from collections.abc import Mapping from copy import deepcopy from typing import Any, Callable, Iterable, Optional, Union -import gym +import gymnasium as gym import numpy as np import torch -from functorch import vmap -from gym.spaces import Box, Discrete, Space -from gym.vector import SyncVectorEnv + +try: + from torch.func import vmap +except ImportError: + from functorch import vmap + +from gymnasium.spaces import Box, Discrete, Space +from gymnasium.vector import SyncVectorEnv from torch import nn from torch.nn import utils as nnu @@ -58,7 +63,7 @@ def torch_to_jax(x: torch.Tensor) -> JaxArray: else: def _jax_is_missing(): - raise ImportError("The module `jax` is missing.") + raise ImportError("`jax` is missing, or the attempt to import it failed.") class JaxArray: def __init__(self, *args, **kwargs): @@ -84,22 +89,53 @@ def torch_to_jax(x: torch.Tensor) -> JaxArray: if brax is not None: from brax.envs import Env as BraxEnv - def is_brax_env(env: Any) -> bool: + def _is_new_brax_env(env: Any) -> bool: return isinstance(env, BraxEnv) else: def _brax_is_missing(): - raise ImportError("The module `brax` is missing.") + raise ImportError("`brax` is missing, or the attempt to import it failed.") class BraxEnv: def __init__(self, *args, **kwargs): _brax_is_missing() - def is_brax_env(env: Any) -> bool: + def _is_new_brax_env(env: Any) -> bool: + return False + + +try: + import brax.v1 as old_brax + import brax.v1.envs as old_brax_envs +except ImportError: + old_brax = None + old_brax_envs = None + + +if old_brax is not None: + from brax.v1.envs import Env as OldBraxEnv + + def _is_old_brax_env(env: Any) -> bool: + return isinstance(env, OldBraxEnv) + +else: + + def _old_brax_is_missing(): + raise ImportError("`brax.v1` is missing, or the attempt to import it failed.") + + class OldBraxEnv: + def __init__(self, *args, **kwargs): + _old_brax_is_missing() + + def _is_old_brax_env(env: Any) -> bool: return False +def is_brax_env(env: Any) -> bool: + return _is_new_brax_env(env) or _is_old_brax_env(env) + + def array_type(x: Any, fallback: Optional[str] = None) -> str: """ Get the type of an array as a string ("jax", "torch", or "numpy"). @@ -457,6 +493,13 @@ def make_brax_env( Args: env_name: Name of the brax environment, as string (e.g. "humanoid"). + If the string starts with "old::" (e.g. 
"old::humanoid", etc.), + then the environment will be made using the namespace `brax.v1` + (which was introduced in brax version 0.9.0 where the updated + implementations of the environments became default and the classical + ones moved into `brax.v1`). + You can use the prefix "old::" for reproducing previous results + that were obtained or reported using an older version of brax. force_classic_api: Whether or not the classic gym API is to be used. num_envs: Batch size for the vectorized environment. discrete_to_continuous_act: Whether or not the the discrete action @@ -475,8 +518,8 @@ def make_brax_env( config = {} config.update(kwargs) if num_envs is not None: - config["batch_size"] = num_envs - env = brax.envs.create_gym_env(env_name, **config) + config["num_envs"] = num_envs + env = VectorEnvFromBrax(env_name, **config) env = TorchWrapper( env, force_classic_api=force_classic_api, @@ -498,10 +541,10 @@ def make_gym_env( **kwargs, ) -> TorchWrapper: """ - Make gym environments and wrap them via a SyncVectorEnv and a TorchWrapper. + Make gymnasium environments and wrap them via SyncVectorEnv and TorchWrapper. Args: - env_name: Name of the gym environment, as string (e.g. "Humanoid-v4"). + env_name: Name of the gymnasium environment, as string (e.g. "Humanoid-v4"). force_classic_api: Whether or not the classic gym API is to be used. num_envs: Batch size for the vectorized environment. discrete_to_continuous_act: Whether or not the the discrete action @@ -513,7 +556,7 @@ def make_gym_env( kwargs: Expected in the form of additional keyword arguments, these are passed to the environment. Returns: - The gym environments, wrapped by a TorchWrapper. + The gymnasium environments, wrapped by a TorchWrapper. """ def make_the_env(): @@ -543,16 +586,26 @@ def make_vector_env( Make a new vectorized environment and wrap it via TorchWrapper. Args: - env_name: Name of the gym environment, as string. + env_name: Name of the environment, as string. If the string starts with "gym::" (e.g. "gym::Humanoid-v4", etc.), - then it is assumed that the target environment is a classical gym - environment which will first be wrapped via a SyncVectorEnv and - then via a TorchWrapper. + then it is assumed that the target environment is a traditional + non-vectorized gymnasium environment. This non-vectorized + will first be duplicated and wrapped via a `SyncVectorEnv` so that + it gains a vectorized interface, and then, it will be wrapped via + `TorchWrapper`. If the string starts with "brax::" (e.g. "brax::humanoid", etc.), then it is assumed that the target environment is a brax environment which will be wrapped via TorchWrapper. + If the string starts with "brax::old::" (e.g. + "brax::old::humanoid", etc.), then the environment will be made + using the namespace `brax.v1` (which was introduced in brax + version 0.9.0 where the updated implementations of the environments + became default and the classical ones moved into `brax.v1`). + You can use the prefix "brax::old::" for reproducing previous + results that were obtained or reported using an older version of + brax. If the string does not contain "::" at all (e.g. "Humanoid-v4"), - then it is assumed that the target environment is a classical gym + then it is assumed that the target environment is a gymnasium environment. Therefore, "gym::Humanoid-v4" and "Humanoid-v4" are equivalent. force_classic_api: Whether or not the classic gym API is to be used. 
@@ -566,7 +619,7 @@ def make_vector_env(
         kwargs: Expected in the form of additional keyword arguments, these
             are passed to the environment.
     Returns:
-        The gym environments, wrapped by a TorchWrapper.
+        The vectorized gymnasium environment, wrapped by TorchWrapper.
     """
 
     env_parts = str(env_name).split("::", maxsplit=1)
@@ -1101,3 +1154,134 @@ def to_torch_module(self, parameter_vector: torch.Tensor) -> nn.Module:
         net = deepcopy(self.__module).to(parameter_vector.device)
         nnu.vector_to_parameters(parameter_vector, net.parameters())
         return net
+
+
+if brax is not None:  # noqa: C901
+
+    class VectorEnvFromBrax(gym.vector.VectorEnv):
+        def __init__(self, env_name: str, **kwargs):
+            env_name = str(env_name)
+
+            if env_name.startswith("old::"):
+                env_name = env_name[5:]
+                create = old_brax_envs.create
+            else:
+                create = brax.envs.create
+
+            filtered_kwargs = {}
+
+            auto_reset = None
+            num_envs = None
+            for k, v in kwargs.items():
+                if k in ("batch_size", "num_envs"):
+                    if num_envs is None:
+                        num_envs = int(v)
+                    else:
+                        raise ValueError(
+                            "Among the keyword arguments,"
+                            " encountered both 'batch_size' and 'num_envs', which are redundant."
+                        )
+                elif k in ("autoreset", "auto_reset"):
+                    if auto_reset is None:
+                        auto_reset = bool(v)
+                    else:
+                        raise ValueError(
+                            "Among the keyword arguments,"
+                            " encountered both 'autoreset' and 'auto_reset', which are redundant."
+                        )
+                else:
+                    filtered_kwargs[k] = v
+
+            if auto_reset is None:
+                auto_reset = True
+
+            if num_envs is None:
+                raise ValueError(
+                    "Please specify the number of environments via the keyword argument `num_envs` or `batch_size`"
+                )
+
+            if not auto_reset:
+                raise ValueError(
+                    "EvoTorch expects vectorized environments to have the auto-reset behavior."
+                    " It seems that this brax environment is configured to not have the auto-reset behavior,"
+                    " which is not supported."
+                )
+
+            self.__brax_env = create(env_name, auto_reset=auto_reset, batch_size=num_envs, **filtered_kwargs)
+            self.__jit_reset = jax.jit(self.__brax_env.reset)
+            self.__jit_step = jax.jit(self.__brax_env.step)
+            self.__jit_convert_to_bool = jax.jit(self.__convert_to_bool)
+            self.__jit_make_terminated_and_truncated = jax.jit(self.__make_terminated_and_truncated)
+            self.__jit_make_terminated_and_truncated2 = jax.jit(self.__make_terminated_and_truncated2)
+            self.__given_seed: Optional[int] = None
+
+            inf = float("inf")
+            observation_space = Box(low=-inf, high=inf, shape=(self.__brax_env.observation_size,), dtype=np.float32)
+
+            if hasattr(self.__brax_env.sys, "actuator"):
+
+                def as_float32_array(arr: Iterable) -> np.ndarray:
+                    return np.array(arr, dtype=np.float32)
+
+                ctrl_range = jax.tree_map(as_float32_array, self.__brax_env.sys.actuator.ctrl_range)
+                ctrl_lb = ctrl_range[:, 0]
+                ctrl_ub = ctrl_range[:, 1]
+                action_space = Box(low=ctrl_lb, high=ctrl_ub, dtype=np.float32)
+            else:
+                action_space = Box(low=-1.0, high=1.0, shape=(self.__brax_env.action_size,), dtype=np.float32)
+
+            self.__last_state: Optional[Iterable] = None
+            super().__init__(num_envs=num_envs, observation_space=observation_space, action_space=action_space)
+
+        def seed(self, seed: Optional[int] = None):
+            self.__given_seed = None if seed is None else int(seed)
+
+        def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None) -> tuple:
+            if seed is None:
+                if self.__given_seed is None:
+                    seed = random.randint(0, (2**32) - 1)
+                else:
+                    seed = self.__given_seed
+            else:
+                seed = int(seed)
+
+            kwargs = {} if options is None else options
+
+            self.__given_seed = None
+            key = jax.random.PRNGKey(seed)
+            more_kwargs = {"rng": key}
+
+            state = self.__jit_reset(**kwargs, **more_kwargs)
+            observation = state.obs
+
+            self.__last_state = state
+
+            return observation, {**(state.metrics), **(state.info)}
+
+        @staticmethod
+        def __convert_to_bool(x: jnp.ndarray) -> jnp.ndarray:
+            return jnp.abs(x) > 1e-4
+
+        def __make_terminated_and_truncated(self, done: jnp.ndarray) -> tuple:
+            terminated = self.__jit_convert_to_bool(done)
+            truncated = jnp.zeros_like(terminated)
+            return terminated, truncated
+
+        def __make_terminated_and_truncated2(self, done: jnp.ndarray, truncation: jnp.ndarray) -> tuple:
+            done = self.__jit_convert_to_bool(done)
+            truncated = self.__jit_convert_to_bool(truncation)
+            terminated = done & (~truncated)
+            return terminated, truncated
+
+        def step(self, action: Any) -> tuple:
+            state = self.__jit_step(self.__last_state, action)
+            self.__last_state = state
+            observation = state.obs
+            reward = state.reward
+            done = state.done
+            if "truncation" in state.info:
+                terminated, truncated = self.__jit_make_terminated_and_truncated2(done, state.info["truncation"])
+            else:
+                terminated, truncated = self.__jit_make_terminated_and_truncated(done)
+            info = {**(state.metrics), **(state.info)}
+            return observation, reward, terminated, truncated, info
diff --git a/src/evotorch/neuroevolution/vecgymne.py b/src/evotorch/neuroevolution/vecgymne.py
index bde8deb5..5ee569be 100644
--- a/src/evotorch/neuroevolution/vecgymne.py
+++ b/src/evotorch/neuroevolution/vecgymne.py
@@ -19,10 +19,10 @@
 from pathlib import Path
 from typing import Any, Callable, Iterable, Optional, Union
 
-import gym
+import gymnasium as gym
 import numpy as np
 import torch
-from gym.spaces import Box
+from gymnasium.spaces import Box
 from torch import nn
 
 from ..core import Solution, SolutionBatch
diff --git a/src/evotorch/tools/__init__.py b/src/evotorch/tools/__init__.py
index 
8b091158..165cfb6b 100644 --- a/src/evotorch/tools/__init__.py +++ b/src/evotorch/tools/__init__.py @@ -78,6 +78,7 @@ "set_default_logger_config", "split_workload", "stdev_from_radius", + "storage_ptr", "to_numpy_dtype", "to_stdev_init", "to_torch_dtype", @@ -149,6 +150,7 @@ set_default_logger_config, split_workload, stdev_from_radius, + storage_ptr, to_numpy_dtype, to_stdev_init, to_torch_dtype, diff --git a/src/evotorch/tools/misc.py b/src/evotorch/tools/misc.py index 95715144..748b5e68 100644 --- a/src/evotorch/tools/misc.py +++ b/src/evotorch/tools/misc.py @@ -2066,3 +2066,28 @@ def say_hello(self): sender_id = id(sender) return f"Instance of `{sender_type}` (id:{sender_id}) -- {message}" + + +if hasattr(torch.Tensor, "untyped_storage"): + + def _storage_ptr(x: Iterable) -> int: + return x.untyped_storage().data_ptr() + +else: + + def _storage_ptr(x: Iterable) -> int: + return x.storage().data_ptr() + + +def storage_ptr(x: Iterable) -> int: + """ + Get the pointer to the underlying storage of a tensor of an ObjectArray. + + Calling `storage_ptr(x)` is equivalent to `x.untyped_storage().data_ptr()`. + + Args: + x: A regular PyTorch tensor, or a ReadOnlyTensor, or an ObjectArray. + Returns: + The address of the underlying storage. + """ + return _storage_ptr(x) diff --git a/src/evotorch/tools/objectarray.py b/src/evotorch/tools/objectarray.py index 8505bd25..b9589510 100644 --- a/src/evotorch/tools/objectarray.py +++ b/src/evotorch/tools/objectarray.py @@ -24,7 +24,7 @@ import numpy as np import torch -from .misc import Device, DType, Size, clone, is_integer, is_sequence +from .misc import Device, DType, Size, clone, is_integer, is_sequence, storage_ptr from .recursiveprintable import RecursivePrintable @@ -126,9 +126,9 @@ class ObjectArray(Sequence, RecursivePrintable): An interesting feature of PyTorch: if one slices a tensor A and the result is a new tensor B, and if B is sharing storage memory with A, - then A.storage().data_ptr() and B.storage().data_ptr() will return - the same pointer. This means, one can compare the storage pointers of - A and B and see whether or not the two are sharing memory. + then A.untyped_storage().data_ptr() and B.untyped_storage().data_ptr() + will return the same pointer. This means, one can compare the storage + pointers of A and B and see whether or not the two are sharing memory. ObjectArray was designed to have this exact behavior, so that one can understand if two ObjectArray instances are sharing memory. Note that NumPy does NOT have such a behavior. In more details, @@ -194,7 +194,7 @@ def __init__( self._indices = source._indices[slicing] self._objects = source._objects - if self._indices.storage().data_ptr() != source._indices.storage().data_ptr(): + if storage_ptr(self._indices) != storage_ptr(source._indices): self._objects = clone(self._objects) self._device = torch.device("cpu") @@ -322,7 +322,7 @@ def __getitem__(self, i: Any) -> Any: else: indices = self._indices[i] - same_ptr = indices.storage().data_ptr() == self._indices.storage().data_ptr() + same_ptr = storage_ptr(indices) == storage_ptr(self._indices) result = ObjectArray(len(indices)) @@ -479,6 +479,9 @@ def is_read_only(self) -> bool: def storage(self) -> ObjectArrayStorage: return ObjectArrayStorage(self) + def untyped_storage(self) -> ObjectArrayStorage: + return ObjectArrayStorage(self) + def numpy(self, *, memo: Optional[dict] = None) -> np.ndarray: """ Convert this ObjectArray to a numpy array. 
diff --git a/src/evotorch/tools/readonlytensor.py b/src/evotorch/tools/readonlytensor.py index b044c934..253c47e8 100644 --- a/src/evotorch/tools/readonlytensor.py +++ b/src/evotorch/tools/readonlytensor.py @@ -96,8 +96,10 @@ def clone(self, *, preserve_read_only: bool = False) -> torch.Tensor: return result def __mutable_if_independent(self, other: torch.Tensor) -> torch.Tensor: - self_ptr = self.storage().data_ptr() - other_ptr = other.storage().data_ptr() + from .misc import storage_ptr + + self_ptr = storage_ptr(self) + other_ptr = storage_ptr(other) if self_ptr != other_ptr: other = other.as_subclass(torch.Tensor) return other diff --git a/tests/test_core.py b/tests/test_core.py index 4f0cdd1d..22985c30 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -335,7 +335,7 @@ def test_batch_cloning(clone_func): def _storage_address(solution_or_batch: Union[et.SolutionBatch, et.Solution]) -> int: - return solution_or_batch.access_values(keep_evals=True).storage().data_ptr() + return ett.storage_ptr(solution_or_batch.access_values(keep_evals=True)) def _share_memory(a: Union[et.SolutionBatch, et.Solution], b: Union[et.SolutionBatch, et.Solution]) -> bool: diff --git a/tests/test_objectarray.py b/tests/test_objectarray.py index e9e81b3a..a6a598ab 100644 --- a/tests/test_objectarray.py +++ b/tests/test_objectarray.py @@ -19,7 +19,7 @@ from evotorch.tools import clone from evotorch.tools import immutable as imm -from evotorch.tools import make_tensor +from evotorch.tools import make_tensor, storage_ptr from evotorch.tools.objectarray import ObjectArray @@ -47,7 +47,7 @@ def test_sharing_memory(): y = x[3:5] # The two tensors should be sharing memory - assert x.storage().data_ptr() == y.storage().data_ptr() + assert storage_ptr(x) == storage_ptr(y) # Change the elements of y y[:] = [0, 0] @@ -166,7 +166,7 @@ def test_read_only_object_tensor(): ro = objs.get_read_only_view() # The two ObjectArray must share memory - assert objs.storage().data_ptr() == ro.storage().data_ptr() + assert storage_ptr(objs) == storage_ptr(ro) # The two ObjectArray instances must be equal, elementwise assert len(objs) == len(ro) diff --git a/tests/test_read_only_tensor.py b/tests/test_read_only_tensor.py index 3aa7a989..69d18b0f 100644 --- a/tests/test_read_only_tensor.py +++ b/tests/test_read_only_tensor.py @@ -24,7 +24,7 @@ def test_read_only_tensor(): y = ett.as_read_only_tensor(x) # Ensure that x and y are sharing memory (even though x is a regular tensor and y is a read-only tensor) - assert x.storage().data_ptr() == y.storage().data_ptr() + assert ett.storage_ptr(x) == ett.storage_ptr(y) clamped = torch.clamp(x, -1.0, 1.0) clamped2 = torch.clamp(y, -1.0, 1.0)