diff --git a/.vale/styles/config/vocabularies/RLlib/accept.txt b/.vale/styles/config/vocabularies/RLlib/accept.txt index 9ca4e388607d1..54d9a0546b3bb 100644 --- a/.vale/styles/config/vocabularies/RLlib/accept.txt +++ b/.vale/styles/config/vocabularies/RLlib/accept.txt @@ -9,9 +9,12 @@ config (IMPALA|impala) hyperparameters? MARLModule +MLAgents +multiagent postprocessing (PPO|ppo) [Pp]y[Tt]orch +pragmas? (RL|rl)lib RLModule rollout diff --git a/doc/source/rllib/images/sigils/new-api-stack.svg b/doc/source/rllib/images/sigils/new-api-stack.svg new file mode 100644 index 0000000000000..bf4c59e0058d4 --- /dev/null +++ b/doc/source/rllib/images/sigils/new-api-stack.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/source/rllib/images/sigils/old-api-stack.svg b/doc/source/rllib/images/sigils/old-api-stack.svg new file mode 100644 index 0000000000000..fb819d4a1d788 --- /dev/null +++ b/doc/source/rllib/images/sigils/old-api-stack.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/doc/source/rllib/index.rst b/doc/source/rllib/index.rst index 7ef5b21a41ca9..15ad1c0ce4d3a 100644 --- a/doc/source/rllib/index.rst +++ b/doc/source/rllib/index.rst @@ -167,7 +167,7 @@ Feature Overview **RLlib Algorithms** ^^^ - Check out the many available RL algorithms of RLlib for model-free and model-based + See the many available RL algorithms of RLlib for model-free and model-based RL, on-policy and off-policy training, multi-agent RL, and more. +++ .. button-ref:: rllib-algorithms-doc diff --git a/doc/source/rllib/key-concepts.rst b/doc/source/rllib/key-concepts.rst index c291cb2c76f37..470e66ff71a8c 100644 --- a/doc/source/rllib/key-concepts.rst +++ b/doc/source/rllib/key-concepts.rst @@ -114,7 +114,7 @@ The following figure shows *synchronous sampling*, the simplest of `these patter RLlib uses `Ray actors `__ to scale training from a single core to many thousands of cores in a cluster. You can `configure the parallelism `__ used for training by changing the ``num_env_runners`` parameter. -Check out our `scaling guide `__ for more details here. +See this `scaling guide `__ for more details here. RL Modules diff --git a/doc/source/rllib/package_ref/evaluation.rst b/doc/source/rllib/package_ref/evaluation.rst index 1b87553691710..5d3db4f9d1cf7 100644 --- a/doc/source/rllib/package_ref/evaluation.rst +++ b/doc/source/rllib/package_ref/evaluation.rst @@ -23,7 +23,7 @@ which sit inside a :py:class:`~ray.rllib.env.env_runner_group.EnvRunnerGroup` **A typical RLlib EnvRunnerGroup setup inside an RLlib Algorithm:** Each :py:class:`~ray.rllib.env.env_runner_group.EnvRunnerGroup` contains exactly one local :py:class:`~ray.rllib.env.env_runner.EnvRunner` object and N ray remote - :py:class:`~ray.rllib.env.env_runner.EnvRunner` (ray actors). + :py:class:`~ray.rllib.env.env_runner.EnvRunner` (Ray actors). The workers contain a policy map (with one or more policies), and - in case a simulator (env) is available - a vectorized :py:class:`~ray.rllib.env.base_env.BaseEnv` (containing M sub-environments) and a :py:class:`~ray.rllib.evaluation.sampler.SamplerInput` (either synchronous or asynchronous) which controls diff --git a/doc/source/rllib/rllib-advanced-api.rst b/doc/source/rllib/rllib-advanced-api.rst index 64bed1e8f6094..5b35253f1539f 100644 --- a/doc/source/rllib/rllib-advanced-api.rst +++ b/doc/source/rllib/rllib-advanced-api.rst @@ -19,87 +19,31 @@ implement `custom training workflows (example) `__ environment API. 
-Your environment only needs to implement the `set_task` and `get_task` methods -for this to work. You can then define an `env_task_fn` in your config, -which receives the last training results and returns a new task for the env to be set to: - -.. TODO move to doc_code and make it use algo configs. -.. code-block:: python - - from ray.rllib.env.apis.task_settable_env import TaskSettableEnv - - class MyEnv(TaskSettableEnv): - def get_task(self): - return self.current_difficulty - - def set_task(self, task): - self.current_difficulty = task - - def curriculum_fn(train_results, task_settable_env, env_ctx): - # Very simple curriculum function. - current_task = task_settable_env.get_task() - new_task = current_task + 1 - return new_task - - # Setup your Algorithm's config like so: - config = { - "env": MyEnv, - "env_task_fn": curriculum_fn, - } - # Train using `Tuner.fit()` or `Algorithm.train()` and the above config stub. - # ... - -There are two more ways to use the RLlib's other APIs to implement -`curriculum learning `__. - -Use the Algorithm API and update the environment between calls to ``train()``. -This example shows the algorithm being run inside a Tune function. -This is basically the same as what the built-in `env_task_fn` API described above -already does under the hood, but allows you to do even more customizations to your -training loop. - -.. TODO move to doc_code and make it use algo configs. -.. code-block:: python - - import ray - from ray import train, tune - from ray.rllib.algorithms.ppo import PPO - - def train_fn(config): - algo = PPO(config=config, env=YourEnv) - while True: - result = algo.train() - train.report(result) - if result["env_runners"]["episode_return_mean"] > 200: - task = 2 - elif result["env_runners"]["episode_return_mean"] > 100: - task = 1 - else: - task = 0 - algo.workers.foreach_worker( - lambda ev: ev.foreach_env( - lambda env: env.set_task(task))) - - num_gpus = 0 - num_env_runners = 2 +In curriculum learning, you can set the environment to different difficulties +throughout the training process. This setting lets the algorithm learn how to solve +the actual, final problem incrementally, by interacting with and exploring more and +more difficult phases. +Normally, such a curriculum starts with setting the environment to an easy level and +then, as training progresses, transitions toward a harder-to-solve difficulty. +See the `Reverse Curriculum Generation for Reinforcement Learning Agents `_ blog post +for another example of how you can do curriculum learning. + +RLlib's Algorithm and custom callbacks APIs allow you to implement arbitrary +curricula. This `example script `__ introduces +the basic concepts you need to understand. + +First, define some env options. This example uses the `FrozenLake-v1` environment, +a grid world whose map is fully customizable. Three tasks of different env difficulties +are represented by slightly different maps that the agent has to navigate. + +.. 
literalinclude:: ../../../rllib/examples/curriculum/curriculum_learning.py + :language: python + :start-after: __curriculum_learning_example_env_options__ + :end-before: __END_curriculum_learning_example_env_options__ - ray.init() - tune.Tuner( - tune.with_resources(train_fn, resources=tune.PlacementGroupFactory( - [{"CPU": 1}, {"GPU": num_gpus}] + [{"CPU": 1}] * num_env_runners - ),) - param_space={ - "num_gpus": num_gpus, - "num_env_runners": num_env_runners, - }, - ).fit() +Then, define the central piece controlling the curriculum, which is a custom callbacks class +overriding the :py:meth:`~ray.rllib.algorithms.callbacks.Callbacks.on_train_result`. -You could also use RLlib's callbacks API to update the environment on new training -results: .. TODO move to doc_code and make it use algo configs. .. code-block:: python diff --git a/doc/source/rllib/rllib-algorithms.rst b/doc/source/rllib/rllib-algorithms.rst index 0c0482336e592..0c867d2481e35 100644 --- a/doc/source/rllib/rllib-algorithms.rst +++ b/doc/source/rllib/rllib-algorithms.rst @@ -9,7 +9,7 @@ Algorithms .. tip:: - Check out the `environments `__ page to learn more about different environment types. + See the `environments `__ page to learn more about different environment types. Available Algorithms - Overview ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/rllib/rllib-env.rst b/doc/source/rllib/rllib-env.rst index a76af8ac64cb5..b992bbb7e8b7c 100644 --- a/doc/source/rllib/rllib-env.rst +++ b/doc/source/rllib/rllib-env.rst @@ -11,7 +11,7 @@ RLlib works with several different types of environments, including `Farama-Foun .. tip:: - Not all environments work with all algorithms. Check out the `algorithm overview `__ for more information. + Not all environments work with all algorithms. See the `algorithm overview `__ for more information. .. image:: images/rllib-envs.svg diff --git a/doc/source/rllib/rllib-examples.rst b/doc/source/rllib/rllib-examples.rst index 9f3af7d3163cd..e3e3304df93a9 100644 --- a/doc/source/rllib/rllib-examples.rst +++ b/doc/source/rllib/rllib-examples.rst @@ -2,186 +2,342 @@ .. include:: /_includes/rllib/new_api_stack.rst +.. |new_stack| image:: /rllib/images/sigils/new-api-stack.svg + :class: inline-figure + :width: 64 + +.. |old_stack| image:: /rllib/images/sigils/old-api-stack.svg + :class: inline-figure + :width: 64 + + Examples ======== -This page is an index of examples for the various use cases and features of RLlib. +This page contains an index of all the python scripts in the +`examples folder `__ +of RLlib, demonstrating the different use cases and features of the library. -If any example is broken, or if you'd like to add an example to this page, -feel free to raise an issue on our Github repository. +.. note:: -Tuned Examples --------------- + RLlib is currently in a transition state from "old API stack" to "new API stack". + Some of the examples here haven't been translated yet to the new stack and are tagged + with the following comment line on top: ``# @OldAPIStack``. The moving of all example + scripts over to the "new API stack" is work in progress and expected to be completed + by the end of 2024. -- `Tuned examples `__: - Collection of tuned hyperparameters sorted by algorithm. +.. note:: + If any new-API-stack example is broken, or if you'd like to add an example to this page, + feel free to raise an issue on `RLlib's github repository `__. 
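The callback-driven curriculum described in the curriculum-learning section above boils down to reacting to each new training result and broadcasting a new task to all sub-environments. The following is a minimal sketch of that pattern only, assuming an old-API-stack setup, the result key and `algo.workers.foreach_worker()` call shown in the removed code block above, and a hypothetical `MyTaskSettableEnv` class with a user-defined `set_task()` method (the thresholds and class names are made up here; the actual `curriculum_learning.py` example script may be structured differently):

.. code-block:: python

    import gymnasium as gym

    from ray.rllib.algorithms.callbacks import DefaultCallbacks
    from ray.rllib.algorithms.ppo import PPOConfig


    class MyTaskSettableEnv(gym.Env):
        """Hypothetical env whose `task` only lengthens or shortens episodes."""

        def __init__(self, config=None):
            self._cartpole = gym.make("CartPole-v1")
            self.observation_space = self._cartpole.observation_space
            self.action_space = self._cartpole.action_space
            self.task = 0
            self._t = 0

        def set_task(self, task):
            # Called from the callback below; a higher task means a longer horizon.
            self.task = task

        def reset(self, *, seed=None, options=None):
            self._t = 0
            return self._cartpole.reset(seed=seed)

        def step(self, action):
            self._t += 1
            obs, reward, terminated, truncated, info = self._cartpole.step(action)
            truncated = truncated or self._t >= 50 * (self.task + 1)
            return obs, reward, terminated, truncated, info


    class CurriculumCallbacks(DefaultCallbacks):
        def on_train_result(self, *, algorithm, result, **kwargs):
            # Metric key as used in the docs above; it may differ between RLlib versions.
            mean_return = result["env_runners"]["episode_return_mean"]
            # Arbitrary example thresholds mapping the mean return to a task level.
            task = 2 if mean_return > 200.0 else 1 if mean_return > 100.0 else 0
            # Old-API-stack broadcast to all sub-envs, mirroring the removed code block above.
            algorithm.workers.foreach_worker(
                lambda worker: worker.foreach_env(lambda env: env.set_task(task))
            )


    config = (
        PPOConfig()
        .environment(MyTaskSettableEnv)
        .callbacks(CurriculumCallbacks)
    )
    algo = config.build()
    for _ in range(3):
        print(algo.train()["env_runners"]["episode_return_mean"])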
-Environments and Adapters -------------------------- -- `Registering a custom env and model `__: - Example of defining and registering a gym env and model for use with RLlib. -- `Local Unity3D multi-agent environment example `__: - Example of how to setup an RLlib Algorithm against a locally running Unity3D editor instance to - learn any Unity3D game (including support for multi-agent). - Use this example to try things out and watch the game and the learning progress live in the editor. - Providing a compiled game, this example could also run in distributed fashion with `num_env_runners > 0`. - For a more heavy-weight, distributed, cloud-based example, see ``Unity3D client/server`` below. +Folder Structure +++++++++++++++++ +The `examples folder `__ is +structured into several sub-directories, the contents of all of which are described in detail below. -Custom- and Complex Models --------------------------- -- `Custom Keras model `__: - Example of using a custom Keras model. -- `Registering a custom model with supervised loss `__: - Example of defining and registering a custom model with a supervised loss. -- `Batch normalization `__: - Example of adding batch norm layers to a custom model. -- `Custom model API example `__: - Shows how to define a custom Model API in RLlib, such that it can be used inside certain algorithms. -- `Trajectory View API utilizing model `__: - An example on how a model can use the trajectory view API to specify its own input. -- `MobileNetV2 wrapping example model `__: - Implementations of `tf.keras.applications.mobilenet_v2.MobileNetV2` and `torch.hub (mobilenet_v2)`-wrapping example models. -- `Differentiable Neural Computer `__: - Example of DeepMind's Differentiable Neural Computer for partially-observable environments. +How to run an example script +++++++++++++++++++++++++++++ +Most of the example scripts are self-executable, meaning you can just ``cd`` into the respective +directory and run the script as-is with python: -Training Workflows ------------------- +.. code-block:: bash -- `Custom training workflows `__: - Example of how to use Tune's support for custom training functions to implement custom training workflows. -- `Curriculum learning with the TaskSettableEnv API `__: - Example of how to advance the environment through different phases (tasks) over time. - Also see the `curriculum learning how-to `__ from the documentation here. -- `Custom logger `__: - How to setup a custom Logger object in RLlib. -- `Custom metrics `__: - Example of how to output custom training metrics to TensorBoard. -- `Custom Algorith.training_step() method combining on- and off-policy learning `__: - Example of how to use the exec. plan of an Algorithm to trin two different policies in parallel (also using multi-agent API). -- `Custom tune experiment `__: - How to run a custom Ray Tune experiment with RLlib with custom training- and evaluation phases. + $ cd ray/rllib/examples/multi_agent + $ python multi_agent_pendulum.py --enable-new-api-stack --num-agents=2 + + +Use the `--help` command line argument to have each script print out its supported command line options. + +Most of the scripts share a common subset of generally applicable command line arguments, +for example `--num-env-runners`, `--no-tune`, or `--wandb-key`. 
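The shared command-line options mentioned in the paragraph above follow a common pattern across the example scripts. Purely as an illustration (a hand-rolled, hypothetical parser, not RLlib's actual shared helper; defaults and help texts are assumptions), a script exposing a few of those flags could look like this:

.. code-block:: python

    import argparse

    # Hypothetical stand-in for the shared argument handling used by the example scripts.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--enable-new-api-stack", action="store_true",
        help="Run the example on RLlib's new API stack.",
    )
    parser.add_argument(
        "--num-agents", type=int, default=0,
        help="Number of agents (only used by the multi-agent examples).",
    )
    parser.add_argument(
        "--num-env-runners", type=int, default=2,
        help="Number of remote EnvRunner actors used for sampling.",
    )
    parser.add_argument(
        "--no-tune", action="store_true",
        help="Run a plain training loop instead of going through Ray Tune.",
    )
    parser.add_argument(
        "--wandb-key", type=str, default=None,
        help="Weights & Biases API key for optional experiment logging.",
    )

    if __name__ == "__main__":
        args = parser.parse_args()
        print(args)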
+ + +All sub-folders ++++++++++++++++ + + +Algorithms +---------- +- |new_stack| `How to write a custom Algorith.training_step() method combining on- and off-policy training `__: + Example of how to override the :py:meth:`~ray.rllib.algorithms.algorithm.training_step` method of the + :py:class:`~ray.rllib.algorithms.algorithm.Algorithm` class to train two different policies in parallel + (also using multi-agent API). -Evaluation: +Checkpoints ----------- -- `Custom evaluation function `__: - Example of how to write a custom evaluation function that is called instead of the default behavior, which is running with the evaluation worker set through n episodes. -- `Parallel evaluation and training `__: - Example showing how the evaluation workers and the "normal" rollout workers can run (to some extend) in parallel to speed up training. +- |new_stack| `How to extract a checkpoint from n Tune trials using one or more custom criteria. `__: + Example of how to find a :ref:`checkpoint ` after a `Tuner.fit()` with some custom defined criteria. + +Connectors +---------- + +.. note:: + RLlib's Connector API has been re-written from scratch for the new API stack (|new_stack|). + Connector-pieces and -pipelines are now referred to as :py:class:`~ray.rllib.connectors.connector_v2.ConnectorV2` + (as opposed to ``Connector``, which only continue to work on the old API stack |old_stack|). + + +- |new_stack| `How to frame-stack Atari image observations `__: + An example using Atari framestacking in a very efficient manner, not in the environment itself (as a `gym.Wrapper`), + but by stacking the observations on-the-fly using `EnvToModule` and `LearnerConnector` pipelines. + This method of framestacking is more efficient as it avoids having to send large observation + tensors through the network (ray). -Serving and Offline +- |new_stack| `How to mean/std-filter observations `__: + An example of a :py:class:`~ray.rllib.connectors.connector_v2.ConnectorV2` that filters all observations from the environment using a + plain mean/std-filter (shift by the mean and divide by std-dev). This example demonstrates + how a stateful :py:class:`~ray.rllib.connectors.connector_v2.ConnectorV2` class has its states + (here the means and standard deviations of the individual observation items) coming from the different + :py:class:`~ray.rllib.env.env_runner.EnvRunner` instances a) merged into one common state and + then b) broadcast again back to the remote :py:class:`~ray.rllib.env.env_runner.EnvRunner` workers. + +- |new_stack| `How to include previous-actions and/or previous rewards in RLModule inputs `__: + An example of a :py:class:`~ray.rllib.connectors.connector_v2.ConnectorV2` that adds the n previous actions + and/or the m previous rewards to the RLModule's input dict (to perform its forward passes, both + for inference and training). + +- |new_stack| `How to train with nested action spaces `__: + Learning in arbitrarily nested action spaces, using an env in which the action space equals the + observation space (both are complex, nested Dicts) and the policy has to pick actions + that closely match (or are identical) to the previously seen observations. + +- |new_stack| `How to train with nested observation spaces `__: + Learning in arbitrarily nested observation spaces + (using a CartPole-v1 variant with a nested Dict observation space). + +Curriculum Learning ------------------- -- `Offline RL with CQL `__: - Example showing how to run an offline RL training job using a historic-data json file. 
-- `Another example for using RLlib with Ray Serve `__ - This script offers a simple workflow for 1) training a policy with RLlib first, 2) creating a new policy 3) restoring its weights from the trained - one and serving the new policy via Ray Serve. -- `Unity3D client/server `__: + +- |new_stack| `How to set up curriculum learning with the custom callbacks API `__: + Example of how to make the environment go through different levels of difficulty (from easy to harder to solve) + and thus help the learning algorithm to cope with an otherwise unsolvable task. + Also see the :doc:`curriculum learning how-to ` from the documentation. + +Debugging +--------- +- |old_stack| `How to train an RLlib algorithm using a deterministic/reproducible setup `__: + Example showing how you can train an RLlib algorithm in a deterministic, reproducible fashion using seeding. + +Environments +------------ +- |new_stack| `How to register a custom gymnasium environment `__: + Example showing how to write your own RL environment using ``gymnasium`` and register it to run train your algorithm against this env with RLlib. + +- |new_stack| `How to set up rendering (and recording) of the environment trajectories during training with WandB `__: + Example showing how you can render and record episode trajectories of your gymnasium envs and log the videos to WandB. + +- |old_stack| `How to run a Unity3D multi-agent environment locally `__: + Example of how to setup an RLlib Algorithm against a locally running Unity3D editor instance to + learn any Unity3D game (including support for multi-agent). + Use this example to try things out and watch the game and the learning progress live in the editor. + Providing a compiled game, this example could also run in distributed fashion with `num_env_runners > 0`. + For a more heavy-weight, distributed, cloud-based example, see ``Unity3D client/server`` below. + +- |old_stack| `How to run with a Unity3D client/server setup `__: Example of how to setup n distributed Unity3D (compiled) games in the cloud that function as data collecting clients against a central RLlib Policy server learning how to play the game. The n distributed clients could themselves be servers for external/human players and allow for control being fully in the hands of the Unity entities instead of RLlib. Note: Uses Unity's MLAgents SDK (>=1.0) and supports all provided MLAgents example games and multi-agent setups. -- `CartPole client/server `__: + +- |old_stack| `How to run with a CartPole client/server setup `__: Example of online serving of predictions for a simple CartPole policy. -- `Saving experiences `__: - Example of how to externally generate experience batches in RLlib-compatible format. -- `Finding a checkpoint using custom criteria `__: - Example of how to find a :ref:`checkpoint ` after a `Tuner.fit()` via some custom defined criteria. -Multi-Agent and Hierarchical ----------------------------- +Evaluation +---------- -- `Simple independent multi-agent setup vs a PettingZoo env `__: - Setup RLlib to run any algorithm in (independent) multi-agent mode against a multi-agent environment. -- `More complex (shared-parameter) multi-agent setup vs a PettingZoo env `__: - Setup RLlib to run any algorithm in (shared-parameter) multi-agent mode against a multi-agent environment. 
-- `Rock-paper-scissors heuristic vs learned `__ and `Rock-paper-scissors learned vs learned `__: +- |new_stack| `How to run evaluation with a custom evaluation function `__: + Example of how to write a custom evaluation function that's called instead of the default behavior, which is running with the evaluation worker set through n episodes. + +- |new_stack| `How to run evaluation in parallel to training `__: + Example showing how the evaluation workers and the "normal" rollout workers can run (to some extend) in parallel to speed up training. + +GPU (for Training and Sampling) +------------------------------- + +- |new_stack| `How to use fractional GPUs for training an RLModule `__: + If your model is small and easily fits on a single GPU and you want to therefore train + other models alongside it to save time and cost, this script shows you how to set up + your RLlib config with a fractional number of GPUs on the learner (model training) + side. + +Hierarchical Training +--------------------- + +- |old_stack| `How to setup hierarchical training `__: + Example of hierarchical training using the multi-agent API. + +Inference (of Models/Policies) +------------------------------ + +- |old_stack| `How to do inference with an already trained policy `__: + Example of how to perform inference (compute actions) on an already trained policy. +- |old_stack| `How to do inference with an already trained (LSTM) policy `__: + Example of how to perform inference (compute actions) on an already trained (LSTM) policy. +- |old_stack| `How to do inference with an already trained (attention) policy `__: + Example of how to perform inference (compute actions) on an already trained (attention) policy. + +Metrics +------- + +- |old_stack| `How to write your own custom metrics and callbacks in RLlib `__: + Example of how to output custom training metrics to TensorBoard. + + +Multi-Agent RL +-------------- + +- |new_stack| `How to set up independent multi-agent training `__: + Set up RLlib to run any algorithm in (independent) multi-agent mode against a multi-agent environment. +- |new_stack| `How to set up shared-parameter multi-agent training `__: + Set up RLlib to run any algorithm in (shared-parameter) multi-agent mode against a multi-agent environment. +- |new_stack| `How to compare a heuristic policy vs a trained one on rock-paper-scissors `__ and `Rock-paper-scissors learned vs learned `__: Two examples of different heuristic and learned policies competing against each other in the rock-paper-scissors environment. -- `Two-step game `__: +- |new_stack| `How to use agent grouping in a multi-agent environment (two-step game) `__: Example on how to use agent grouping in a multi-agent environment (the two-step game from the `QMIX paper `__). -- `PettingZoo multi-agent example `__: +- |new_stack| `How to set up multi-agent training vs a PettingZoo environment `__: Example on how to use RLlib to learn in `PettingZoo `__ multi-agent environments. -- `PPO with centralized critic on two-step game `__: - Example of customizing PPO to leverage a centralized value function. -- `Centralized critic in the env `__: - A simpler method of implementing a centralized critic by augmentating agent observations with global information. -- `Hand-coded policy `__: +- |new_stack| `How to hand-code a (heuristic) policy `__: Example of running a custom hand-coded policy alongside trainable policies. 
-- `Weight sharing between policies `__: +- |new_stack| `How to train a single policy (weight sharing) controlling more than one agents `__: Example of how to define weight-sharing layers between two different policies. -- `Multiple algorithms `__: + +- |old_stack| `Hwo to write and set up a model with centralized critic `__: + Example of customizing PPO to leverage a centralized value function. +- |old_stack| `How to write and set up a model with centralized critic in the env `__: + A simpler method of implementing a centralized critic by augmenting agent observations with global information. +- |old_stack| `How to combine multiple algorithms into onw using the multi-agent API `__: Example of alternating training between DQN and PPO. -- `Hierarchical training `__: - Example of hierarchical training using the multi-agent API. + +Offline RL +---------- + +- |old_stack| `How to run an offline RL experiment with CQL `__: + Example showing how to run an offline RL training job using a historic-data JSON file. + +- |old_stack| `How to save experiences from an environment for offline RL `__: + Example of how to externally generate experience batches in RLlib-compatible format. -Special Action- and Observation Spaces --------------------------------------- +Ray Serve and RLlib +------------------- -- `Nested action spaces `__: - Learning in arbitrarily nested action spaces. -- `Parametric actions `__: +- |new_stack| `How to use a trained RLlib algorithm with Ray Serve `__ + This script offers a simple workflow for 1) training a policy with RLlib first, 2) creating a new policy 3) restoring its weights from the trained + one and serving the new policy with Ray Serve. + +Ray Tune and RLlib +------------------ +- |new_stack| `How to define a custom progress reporter and use it with Ray Tune and RLlib `__: + Example of how to write your own progress reporter (for a multi-agent experiment) and use it with Ray Tune and RLlib. + +- |new_stack| `How to define and plug in your custom logger into Ray Tune and RLlib `__: + How to setup a custom Logger object in RLlib and use it with Ray Tune. + +- |new_stack| `How to Custom tune experiment `__: + How to run a custom Ray Tune experiment with RLlib with custom training- and evaluation phases. + +RLModules +--------- + +- |new_stack| `How to configure an autoregressive action distribution `__: + Learning with an auto-regressive action distribution (for example, two action components, where distribution of the second component depends on the first's actually sampled value). + +- |old_stack| `How to train with parametric actions `__: Example of how to handle variable-length or parametric action spaces. -- `Using the "Repeated" space of RLlib for variable lengths observations `__: +- |old_stack| `How to using the "Repeated" space of RLlib for variable lengths observations `__: How to use RLlib's `Repeated` space to handle variable length observations. -- `Autoregressive action distribution example `__: - Learning with auto-regressive action dependencies (e.g. 2 action components; distribution for 2nd component depends on the 1st component's actually sampled value). +- |old_stack| `How to write a custom Keras model `__: + Example of using a custom Keras model. +- |old_stack| `How to register a custom model with supervised loss `__: + Example of defining and registering a custom model with a supervised loss. +- |old_stack| `How to train with batch normalization `__: + Example of adding batch norm layers to a custom model. 
+- |old_stack| `How to write a custom model with its custom API `__: + Shows how to define a custom Model API in RLlib, such that it can be used inside certain algorithms. +- |old_stack| `How to write a "trajectory ciew API" utilizing model `__: + An example on how a model can use the trajectory view API to specify its own input. +- |old_stack| `How to wrap MobileNetV2 into your RLlib model `__: + Implementations of `tf.keras.applications.mobilenet_v2.MobileNetV2` and `torch.hub (mobilenet_v2)`-wrapping example models. +- |old_stack| `How to setup a Differentiable Neural Computer `__: + Example of DeepMind's Differentiable Neural Computer for partially observable environments. + + +Tuned Examples +++++++++++++++ + +The `tuned examples `__ folder +contains python config files (yaml for the old API stack) that can be executed analogously to +all other example scripts described here in order to run tuned learning experiments +for the different algorithms and different environment types. + +For example, see this tuned Atari example for PPO, which learns to solve the Pong environment +in roughly 5min. It can be run like this on a single g5.24xlarge (or g6.24xlarge) machine with +4 GPUs and 96 CPUs: + +.. code-block:: bash + + $ cd ray/rllib/tuned_examples/ppo + $ python atari_ppo.py --env ALE/Pong-v5 --num-gpus=4 --num-env-runners=95 + +Note that some of the files in this folder are used for RLlib's daily or weekly +release tests as well. Community Examples ------------------- -- `Arena AI `__: +++++++++++++++++++ +- |old_stack| `Arena AI `__: A General Evaluation Platform and Building Toolkit for Single/Multi-Agent Intelligence with RLlib-generated baselines. -- `CARLA `__: +- |old_stack| `CARLA `__: Example of training autonomous vehicles with RLlib and `CARLA `__ simulator. -- `The Emergence of Adversarial Communication in Multi-Agent Reinforcement Learning `__: +- |old_stack| `The Emergence of Adversarial Communication in Multi-Agent Reinforcement Learning `__: Using Graph Neural Networks and RLlib to train multiple cooperative and adversarial agents to solve the "cover the area"-problem, thereby learning how to best communicate (or - in the adversarial case - how to disturb communication) (`code `__). -- `Flatland `__: +- |old_stack| `Flatland `__: A dense traffic simulating environment with RLlib-generated baselines. -- `GFootball `__: +- |old_stack| `GFootball `__: Example of setting up a multi-agent version of `GFootball `__ with RLlib. -- `mobile-env `__: +- |old_stack| `mobile-env `__: An open, minimalist Gymnasium environment for autonomous coordination in wireless mobile networks. Includes an example notebook using Ray RLlib for multi-agent RL with mobile-env. -- `Neural MMO `__: +- |old_stack| `Neural MMO `__: A multiagent AI research environment inspired by Massively Multiplayer Online (MMO) role playing games – self-contained worlds featuring thousands of agents per persistent macrocosm, diverse skilling systems, local and global economies, complex emergent social structures, and ad-hoc high-stakes single and team based conflict. -- `NeuroCuts `__: +- |old_stack| `NeuroCuts `__: Example of building packet classification trees using RLlib / multi-agent in a bandit-like setting. -- `NeuroVectorizer `__: +- |old_stack| `NeuroVectorizer `__: Example of learning optimal LLVM vectorization compiler pragmas for loops in C and C++ codes using RLlib. 
-- `Roboschool / SageMaker `__: +- |old_stack| `Roboschool / SageMaker `__: Example of training robotic control policies in SageMaker with RLlib. -- `Sequential Social Dilemma Games `__: +- |old_stack| `Sequential Social Dilemma Games `__: Example of using the multi-agent API to model several `social dilemma games `__. -- `Simple custom environment for single RL with Ray and RLlib `__: +- |old_stack| `Simple custom environment for single RL with Ray and RLlib `__: Create a custom environment and train a single agent RL using Ray 2.0 with Tune. -- `StarCraft2 `__: +- |old_stack| `StarCraft2 `__: Example of training in StarCraft2 maps with RLlib / multi-agent. -- `Traffic Flow `__: +- |old_stack| `Traffic Flow `__: Example of optimizing mixed-autonomy traffic simulations with RLlib / multi-agent. Blog Posts ----------- +++++++++++ -- `Attention Nets and More with RLlib’s Trajectory View API `__: +- |old_stack| `Attention Nets and More with RLlib’s Trajectory View API `__: Blog describing RLlib's new "trajectory view API" and how it enables implementations of GTrXL (attention net) architectures. -- `Reinforcement Learning with RLlib in the Unity Game Engine `__: +- |old_stack| `Reinforcement Learning with RLlib in the Unity Game Engine `__: How-To guide about connecting RLlib with the Unity3D game engine for running visual- and physics-based RL experiments. -- `Lessons from Implementing 12 Deep RL Algorithms in TF and PyTorch `__: +- |old_stack| `Lessons from Implementing 12 Deep RL Algorithms in TF and PyTorch `__: Discussion on how the Ray Team ported 12 of RLlib's algorithms from TensorFlow to PyTorch and the lessons learned. -- `Scaling Multi-Agent Reinforcement Learning `__: +- |old_stack| `Scaling Multi-Agent Reinforcement Learning `__: Blog post of a brief tutorial on multi-agent RL and its design in RLlib. -- `Functional RL with Keras and TensorFlow Eager `__: +- |old_stack| `Functional RL with Keras and TensorFlow Eager `__: Exploration of a functional paradigm for implementing reinforcement learning (RL) algorithms. diff --git a/doc/source/rllib/rllib-learner.rst b/doc/source/rllib/rllib-learner.rst index 4fb3ed9d04155..8fa793a08eb6e 100644 --- a/doc/source/rllib/rllib-learner.rst +++ b/doc/source/rllib/rllib-learner.rst @@ -132,9 +132,9 @@ and :py:class:`~ray.rllib.core.learner.learner.Learner` APIs via the :py:class:` # LearnerGroup. config = ( PPOConfig() - # Number of Learner workers (ray actors). + # Number of Learner workers (Ray actors). # Use 0 for no actors, only create a local Learner. - # Use >=1 to create n DDP-style Learner workers (ray actors). + # Use >=1 to create n DDP-style Learner workers (Ray actors). .learners(num_learners=1) # Specify the learner's hyperparameters. .training( diff --git a/doc/source/rllib/rllib-models.rst b/doc/source/rllib/rllib-models.rst index e14713a216aba..001036405abe3 100644 --- a/doc/source/rllib/rllib-models.rst +++ b/doc/source/rllib/rllib-models.rst @@ -332,7 +332,7 @@ Implementing custom Attention Networks Similar to the RNN case described above, you could also implement your own attention-based networks, instead of using the ``use_attention: True`` flag in your model config. -Check out RLlib's `GTrXL (Attention Net) `__ implementations +See RLlib's `GTrXL (Attention Net) `__ implementations (for `TF `__ and `PyTorch `__) to get a better idea on how to write your own models of this type. These are the models we use as wrappers when ``use_attention=True``. 
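As a minimal illustration of the wrapper route mentioned above (rather than writing a custom attention model), you can switch the flag on in the old-API-stack model config. The environment here is only a placeholder, and any further `attention_*` tuning keys are left out on purpose:

.. code-block:: python

    from ray.rllib.algorithms.ppo import PPOConfig

    # Old API stack: let RLlib wrap the default model with its GTrXL attention net.
    config = (
        PPOConfig()
        .environment("CartPole-v1")  # Placeholder env; attention pays off in POMDPs.
        .training(
            model={
                "use_attention": True,
                # Further `attention_*` keys (number of transformer units, heads,
                # memory sizes, and so on) can be set here as well; see the model
                # config reference for the exact names and defaults.
            },
        )
    )
    algo = config.build()
    result = algo.train()
    print(result)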
diff --git a/doc/source/rllib/rllib-torch2x.rst b/doc/source/rllib/rllib-torch2x.rst index ec3e50bf934d7..06c7476e77bd8 100644 --- a/doc/source/rllib/rllib-torch2x.rst +++ b/doc/source/rllib/rllib-torch2x.rst @@ -80,7 +80,7 @@ Some meta-level comments Exploration ------------ -In RLlib, you can now set the configuration so that it uses the compiled module during sampling of an RL agent training process. By default, the rollout workers run on CPU, therefore it is recommended to use the ``ipex`` or ``onnxrt`` backend. However, you can still run the sampling part on GPUs as well by setting ``num_gpus_per_env_runner`` in which case other backends can be used as well. For enabling torch-compile during training you can also set `torch_compile_learner` equivalents. +In RLlib, you can now set the configuration so that it uses the compiled module during sampling of an RL agent training process. By default, the rollout workers run on CPU, therefore it's recommended to use the ``ipex`` or ``onnxrt`` backend. However, you can still run the sampling part on GPUs as well by setting ``num_gpus_per_env_runner`` in which case other backends can be used as well. For enabling torch-compile during training you can also set `torch_compile_learner` equivalents. diff --git a/doc/source/rllib/rllib-training.rst b/doc/source/rllib/rllib-training.rst index 6bcf8672f276c..fdb2682e4ab1c 100644 --- a/doc/source/rllib/rllib-training.rst +++ b/doc/source/rllib/rllib-training.rst @@ -444,11 +444,11 @@ and 5 remote workers (responsible for sample collection). Since learning is most of the time done on the local worker, it may help to provide one or more GPUs to that worker via the ``num_gpus`` setting. -Similarly, the resource allocation to remote workers can be controlled via ``num_cpus_per_env_runner``, ``num_gpus_per_env_runner``, and ``custom_resources_per_env_runner``. +Similarly, you can control the resource allocation to remote workers with ``num_cpus_per_env_runner``, ``num_gpus_per_env_runner``, and ``custom_resources_per_env_runner``. -The number of GPUs can be fractional quantities (e.g. 0.5) to allocate only a fraction +The number of GPUs can be fractional quantities (for example, 0.5) to allocate only a fraction of a GPU. For example, with DQN you can pack five algorithms onto one GPU by setting -``num_gpus: 0.2``. Check out `this fractional GPU example here `__ +``num_gpus: 0.2``. See `this fractional GPU example here `__ as well that also demonstrates how environments (running on the remote workers) that require a GPU can benefit from the ``num_gpus_per_env_runner`` setting. @@ -493,7 +493,7 @@ Here are some rules of thumb for scaling training with RLlib. 2. If the environment is fast and the model is small (most models for RL are), use time-efficient algorithms such as :ref:`PPO `, or :ref:`IMPALA `. These can be scaled by increasing ``num_env_runners`` to add rollout workers. It may also make sense to enable `vectorization `__ for -inference. Make sure to set ``num_gpus: 1`` if you want to use a GPU. If the learner becomes a bottleneck, multiple GPUs can be used for learning by setting +inference. Make sure to set ``num_gpus: 1`` if you want to use a GPU. If the learner becomes a bottleneck, you can use multiple GPUs for learning by setting ``num_gpus > 1``. 3. If the model is compute intensive (e.g., a large deep residual network) and inference is the bottleneck, consider allocating GPUs to workers by setting ``num_gpus_per_env_runner: 1``. 
If you only have a single GPU, consider ``num_env_runners: 0`` to use the learner GPU for inference. For efficient use of GPU time, use a small number of GPU workers and a large number of `envs per worker `__. diff --git a/rllib/BUILD b/rllib/BUILD index 3ff7a82754610..16af480689f2b 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2924,11 +2924,11 @@ py_test( ) py_test( - name = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm", - main = "examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py", + name = "examples/rl_modules/pretraining_single_agent_training_multi_agent", + main = "examples/rl_modules/pretraining_single_agent_training_multi_agent.py", tags = ["team:rllib", "examples"], size = "medium", - srcs = ["examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py"], + srcs = ["examples/rl_modules/pretraining_single_agent_training_multi_agent.py"], args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iters-pretraining=5", "--stop-iters=20", "--stop-reward=150.0"], ) @@ -3058,11 +3058,11 @@ py_test( ) py_test( - name = "examples/custom_metrics_and_callbacks", - main = "examples/custom_metrics_and_callbacks.py", + name = "examples/metrics/custom_metrics_and_callbacks", + main = "examples/metrics/custom_metrics_and_callbacks.py", tags = ["team:rllib", "exclusive", "examples"], size = "small", - srcs = ["examples/custom_metrics_and_callbacks.py"], + srcs = ["examples/metrics/custom_metrics_and_callbacks.py"], args = ["--stop-iters=2"] ) diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py index 479502c764abb..876a47b121f4b 100644 --- a/rllib/algorithms/algorithm.py +++ b/rllib/algorithms/algorithm.py @@ -2747,7 +2747,8 @@ def merge_algorithm_configs( deprecation_warning( "callbacks dict interface", "a class extending rllib.algorithms.callbacks.DefaultCallbacks; " - "see `rllib/examples/custom_metrics_and_callbacks.py` for an example.", + "see `rllib/examples/metrics/custom_metrics_and_callbacks.py` for an " + "example.", error=True, ) diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index 8d0b57c208fc9..5529ebed8be83 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -1965,26 +1965,26 @@ def learners( Args: num_learners: Number of Learner workers used for updating the RLModule. - A value of 0 means training will take place on a local Learner on main + A value of 0 means training takes place on a local Learner on main process CPUs or 1 GPU (determined by `num_gpus_per_learner`). For multi-gpu training, you have to set `num_learners` to > 1 and set - `num_gpus_per_learner` accordingly (e.g. 4 GPUs total and model fits on + `num_gpus_per_learner` accordingly (e.g., 4 GPUs total and model fits on 1 GPU: `num_learners=4; num_gpus_per_learner=1` OR 4 GPUs total and model requires 2 GPUs: `num_learners=2; num_gpus_per_learner=2`). num_cpus_per_learner: Number of CPUs allocated per Learner worker. Only necessary for custom processing pipeline inside each Learner requiring multiple CPU cores. Ignored if `num_learners=0`. num_gpus_per_learner: Number of GPUs allocated per Learner worker. If - `num_learners=0`, any value greater than 0 will run the - training on a single GPU on the main process, while a value of 0 will - run the training on main process CPUs. 
If `num_gpus_per_learner` is - > 0, then `num_cpus_per_learner` should not be changed (from its default + `num_learners=0`, any value greater than 0 runs the + training on a single GPU on the main process, while a value of 0 runs + the training on main process CPUs. If `num_gpus_per_learner` is > 0, + then you shouldn't change `num_cpus_per_learner` (from its default value of 1). local_gpu_idx: If `num_gpus_per_learner` > 0, and - `num_learners` < 2, then this GPU index will be used for - training. This is an index into the available + `num_learners` < 2, then RLlib uses this GPU index for training. This is + an index into the available CUDA devices. For example if `os.environ["CUDA_VISIBLE_DEVICES"] = "1"` - then a `local_gpu_idx` of 0 will use the GPU with ID=1 on the node. + and `local_gpu_idx=0`, RLlib uses the GPU with ID=1 on the node. Returns: This updated AlgorithmConfig object. @@ -2060,8 +2060,8 @@ def training( worker. This setting only applies to the new API stack. The number of Learner workers can be set via `config.resources( num_learners=...)`. The total effective batch size is then - `num_learners` x `train_batch_size_per_learner` and can - be accessed via the property `AlgorithmConfig.total_train_batch_size`. + `num_learners` x `train_batch_size_per_learner` and you can + access it with the property `AlgorithmConfig.total_train_batch_size`. train_batch_size: Training batch size, if applicable. When on the new API stack, this setting should no longer be used. Instead, use `train_batch_size_per_learner` (in combination with @@ -2164,7 +2164,8 @@ def callbacks(self, callbacks_class) -> "AlgorithmConfig": callbacks_class: Callbacks class, whose methods will be run during various phases of training and environment sample collection. See the `DefaultCallbacks` class and - `examples/custom_metrics_and_callbacks.py` for more usage information. + `examples/metrics/custom_metrics_and_callbacks.py` for more usage + information. Returns: This updated AlgorithmConfig object. @@ -4015,7 +4016,7 @@ def _validate_resources_settings(self): # Remove this once we are able to specify placement group bundle index in RLlib if self.num_cpus_per_learner > 1 and self.num_gpus_per_learner > 0: raise ValueError( - "Cannot set both `num_cpus_per_learner` > 1 and " + "Can't set both `num_cpus_per_learner` > 1 and " " `num_gpus_per_learner` > 0! Either set " "`num_cpus_per_learner` > 1 (and `num_gpus_per_learner`" "=0) OR set `num_gpus_per_learner` > 0 (and leave " @@ -4028,7 +4029,7 @@ def _validate_resources_settings(self): if self.num_learners == 0 and self.num_gpus_per_env_runner > 1: raise ValueError( "num_gpus_per_env_runner must be 0 (cpu) or 1 (gpu) when using local " - "mode (i.e. 
`num_learners=0`)" + "mode (i.e., `num_learners=0`)" ) def _validate_multi_agent_settings(self): @@ -4133,7 +4134,7 @@ def _validate_input_settings(self): ) if self.input_ == "dataset": - # If we need to read a ray dataset set the parallelism and + # If you need to read a Ray dataset set the parallelism and # num_cpus_per_read_task from rollout worker settings self.input_config["num_cpus_per_read_task"] = self.num_cpus_per_env_runner if self.in_evaluation: diff --git a/rllib/benchmarks/torch_compile/README.md b/rllib/benchmarks/torch_compile/README.md index f0216790ec7c3..aca3935a12a18 100644 --- a/rllib/benchmarks/torch_compile/README.md +++ b/rllib/benchmarks/torch_compile/README.md @@ -54,7 +54,7 @@ config.framework( ) ``` -This benchmark script runs PPO algorithm with the default model architecture for Atari-Breakout game. It will run the training for `n` iterations for both compiled and non-compiled RLModules and reports the speedup. Note that negative speedup values mean a slowdown when you compile the module. +This benchmark script runs PPO algorithm with the default model architecture for Atari-Breakout game. It runs the training for `n` iterations for both compiled and non-compiled RLModules and reports the speedup. Note that negative speedup values mean a slowdown when you compile the module. To run the benchmark script, you need a ray cluster comprised of at least 129 CPUs (2x64 + 1) and 2 GPUs. If this is not accessible to you, you can change the number of sampling workers and batch size to make the requirements smaller. diff --git a/rllib/core/learner/learner_group.py b/rllib/core/learner/learner_group.py index 5496cb0ce5ec1..2b066f9819436 100644 --- a/rllib/core/learner/learner_group.py +++ b/rllib/core/learner/learner_group.py @@ -140,7 +140,7 @@ def __init__( else: backend_config = _get_backend_config(learner_class) - # TODO (sven): Cannot set both `num_cpus_per_learner`>1 and + # TODO (sven): Can't set both `num_cpus_per_learner`>1 and # `num_gpus_per_learner`>0! Users must set one or the other due # to issues with placement group fragmentation. See # https://github.com/ray-project/ray/issues/35409 for more details. diff --git a/rllib/examples/_old_api_stack/attention_net_supervised.py b/rllib/examples/_old_api_stack/attention_net_supervised.py index d5615f8f042fb..2c0f13f506aa5 100644 --- a/rllib/examples/_old_api_stack/attention_net_supervised.py +++ b/rllib/examples/_old_api_stack/attention_net_supervised.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Box, Discrete import numpy as np diff --git a/rllib/examples/_old_api_stack/complex_struct_space.py b/rllib/examples/_old_api_stack/complex_struct_space.py index 9014443731277..1a1ecd28e122b 100644 --- a/rllib/examples/_old_api_stack/complex_struct_space.py +++ b/rllib/examples/_old_api_stack/complex_struct_space.py @@ -1,4 +1,5 @@ -"""Example of using variable-length Repeated / struct observation spaces. +# @OldAPIStack +"""Example of using variable-length Repeated or struct observation spaces. 
This example demonstrates the following: - using a custom environment with Repeated / struct observations diff --git a/rllib/examples/_old_api_stack/connectors/adapt_connector_policy.py b/rllib/examples/_old_api_stack/connectors/adapt_connector_policy.py index b4dcb535b230e..db59a49dcdbc0 100644 --- a/rllib/examples/_old_api_stack/connectors/adapt_connector_policy.py +++ b/rllib/examples/_old_api_stack/connectors/adapt_connector_policy.py @@ -1,5 +1,6 @@ +# @OldAPIStack """This example script shows how to load a connector enabled policy, -and adapt/use it with a different version of the environment. +and adapt or use it with a different version of the environment. """ import gymnasium as gym diff --git a/rllib/examples/_old_api_stack/connectors/prepare_checkpoint.py b/rllib/examples/_old_api_stack/connectors/prepare_checkpoint.py index 10ee98905c661..5242c70909649 100644 --- a/rllib/examples/_old_api_stack/connectors/prepare_checkpoint.py +++ b/rllib/examples/_old_api_stack/connectors/prepare_checkpoint.py @@ -1,3 +1,4 @@ +# @OldAPIStack import random from ray.rllib.algorithms.appo import APPOConfig diff --git a/rllib/examples/_old_api_stack/connectors/run_connector_policy.py b/rllib/examples/_old_api_stack/connectors/run_connector_policy.py index 73e3e0905fd87..71241f801e22e 100644 --- a/rllib/examples/_old_api_stack/connectors/run_connector_policy.py +++ b/rllib/examples/_old_api_stack/connectors/run_connector_policy.py @@ -1,5 +1,6 @@ -"""This example script shows how to load a connector enabled policy, -and use it in a serving/inference setting. +# @OldAPIStack +"""This example script loads a connector enabled policy, +and uses it in a serving or inference setting. """ import argparse diff --git a/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py b/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py index 8781b4b483836..f15994c0456ce 100644 --- a/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py +++ b/rllib/examples/_old_api_stack/connectors/self_play_with_policy_checkpoint.py @@ -1,6 +1,7 @@ -"""Example showing how one can restore a connector enabled TF policy +# @OldAPIStack +"""Example showing to restore a connector enabled TF policy checkpoint for a new self-play PyTorch training job. -The checkpointed policy may be trained with a different algorithm too. +You can train the checkpointed policy with a different algorithm too. 
""" import argparse diff --git a/rllib/examples/_old_api_stack/custom_keras_model.py b/rllib/examples/_old_api_stack/custom_keras_model.py index c49010ef16aab..cdf1f516ef329 100644 --- a/rllib/examples/_old_api_stack/custom_keras_model.py +++ b/rllib/examples/_old_api_stack/custom_keras_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack """Example of using a custom ModelV2 Keras-style model.""" import argparse diff --git a/rllib/examples/_old_api_stack/models/action_mask_model.py b/rllib/examples/_old_api_stack/models/action_mask_model.py index 02630806794e8..92fe99e53847e 100644 --- a/rllib/examples/_old_api_stack/models/action_mask_model.py +++ b/rllib/examples/_old_api_stack/models/action_mask_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Dict from ray.rllib.models.tf.fcnet import FullyConnectedNetwork diff --git a/rllib/examples/_old_api_stack/models/autoregressive_action_dist.py b/rllib/examples/_old_api_stack/models/autoregressive_action_dist.py index 6afc6cf282747..fd8f2d53f7789 100644 --- a/rllib/examples/_old_api_stack/models/autoregressive_action_dist.py +++ b/rllib/examples/_old_api_stack/models/autoregressive_action_dist.py @@ -1,3 +1,4 @@ +# @OldAPIStack from ray.rllib.models.tf.tf_action_dist import Categorical, ActionDistribution from ray.rllib.models.torch.torch_action_dist import ( TorchCategorical, diff --git a/rllib/examples/_old_api_stack/models/autoregressive_action_model.py b/rllib/examples/_old_api_stack/models/autoregressive_action_model.py index 758435d9cda2d..8b71e5ab9dc28 100644 --- a/rllib/examples/_old_api_stack/models/autoregressive_action_model.py +++ b/rllib/examples/_old_api_stack/models/autoregressive_action_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Discrete, Tuple from ray.rllib.models.tf.misc import normc_initializer diff --git a/rllib/examples/_old_api_stack/models/batch_norm_model.py b/rllib/examples/_old_api_stack/models/batch_norm_model.py index accb8dc3a8b7c..7a5ac956d24e8 100644 --- a/rllib/examples/_old_api_stack/models/batch_norm_model.py +++ b/rllib/examples/_old_api_stack/models/batch_norm_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/centralized_critic_models.py b/rllib/examples/_old_api_stack/models/centralized_critic_models.py index d15c6e6a18347..5ccc4448e5428 100644 --- a/rllib/examples/_old_api_stack/models/centralized_critic_models.py +++ b/rllib/examples/_old_api_stack/models/centralized_critic_models.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Box from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/custom_model_api.py b/rllib/examples/_old_api_stack/models/custom_model_api.py index 27ef65f68f13a..7297faa890384 100644 --- a/rllib/examples/_old_api_stack/models/custom_model_api.py +++ b/rllib/examples/_old_api_stack/models/custom_model_api.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Box from ray.rllib.models.tf.fcnet import FullyConnectedNetwork diff --git a/rllib/examples/_old_api_stack/models/eager_model.py b/rllib/examples/_old_api_stack/models/eager_model.py index 5d030d32d6958..1628fcda4abe8 100644 --- a/rllib/examples/_old_api_stack/models/eager_model.py +++ b/rllib/examples/_old_api_stack/models/eager_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack import random from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/fast_model.py 
b/rllib/examples/_old_api_stack/models/fast_model.py index 1a1e24c3f30bf..99ac7e83a7de3 100644 --- a/rllib/examples/_old_api_stack/models/fast_model.py +++ b/rllib/examples/_old_api_stack/models/fast_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.torch.misc import SlimFC diff --git a/rllib/examples/_old_api_stack/models/mobilenet_v2_encoder.py b/rllib/examples/_old_api_stack/models/mobilenet_v2_encoder.py index 6a3482f547b0f..34baf73f4ef53 100644 --- a/rllib/examples/_old_api_stack/models/mobilenet_v2_encoder.py +++ b/rllib/examples/_old_api_stack/models/mobilenet_v2_encoder.py @@ -1,3 +1,4 @@ +# @OldAPIStack """ This file implements a MobileNet v2 Encoder. It uses MobileNet v2 to encode images into a latent space of 1000 dimensions. diff --git a/rllib/examples/_old_api_stack/models/mobilenet_v2_with_lstm_models.py b/rllib/examples/_old_api_stack/models/mobilenet_v2_with_lstm_models.py index c8c9b1f0bff08..fc0b310c4ed05 100644 --- a/rllib/examples/_old_api_stack/models/mobilenet_v2_with_lstm_models.py +++ b/rllib/examples/_old_api_stack/models/mobilenet_v2_with_lstm_models.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/modelv3.py b/rllib/examples/_old_api_stack/models/modelv3.py index 805c87e8a98d4..a93879510455b 100644 --- a/rllib/examples/_old_api_stack/models/modelv3.py +++ b/rllib/examples/_old_api_stack/models/modelv3.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np from ray.rllib.policy.sample_batch import SampleBatch diff --git a/rllib/examples/_old_api_stack/models/neural_computer.py b/rllib/examples/_old_api_stack/models/neural_computer.py index 90c8b0cb34937..d863f71e62d74 100644 --- a/rllib/examples/_old_api_stack/models/neural_computer.py +++ b/rllib/examples/_old_api_stack/models/neural_computer.py @@ -1,3 +1,4 @@ +# @OldAPIStack from collections import OrderedDict import gymnasium as gym from typing import Union, Dict, List, Tuple diff --git a/rllib/examples/_old_api_stack/models/parametric_actions_model.py b/rllib/examples/_old_api_stack/models/parametric_actions_model.py index 20711553b82b8..e568b8bed72ad 100644 --- a/rllib/examples/_old_api_stack/models/parametric_actions_model.py +++ b/rllib/examples/_old_api_stack/models/parametric_actions_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Box from ray.rllib.algorithms.dqn.distributional_q_tf_model import DistributionalQTFModel diff --git a/rllib/examples/_old_api_stack/models/rnn_model.py b/rllib/examples/_old_api_stack/models/rnn_model.py index c95b58d5e3a44..bdbc8b6a9c858 100644 --- a/rllib/examples/_old_api_stack/models/rnn_model.py +++ b/rllib/examples/_old_api_stack/models/rnn_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/rnn_spy_model.py b/rllib/examples/_old_api_stack/models/rnn_spy_model.py index fdf280f043f83..337990a60759a 100644 --- a/rllib/examples/_old_api_stack/models/rnn_spy_model.py +++ b/rllib/examples/_old_api_stack/models/rnn_spy_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np import pickle diff --git a/rllib/examples/_old_api_stack/models/shared_weights_model.py b/rllib/examples/_old_api_stack/models/shared_weights_model.py index c7f44e00e0609..28ad0896b18fb 100644 --- a/rllib/examples/_old_api_stack/models/shared_weights_model.py +++ 
b/rllib/examples/_old_api_stack/models/shared_weights_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np from ray.rllib.models.modelv2 import ModelV2 diff --git a/rllib/examples/_old_api_stack/models/simple_rpg_model.py b/rllib/examples/_old_api_stack/models/simple_rpg_model.py index c96d24b29ee33..b37d915df8a18 100644 --- a/rllib/examples/_old_api_stack/models/simple_rpg_model.py +++ b/rllib/examples/_old_api_stack/models/simple_rpg_model.py @@ -1,3 +1,4 @@ +# @OldAPIStack from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.tf.fcnet import FullyConnectedNetwork as TFFCNet from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 diff --git a/rllib/examples/_old_api_stack/models/trajectory_view_utilizing_models.py b/rllib/examples/_old_api_stack/models/trajectory_view_utilizing_models.py index 1599d5c7ecfe0..ed7e2919ede33 100644 --- a/rllib/examples/_old_api_stack/models/trajectory_view_utilizing_models.py +++ b/rllib/examples/_old_api_stack/models/trajectory_view_utilizing_models.py @@ -1,3 +1,4 @@ +# @OldAPIStack from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.models.torch.misc import SlimFC from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 diff --git a/rllib/examples/_old_api_stack/parametric_actions_cartpole.py b/rllib/examples/_old_api_stack/parametric_actions_cartpole.py index c6b4345e58edd..2f8832af502a3 100644 --- a/rllib/examples/_old_api_stack/parametric_actions_cartpole.py +++ b/rllib/examples/_old_api_stack/parametric_actions_cartpole.py @@ -1,4 +1,5 @@ -"""Example of handling variable length and/or parametric action spaces. +# @OldAPIStack +"""Example of handling variable length or parametric action spaces. This toy example demonstrates the action-embedding based approach for handling large discrete action spaces (potentially infinite in size), similar to this example: diff --git a/rllib/examples/_old_api_stack/parametric_actions_cartpole_embeddings_learnt_by_model.py b/rllib/examples/_old_api_stack/parametric_actions_cartpole_embeddings_learnt_by_model.py index 907add2bc881c..2750e68ec4c11 100644 --- a/rllib/examples/_old_api_stack/parametric_actions_cartpole_embeddings_learnt_by_model.py +++ b/rllib/examples/_old_api_stack/parametric_actions_cartpole_embeddings_learnt_by_model.py @@ -1,4 +1,5 @@ -"""Example of handling variable length and/or parametric action spaces. +# @OldAPIStack +"""Example of handling variable length or parametric action spaces. 
This is a toy example of the action-embedding based approach for handling large discrete action spaces (potentially infinite in size), similar to this: diff --git a/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py b/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py index 40e7b575fbf66..c9a4758f81ea4 100644 --- a/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py +++ b/rllib/examples/_old_api_stack/policy/cliff_walking_wall_policy.py @@ -1,3 +1,4 @@ +# @OldAPIStack import gymnasium as gym from typing import Dict, Union, List, Tuple, Optional import numpy as np diff --git a/rllib/examples/_old_api_stack/policy/episode_env_aware_policy.py b/rllib/examples/_old_api_stack/policy/episode_env_aware_policy.py index 47ce9b92c8843..0c00e3ebad862 100644 --- a/rllib/examples/_old_api_stack/policy/episode_env_aware_policy.py +++ b/rllib/examples/_old_api_stack/policy/episode_env_aware_policy.py @@ -1,3 +1,4 @@ +# @OldAPIStack import numpy as np import tree from gymnasium.spaces import Box diff --git a/rllib/examples/_old_api_stack/policy/memory_leaking_policy.py b/rllib/examples/_old_api_stack/policy/memory_leaking_policy.py index 9b813981ea480..3a5fa13ed5091 100644 --- a/rllib/examples/_old_api_stack/policy/memory_leaking_policy.py +++ b/rllib/examples/_old_api_stack/policy/memory_leaking_policy.py @@ -1,3 +1,4 @@ +# @OldAPIStack import gymnasium as gym from ray.rllib.examples._old_api_stack.policy.random_policy import RandomPolicy diff --git a/rllib/examples/_old_api_stack/policy/random_policy.py b/rllib/examples/_old_api_stack/policy/random_policy.py index 2bc0a61f59132..c410ba0ec464e 100644 --- a/rllib/examples/_old_api_stack/policy/random_policy.py +++ b/rllib/examples/_old_api_stack/policy/random_policy.py @@ -1,3 +1,4 @@ +# @OldAPIStack from gymnasium.spaces import Box import numpy as np import random diff --git a/rllib/examples/_old_api_stack/policy/rock_paper_scissors_dummies.py b/rllib/examples/_old_api_stack/policy/rock_paper_scissors_dummies.py index 06a22b53aaad7..dbaa0e4010383 100644 --- a/rllib/examples/_old_api_stack/policy/rock_paper_scissors_dummies.py +++ b/rllib/examples/_old_api_stack/policy/rock_paper_scissors_dummies.py @@ -1,3 +1,4 @@ +# @OldAPIStack import gymnasium as gym import numpy as np import random diff --git a/rllib/examples/_old_api_stack/remote_base_env_with_custom_api.py b/rllib/examples/_old_api_stack/remote_base_env_with_custom_api.py index d7eb81fd76fee..4fa783196cf5f 100644 --- a/rllib/examples/_old_api_stack/remote_base_env_with_custom_api.py +++ b/rllib/examples/_old_api_stack/remote_base_env_with_custom_api.py @@ -1,5 +1,6 @@ +# @OldAPIStack """ -This script demonstrates how to specify custom env APIs in +This script specifies custom env APIs in combination with RLlib's `remote_worker_envs` setting, which parallelizes individual sub-envs within a vector env by making each one a Ray Actor. diff --git a/rllib/examples/_old_api_stack/remote_envs_with_inference_done_on_main_node.py b/rllib/examples/_old_api_stack/remote_envs_with_inference_done_on_main_node.py index 23c8c6cf3e30f..eac4adbc60640 100644 --- a/rllib/examples/_old_api_stack/remote_envs_with_inference_done_on_main_node.py +++ b/rllib/examples/_old_api_stack/remote_envs_with_inference_done_on_main_node.py @@ -1,5 +1,6 @@ +# @OldAPIStack """ -This script demonstrates how to specify n (vectorized) envs +This script specifies n (vectorized) envs as Ray remote (actors), such that stepping through these occurs in parallel. 
Also, actions for each env step are calculated on the "main" node. diff --git a/rllib/examples/_old_api_stack/sb2rllib_rllib_example.py b/rllib/examples/_old_api_stack/sb2rllib_rllib_example.py index 61da98af00306..28b5ddd830b9a 100644 --- a/rllib/examples/_old_api_stack/sb2rllib_rllib_example.py +++ b/rllib/examples/_old_api_stack/sb2rllib_rllib_example.py @@ -1,5 +1,6 @@ +# @OldAPIStack """ -Example script on how to train, save, load, and test an RLlib agent. +Example script that trains, saves, loads, and tests an RLlib agent. Equivalent script with stable baselines: sb2rllib_sb_example.py. Demonstrates transition from stable_baselines to Ray RLlib. diff --git a/rllib/examples/_old_api_stack/sb2rllib_sb_example.py b/rllib/examples/_old_api_stack/sb2rllib_sb_example.py index 3812fea5420a6..8e36860749351 100644 --- a/rllib/examples/_old_api_stack/sb2rllib_sb_example.py +++ b/rllib/examples/_old_api_stack/sb2rllib_sb_example.py @@ -1,5 +1,6 @@ +# @OldAPIStack """ -Example script on how to train, save, load, and test a stable baselines 2 agent. +Example script that trains, saves, loads, and tests a stable baselines 2 agent. Code taken and adjusted from SB2 docs: https://stable-baselines.readthedocs.io/en/master/guide/quickstart.html Equivalent script with RLlib: sb2rllib_rllib_example.py diff --git a/rllib/examples/action_masking.py b/rllib/examples/action_masking.py index c9bab618fdf1b..3d49117c94d4f 100644 --- a/rllib/examples/action_masking.py +++ b/rllib/examples/action_masking.py @@ -1,4 +1,6 @@ -"""Example showing how to use "action masking" in RLlib. +# @OldAPIStack + +"""Example that uses "action masking" in RLlib. "Action masking" allows the agent to select actions based on the current observation. This is useful in many practical scenarios, where different diff --git a/rllib/examples/algorithms/custom_training_step_on_and_off_policy_combined.py b/rllib/examples/algorithms/custom_training_step_on_and_off_policy_combined.py index 0cb6a0df654c3..7e66d499c0443 100644 --- a/rllib/examples/algorithms/custom_training_step_on_and_off_policy_combined.py +++ b/rllib/examples/algorithms/custom_training_step_on_and_off_policy_combined.py @@ -1,3 +1,5 @@ +# @OldAPIStack + """Example of using a custom training workflow. This example creates a number of CartPole agents, some of which are trained with diff --git a/rllib/examples/autoregressive_action_dist.py b/rllib/examples/autoregressive_action_dist.py index af5963cff5d39..1a9ef8af8cb68 100644 --- a/rllib/examples/autoregressive_action_dist.py +++ b/rllib/examples/autoregressive_action_dist.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """ Example of specifying an autoregressive action distribution. diff --git a/rllib/examples/cartpole_lstm.py b/rllib/examples/cartpole_lstm.py index 5b4110bf1ff39..a154a73f088a8 100644 --- a/rllib/examples/cartpole_lstm.py +++ b/rllib/examples/cartpole_lstm.py @@ -1,4 +1,5 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack + # TODO (sven): Move this script to `examples/rl_modules/...` import argparse diff --git a/rllib/examples/catalogs/custom_action_distribution.py b/rllib/examples/catalogs/custom_action_distribution.py index a404dd36e7bb1..6eb8aa234ea41 100644 --- a/rllib/examples/catalogs/custom_action_distribution.py +++ b/rllib/examples/catalogs/custom_action_distribution.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack (w/ EnvRunners). 
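The two `remote_*` example scripts above revolve around RLlib's `remote_worker_envs` setting, which turns each sub-environment of a vectorized env into its own Ray actor so that the sub-envs step in parallel. A minimal sketch of such a configuration might look as follows; the `PPOConfig` builder calls and the parameter names `num_envs_per_env_runner` and `remote_worker_envs` are assumptions about the current config API and may differ between Ray versions and API stacks (these examples are marked `@OldAPIStack`):

.. code-block:: python

    # Rough sketch only, assuming the config methods shown here exist in the
    # installed Ray version.
    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        # Stay on the old API stack, where `remote_worker_envs` is supported.
        .api_stack(
            enable_rl_module_and_learner=False,
            enable_env_runner_and_connector_v2=False,
        )
        .environment("CartPole-v1")
        # Vectorize each EnvRunner over 4 sub-envs and make every sub-env its
        # own Ray actor, so stepping through the sub-envs happens in parallel.
        .env_runners(
            num_env_runners=1,
            num_envs_per_env_runner=4,
            remote_worker_envs=True,
        )
    )
    algo = config.build()
    print(algo.train())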
+# @HybridAPIStack """ This example shows two modifications: diff --git a/rllib/examples/catalogs/mobilenet_v2_encoder.py b/rllib/examples/catalogs/mobilenet_v2_encoder.py index ca44215b8bef0..2358711a96675 100644 --- a/rllib/examples/catalogs/mobilenet_v2_encoder.py +++ b/rllib/examples/catalogs/mobilenet_v2_encoder.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """ This example shows two modifications: diff --git a/rllib/examples/centralized_critic.py b/rllib/examples/centralized_critic.py index c01f27f239794..a54caf84100ce 100644 --- a/rllib/examples/centralized_critic.py +++ b/rllib/examples/centralized_critic.py @@ -1,7 +1,7 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack # *********************************************************************************** -# IMPORTANT NOTE: This script is using the old API stack and will soon be replaced by +# IMPORTANT NOTE: This script uses the old API stack and will soon be replaced by # `ray.rllib.examples.multi_agent.pettingzoo_shared_value_function.py`! # *********************************************************************************** diff --git a/rllib/examples/centralized_critic_2.py b/rllib/examples/centralized_critic_2.py index 36955cd46275c..cdc86f218ceef 100644 --- a/rllib/examples/centralized_critic_2.py +++ b/rllib/examples/centralized_critic_2.py @@ -1,7 +1,7 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack # *********************************************************************************** -# IMPORTANT NOTE: This script is using the old API stack and will soon be replaced by +# IMPORTANT NOTE: This script uses the old API stack and will soon be replaced by # `ray.rllib.examples.multi_agent.pettingzoo_shared_value_function.py`! # *********************************************************************************** diff --git a/rllib/examples/checkpoint_by_custom_criteria.py b/rllib/examples/checkpoint_by_custom_criteria.py deleted file mode 100644 index b5fc9b0576133..0000000000000 --- a/rllib/examples/checkpoint_by_custom_criteria.py +++ /dev/null @@ -1,6 +0,0 @@ -msg = """ -This script has been moved to -`ray.rllib.examples.checkpoints.checkpoint_by_custom_criteria.py` -""" - -raise NotImplementedError(msg) diff --git a/rllib/examples/checkpoints/cartpole_dqn_export.py b/rllib/examples/checkpoints/cartpole_dqn_export.py index 749cfe4305eaa..48e73f15b6aec 100644 --- a/rllib/examples/checkpoints/cartpole_dqn_export.py +++ b/rllib/examples/checkpoints/cartpole_dqn_export.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack import numpy as np import os diff --git a/rllib/examples/checkpoints/onnx_tf.py b/rllib/examples/checkpoints/onnx_tf.py index f63847f117f8d..b4a1264c20466 100644 --- a/rllib/examples/checkpoints/onnx_tf.py +++ b/rllib/examples/checkpoints/onnx_tf.py @@ -1,3 +1,4 @@ +# @OldAPIStack import argparse import numpy as np import onnxruntime diff --git a/rllib/examples/checkpoints/onnx_torch.py b/rllib/examples/checkpoints/onnx_torch.py index 77a1ffb5f28ae..c377a5c65663d 100644 --- a/rllib/examples/checkpoints/onnx_torch.py +++ b/rllib/examples/checkpoints/onnx_torch.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. 
+# @OldAPIStack from packaging.version import Version import numpy as np diff --git a/rllib/examples/compute_adapted_gae_on_postprocess_trajectory.py b/rllib/examples/compute_adapted_gae_on_postprocess_trajectory.py index bcbcf515e15af..19c28bb3ccb49 100644 --- a/rllib/examples/compute_adapted_gae_on_postprocess_trajectory.py +++ b/rllib/examples/compute_adapted_gae_on_postprocess_trajectory.py @@ -1,6 +1,8 @@ +# @OldAPIStack + """ -Adapted (time-dependent) GAE for PPO algorithm can be activated by setting -use_adapted_gae=True in the policy config. Additionally, it is required that +Adapted (time-dependent) GAE for PPO algorithm that you can activate by setting +use_adapted_gae=True in the policy config. Additionally, it's required that "callbacks" include the custom callback class in the Algorithm's config. Furthermore, the env must return in its info dictionary a key-value pair of the form "d_ts": ... where the value is the length (time) of recent agent step. diff --git a/rllib/examples/curriculum/curriculum_learning.py b/rllib/examples/curriculum/curriculum_learning.py index 02916ed459dfc..1e7ba0250ae08 100644 --- a/rllib/examples/curriculum/curriculum_learning.py +++ b/rllib/examples/curriculum/curriculum_learning.py @@ -1,4 +1,4 @@ -"""Example of using an env-task curriculum via implementing a custom callback. +"""Example of using an env-task curriculum by implementing a custom callback. This example: - demonstrates how to define your own curriculum-capable environments using @@ -89,7 +89,7 @@ "hardest task right away).", ) - +# __curriculum_learning_example_env_options__ ENV_OPTIONS = { "is_slippery": False, # Limit the number of steps the agent is allowed to make in the env to @@ -133,9 +133,10 @@ "FHFFFFFG", ], ] +# __END_curriculum_learning_example_env_options__ -# Simple function sent to an EnvRunner to change the map of all its gym.Envs from +# Simple function sent to an EnvRunner to change the map of all its gym. Envs from # the current one to a new (tougher) one, in which the goal position is further away # from the starting position. Note that a map is a list of strings, each one # representing one row in the map. Each character in the strings represent a single diff --git a/rllib/examples/custom_model_api.py b/rllib/examples/custom_model_api.py index bbc484e0d396d..e1e6705bbf771 100644 --- a/rllib/examples/custom_model_api.py +++ b/rllib/examples/custom_model_api.py @@ -1,3 +1,4 @@ +# @OldAPIStack import argparse from gymnasium.spaces import Box, Discrete import numpy as np diff --git a/rllib/examples/custom_model_loss_and_metrics.py b/rllib/examples/custom_model_loss_and_metrics.py index 324b3f6f4ee7a..ccb3d8e1acd07 100644 --- a/rllib/examples/custom_model_loss_and_metrics.py +++ b/rllib/examples/custom_model_loss_and_metrics.py @@ -1,4 +1,5 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack + # Users should just inherit the Learner and extend the loss_fn. # TODO (sven): Move this example script to `examples/learners/...` diff --git a/rllib/examples/custom_recurrent_rnn_tokenizer.py b/rllib/examples/custom_recurrent_rnn_tokenizer.py index 4f99f6fdd8ecd..fe1d6c225f216 100644 --- a/rllib/examples/custom_recurrent_rnn_tokenizer.py +++ b/rllib/examples/custom_recurrent_rnn_tokenizer.py @@ -1,6 +1,6 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack -"""Example of define custom tokenizers for recurrent models in RLModules. +"""Example of defining custom tokenizers for recurrent models in RLModules. 
This example shows the following steps: - Define a custom tokenizer for a recurrent encoder. diff --git a/rllib/examples/debugging/deterministic_training.py b/rllib/examples/debugging/deterministic_training.py index 4319515a00c79..6bbf538e025ce 100644 --- a/rllib/examples/debugging/deterministic_training.py +++ b/rllib/examples/debugging/deterministic_training.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """ Example of a fully deterministic, repeatable RLlib train run using diff --git a/rllib/examples/envs/external_envs/cartpole_server.py b/rllib/examples/envs/external_envs/cartpole_server.py index 5524602c8fe47..bb17089f43d24 100755 --- a/rllib/examples/envs/external_envs/cartpole_server.py +++ b/rllib/examples/envs/external_envs/cartpole_server.py @@ -33,7 +33,7 @@ from ray import air, tune from ray.air.constants import TRAINING_ITERATION from ray.rllib.env.policy_server_input import PolicyServerInput -from ray.rllib.examples.custom_metrics_and_callbacks import MyCallbacks +from ray.rllib.examples.metrics.custom_metrics_and_callbacks import MyCallbacks from ray.rllib.utils.metrics import ( ENV_RUNNER_RESULTS, EPISODE_RETURN_MEAN, diff --git a/rllib/examples/envs/greyscale_env.py b/rllib/examples/envs/greyscale_env.py index 162aa5f39d0d5..5af971ad23fbc 100644 --- a/rllib/examples/envs/greyscale_env.py +++ b/rllib/examples/envs/greyscale_env.py @@ -1,3 +1,4 @@ +# @OldAPIStack """ Example of interfacing with an environment that produces 2D observations. diff --git a/rllib/examples/envs/unity3d_env_local.py b/rllib/examples/envs/unity3d_env_local.py index 91f7ea6ecb550..40350a8c5853d 100644 --- a/rllib/examples/envs/unity3d_env_local.py +++ b/rllib/examples/envs/unity3d_env_local.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """ Example of running an RLlib Algorithm against a locally running Unity3D editor diff --git a/rllib/examples/gpus/fractional_gpus_per_learner.py b/rllib/examples/gpus/fractional_gpus_per_learner.py index 9ac4c5dbeddfc..b577f66d5d093 100644 --- a/rllib/examples/gpus/fractional_gpus_per_learner.py +++ b/rllib/examples/gpus/fractional_gpus_per_learner.py @@ -77,6 +77,7 @@ parser = add_rllib_example_script_args( default_iters=50, default_reward=180, default_timesteps=100000 ) +parser.set_defaults(num_env_runners=2) # TODO (sven): Retire the currently supported --num-gpus in favor of --num-learners. parser.add_argument("--num-learners", type=int, default=1) parser.add_argument("--num-gpus-per-learner", type=float, default=0.5) @@ -98,10 +99,12 @@ enable_env_runner_and_connector_v2=True, ) .environment("CartPole-v1") + # Define EnvRunner scaling. + .env_runners(num_env_runners=args.num_env_runners) # Define Learner scaling. .learners( - # How many Learner workers do we need? If you have more than 1 GPU, you - # should set this to the number of GPUs available. + # How many Learner workers do we need? If you have more than 1 GPU, + # set this parameter to the number of GPUs available. num_learners=args.num_learners, # How many GPUs does each Learner need? 
If you have more than 1 GPU or only # one Learner, you should set this to 1, otherwise, set this to some diff --git a/rllib/examples/hierarchical/hierarchical_training.py b/rllib/examples/hierarchical/hierarchical_training.py index b7b02d76fffcd..924aa5de2f07d 100644 --- a/rllib/examples/hierarchical/hierarchical_training.py +++ b/rllib/examples/hierarchical/hierarchical_training.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """Example of hierarchical training using the multi-agent API. diff --git a/rllib/examples/inference/policy_inference_after_training_with_attention.py b/rllib/examples/inference/policy_inference_after_training_with_attention.py index 97f5f2ecded8e..a3166c3d4761a 100644 --- a/rllib/examples/inference/policy_inference_after_training_with_attention.py +++ b/rllib/examples/inference/policy_inference_after_training_with_attention.py @@ -1,3 +1,4 @@ +# @OldAPIStack """ Example showing how you can use your trained policy for inference (computing actions) in an environment. diff --git a/rllib/examples/inference/policy_inference_after_training_with_lstm.py b/rllib/examples/inference/policy_inference_after_training_with_lstm.py index 7bee98e132f36..a7dc5ada6f3ca 100644 --- a/rllib/examples/inference/policy_inference_after_training_with_lstm.py +++ b/rllib/examples/inference/policy_inference_after_training_with_lstm.py @@ -1,3 +1,4 @@ +# @OldAPIStack """ Example showing how you can use your trained policy for inference (computing actions) in an environment. diff --git a/rllib/examples/metrics/__init__.py b/rllib/examples/metrics/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/rllib/examples/custom_metrics_and_callbacks.py b/rllib/examples/metrics/custom_metrics_and_callbacks.py similarity index 99% rename from rllib/examples/custom_metrics_and_callbacks.py rename to rllib/examples/metrics/custom_metrics_and_callbacks.py index f8db1388062d5..7c279be4ccd15 100644 --- a/rllib/examples/custom_metrics_and_callbacks.py +++ b/rllib/examples/metrics/custom_metrics_and_callbacks.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """Example of using RLlib's debug callbacks. diff --git a/rllib/examples/multi_agent/two_algorithms.py b/rllib/examples/multi_agent/two_algorithms.py index 589eaa8f80e9f..f77c6d0d5c3b5 100644 --- a/rllib/examples/multi_agent/two_algorithms.py +++ b/rllib/examples/multi_agent/two_algorithms.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """Example of using two different training methods at once in multi-agent. diff --git a/rllib/examples/offline_rl/custom_input_api.py b/rllib/examples/offline_rl/custom_input_api.py index 660e1982a0156..789e64a2a357e 100644 --- a/rllib/examples/offline_rl/custom_input_api.py +++ b/rllib/examples/offline_rl/custom_input_api.py @@ -1,6 +1,6 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack -"""Example of creating a custom input api +"""Example of creating a custom input API Custom input apis are useful when your data source is in a custom format or when it is necessary to use an external data loading mechanism. diff --git a/rllib/examples/offline_rl/offline_rl.py b/rllib/examples/offline_rl/offline_rl.py index 98d7a696a82d7..b60e80e8dff0f 100644 --- a/rllib/examples/offline_rl/offline_rl.py +++ b/rllib/examples/offline_rl/offline_rl.py @@ -1,6 +1,6 @@ -# TODO (sven): Move this example script into the new API stack. 
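The `fractional_gpus_per_learner.py` hunk above adds EnvRunner scaling alongside the existing Learner-scaling comments. As a rough sketch, assuming the `learners()` and `env_runners()` config methods accept the parameters shown (and with the 0.5 value purely illustrative), a fractional-GPU Learner setup could look like this:

.. code-block:: python

    # Rough sketch only; not taken verbatim from the example script above.
    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        # Enable the new API stack (may already be the default in recent Ray).
        .api_stack(
            enable_rl_module_and_learner=True,
            enable_env_runner_and_connector_v2=True,
        )
        .environment("CartPole-v1")
        .env_runners(num_env_runners=2)
        .learners(
            # One remote Learner worker ...
            num_learners=1,
            # ... that only claims half a GPU, so two such jobs can share one.
            num_gpus_per_learner=0.5,
        )
    )
    algo = config.build()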
+# @OldAPIStack -"""Example on how to use CQL to learn from an offline json file. +"""Example on how to use CQL to learn from an offline JSON file. Important node: Make sure that your offline data file contains only a single timestep per line to mimic the way SAC pulls samples from the buffer diff --git a/rllib/examples/offline_rl/saving_experiences.py b/rllib/examples/offline_rl/saving_experiences.py index 168297cc297d9..27c76c264da98 100644 --- a/rllib/examples/offline_rl/saving_experiences.py +++ b/rllib/examples/offline_rl/saving_experiences.py @@ -1,4 +1,4 @@ -# TODO (sven): Move this example script into the new API stack. +# @OldAPIStack """Simple example of writing experiences to a file using JsonWriter.""" diff --git a/rllib/examples/replay_buffer_api.py b/rllib/examples/replay_buffer_api.py index c0ce3939bd925..5d87a5ef5cd3b 100644 --- a/rllib/examples/replay_buffer_api.py +++ b/rllib/examples/replay_buffer_api.py @@ -1,4 +1,4 @@ -# Move this example to the new API stack. +# @OldAPIStack # __sphinx_doc_replay_buffer_api_example_script_begin__ """Simple example of how to modify replay buffer behaviour. diff --git a/rllib/examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py b/rllib/examples/rl_modules/pretraining_single_agent_training_multi_agent.py similarity index 100% rename from rllib/examples/rl_modules/pretraining_single_agent_training_multi_agent_rlm.py rename to rllib/examples/rl_modules/pretraining_single_agent_training_multi_agent.py diff --git a/rllib/utils/error.py b/rllib/utils/error.py index 3e2b7a8f94740..b57681465f946 100644 --- a/rllib/utils/error.py +++ b/rllib/utils/error.py @@ -37,11 +37,11 @@ class NotSerializable(Exception): # num_gpus=n or num_gpus_per_env_runner=m settings. ERR_MSG_NO_GPUS = """Found {} GPUs on your machine (GPU devices found: {})! If your machine does not have any GPUs, you should set the config keys - `num_gpus_per_learner` and `num_gpus_per_env_runner` to 0 (they may be set to - 1 by default for your particular RL algorithm).""" + `num_gpus_per_learner` and `num_gpus_per_env_runner` to 0. They may be set to + 1 by default for your particular RL algorithm.""" ERR_MSG_INVALID_ENV_DESCRIPTOR = """The env string you provided ('{}') is: -a) Not a supported/installed environment. +a) Not a supported or installed environment. b) Not a tune-registered environment creator. c) Not a valid env class string. diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index 7b16332206f37..eaed1c3d0590a 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -1408,7 +1408,7 @@ def run_rllib_example_script_experiment( trainable: The Trainable sub-class to run in the tune.Tuner. If None (default), use the registered RLlib Algorithm class specified by args.algo. tune_callbacks: A list of Tune callbacks to configure with the tune.Tuner. - In case `args.wandb_key` is provided, will append a WandB logger to this + In case `args.wandb_key` is provided, appends a WandB logger to this list. keep_config: Set this to True, if you don't want this utility to change the given `base_config` in any way and leave it as-is. This is helpful
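The reworded `ERR_MSG_NO_GPUS` message above advises setting `num_gpus_per_learner` and `num_gpus_per_env_runner` to 0 on CPU-only machines. A minimal sketch of that override, assuming the same config methods as in the earlier sketches, might be:

.. code-block:: python

    # Rough sketch only: claim no GPUs for either the Learner or the EnvRunners.
    from ray.rllib.algorithms.ppo import PPOConfig

    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .learners(num_gpus_per_learner=0)
        .env_runners(num_gpus_per_env_runner=0)
    )
    algo = config.build()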