From b59f6ce8b61005ccff90d07f89896afc76dc571a Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Wed, 26 Jul 2023 23:10:26 +0200
Subject: [PATCH 01/20] Add categories to command registry

---
 agbenchmark/benchmarks.py                     |  4 +-
 autogpt/app/main.py                           | 31 +-----
 autogpt/commands/__init__.py                  |  2 +-
 autogpt/commands/execute_code.py              |  6 +-
 autogpt/commands/file_operations.py           |  6 +-
 autogpt/commands/git_operations.py            |  5 +-
 autogpt/commands/image_gen.py                 |  6 +-
 .../commands/{task_statuses.py => system.py}  |  6 +-
 autogpt/commands/web_search.py                |  6 +-
 autogpt/commands/web_selenium.py              |  6 +-
 autogpt/core/runner/cli_web_app/server/api.py |  4 +-
 autogpt/models/command_registry.py            | 95 +++++++++++++++++--
 tests/unit/test_commands.py                   |  4 +-
 13 files changed, 130 insertions(+), 51 deletions(-)
 rename autogpt/commands/{task_statuses.py => system.py} (87%)

diff --git a/agbenchmark/benchmarks.py b/agbenchmark/benchmarks.py
index ea884b3aa501..fc19d3bf343d 100644
--- a/agbenchmark/benchmarks.py
+++ b/agbenchmark/benchmarks.py
@@ -49,8 +49,8 @@ def get_command_registry(config: Config):
     enabled_command_categories = [
         x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
     ]
-    for command_category in enabled_command_categories:
-        command_registry.import_commands(command_category)
+    for command_module in enabled_command_categories:
+        command_registry.import_command_module(command_module)
 
     return command_registry

diff --git a/autogpt/app/main.py b/autogpt/app/main.py
index fa61eeaf7104..9cfc624fa82d 100644
--- a/autogpt/app/main.py
+++ b/autogpt/app/main.py
@@ -134,36 +134,9 @@ def run_auto_gpt(
     config.file_logger_path = Workspace.build_file_logger_path(config.workspace_path)
 
     config.plugins = scan_plugins(config, config.debug_mode)
-    # Create a CommandRegistry instance and scan default folder
-    command_registry = CommandRegistry()
-
-    logger.debug(
-        f"The following command categories are disabled: {config.disabled_command_categories}"
-    )
-    enabled_command_categories = [
-        x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
-    ]
-    logger.debug(
-        f"The following command categories are enabled: {enabled_command_categories}"
-    )
-
-    for command_category in enabled_command_categories:
-        command_registry.import_commands(command_category)
-
-    # Unregister commands that are incompatible with the current config
-    incompatible_commands = []
-    for command in command_registry.commands.values():
-        if callable(command.enabled) and not command.enabled(config):
-            command.enabled = False
-            incompatible_commands.append(command)
-
-    for command in incompatible_commands:
-        command_registry.unregister(command)
-        logger.debug(
-            f"Unregistering incompatible command: {command.name}, "
-            f"reason - {command.disabled_reason or 'Disabled by current config.'}"
-        )
+
+    # Create a CommandRegistry instance and scan default folder
+    command_registry = CommandRegistry.with_command_modules(COMMAND_CATEGORIES, config)
 
     ai_config = construct_main_ai_config(
         config,

diff --git a/autogpt/commands/__init__.py b/autogpt/commands/__init__.py
index 9a932b175f03..018f5b8fcfb6 100644
--- a/autogpt/commands/__init__.py
+++ b/autogpt/commands/__init__.py
@@ -3,5 +3,5 @@
     "autogpt.commands.file_operations",
     "autogpt.commands.web_search",
     "autogpt.commands.web_selenium",
-    "autogpt.commands.task_statuses",
+    "autogpt.commands.system",
 ]

diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py
index dd35f8593259..30e1e27ea8ca 100644
--- a/autogpt/commands/execute_code.py
+++ b/autogpt/commands/execute_code.py
@@ -1,4 +1,8 @@
-"""Execute code in a Docker container"""
+"""Commands to execute code"""
+
+COMMAND_CATEGORY = "execute_code"
+COMMAND_CATEGORY_TITLE = "Execute Code"
+
 import os
 import subprocess
 from pathlib import Path

diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py
index 715a90aebffd..d076f3245bf6 100644
--- a/autogpt/commands/file_operations.py
+++ b/autogpt/commands/file_operations.py
@@ -1,6 +1,10 @@
-"""File operations for AutoGPT"""
+"""Commands to perform operations on files"""
+
 from __future__ import annotations
 
+COMMAND_CATEGORY = "file_operations"
+COMMAND_CATEGORY_TITLE = "File Operations"
+
 import contextlib
 import hashlib
 import os

diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py
index 021157fbbd56..f7f8186be161 100644
--- a/autogpt/commands/git_operations.py
+++ b/autogpt/commands/git_operations.py
@@ -1,4 +1,7 @@
-"""Git operations for autogpt"""
+"""Commands to perform Git operations"""
+
+COMMAND_CATEGORY = "git_operations"
+COMMAND_CATEGORY_TITLE = "Git Operations"
 
 from git.repo import Repo

diff --git a/autogpt/commands/image_gen.py b/autogpt/commands/image_gen.py
index e02400a8189b..3f6c1d98de43 100644
--- a/autogpt/commands/image_gen.py
+++ b/autogpt/commands/image_gen.py
@@ -1,4 +1,8 @@
-""" Image Generation Module for AutoGPT."""
+"""Commands to generate images based on text input"""
+
+COMMAND_CATEGORY = "text_to_image"
+COMMAND_CATEGORY_TITLE = "Text to Image"
+
 import io
 import json
 import time

diff --git a/autogpt/commands/task_statuses.py b/autogpt/commands/system.py
similarity index 87%
rename from autogpt/commands/task_statuses.py
rename to autogpt/commands/system.py
index 34908928feea..08bfd5e57ea7 100644
--- a/autogpt/commands/task_statuses.py
+++ b/autogpt/commands/system.py
@@ -1,6 +1,10 @@
-"""Task Statuses module."""
+"""Commands to control the internal state of the program"""
+
 from __future__ import annotations
 
+COMMAND_CATEGORY = "system"
+COMMAND_CATEGORY_TITLE = "System"
+
 from typing import NoReturn
 
 from autogpt.agents.agent import Agent

diff --git a/autogpt/commands/web_search.py b/autogpt/commands/web_search.py
index 9ea0d2061164..49712049d472 100644
--- a/autogpt/commands/web_search.py
+++ b/autogpt/commands/web_search.py
@@ -1,6 +1,10 @@
-"""Google search command for Autogpt."""
+"""Commands to search the web with"""
+
 from __future__ import annotations
 
+COMMAND_CATEGORY = "web_search"
+COMMAND_CATEGORY_TITLE = "Web Search"
+
 import json
 import time
 from itertools import islice

diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py
index 948d799e9c95..2d978494a9d3 100644
--- a/autogpt/commands/web_selenium.py
+++ b/autogpt/commands/web_selenium.py
@@ -1,6 +1,10 @@
-"""Selenium web scraping module."""
+"""Commands for browsing a website"""
+
 from __future__ import annotations
 
+COMMAND_CATEGORY = "web_browse"
+COMMAND_CATEGORY_TITLE = "Web Browsing"
+
 import logging
 from pathlib import Path
 from sys import platform

diff --git a/autogpt/core/runner/cli_web_app/server/api.py b/autogpt/core/runner/cli_web_app/server/api.py
index 2dc3f0101c07..dc2fd55040a0 100644
--- a/autogpt/core/runner/cli_web_app/server/api.py
+++ b/autogpt/core/runner/cli_web_app/server/api.py
@@ -109,6 +109,6 @@ def get_command_registry(config: Config):
     enabled_command_categories = [
         x for x in COMMAND_CATEGORIES if x not in config.disabled_command_categories
     ]
-    for command_category in enabled_command_categories:
-        command_registry.import_commands(command_category)
+    for command_module in enabled_command_categories:
+        command_registry.import_command_module(command_module)
 
     return command_registry

diff --git a/autogpt/models/command_registry.py b/autogpt/models/command_registry.py
index f54f4adb5030..767c7ddda55b 100644
--- a/autogpt/models/command_registry.py
+++ b/autogpt/models/command_registry.py
@@ -1,6 +1,13 @@
+from __future__ import annotations
+
 import importlib
 import inspect
-from typing import Any
+from dataclasses import dataclass
+from types import ModuleType
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from autogpt.config import Config
 
 from autogpt.command_decorator import AUTO_GPT_COMMAND_IDENTIFIER
 from autogpt.logs import logger
@@ -18,9 +25,21 @@ class CommandRegistry:
     commands: dict[str, Command]
     commands_aliases: dict[str, Command]
 
+    # Alternative way to structure the registry; currently redundant with self.commands
+    categories: dict[str, CommandCategory]
+
+    @dataclass
+    class CommandCategory:
+        name: str
+        title: str
+        description: str
+        commands: list[Command] = []
+        modules: list[ModuleType] = []
+
     def __init__(self):
         self.commands = {}
         self.commands_aliases = {}
+        self.categories = {}
 
     def __contains__(self, command_name: str):
         return command_name in self.commands or command_name in self.commands_aliases
@@ -84,7 +103,41 @@ def command_prompt(self) -> str:
         ]
         return "\n".join(commands_list)
 
-    def import_commands(self, module_name: str) -> None:
+    @staticmethod
+    def with_command_modules(modules: list[str], config: Config) -> CommandRegistry:
+        new_registry = CommandRegistry()
+
+        logger.debug(
+            f"The following command categories are disabled: {config.disabled_command_categories}"
+        )
+        enabled_command_modules = [
+            x for x in modules if x not in config.disabled_command_categories
+        ]
+
+        logger.debug(
+            f"The following command categories are enabled: {enabled_command_modules}"
+        )
+
+        for command_module in enabled_command_modules:
+            new_registry.import_command_module(command_module)
+
+        # Unregister commands that are incompatible with the current config
+        incompatible_commands: list[Command] = []
+        for command in new_registry.commands.values():
+            if callable(command.enabled) and not command.enabled(config):
+                command.enabled = False
+                incompatible_commands.append(command)
+
+        for command in incompatible_commands:
+            new_registry.unregister(command)
+            logger.debug(
+                f"Unregistering incompatible command: {command.name}, "
+                f"reason - {command.disabled_reason or 'Disabled by current config.'}"
+            )
+
+        return new_registry
+
+    def import_command_module(self, module_name: str) -> None:
         """
         Imports the specified Python module containing command plugins.
@@ -99,16 +152,42 @@ def import_commands(self, module_name: str) -> None:
 
         module = importlib.import_module(module_name)
 
+        category = self.register_module_category(module)
+
         for attr_name in dir(module):
             attr = getattr(module, attr_name)
+
+            command = None
+
             # Register decorated functions
-            if hasattr(attr, AUTO_GPT_COMMAND_IDENTIFIER) and getattr(
-                attr, AUTO_GPT_COMMAND_IDENTIFIER
-            ):
-                self.register(attr.command)
+            if getattr(attr, AUTO_GPT_COMMAND_IDENTIFIER, False):
+                command = attr.command
+
             # Register command classes
             elif (
                 inspect.isclass(attr) and issubclass(attr, Command) and attr != Command
             ):
-                cmd_instance = attr()
-                self.register(cmd_instance)
+                command = attr()
+
+            if command:
+                self.register(command)
+                category.commands.append(command)
+
+    def register_module_category(self, module: ModuleType) -> CommandCategory:
+        if not (category_name := getattr(module, "COMMAND_CATEGORY", None)):
+            raise ValueError(f"Cannot import invalid command module {module.__name__}")
+
+        if category_name not in self.categories:
+            self.categories[category_name] = CommandRegistry.CommandCategory(
+                name=category_name,
+                title=getattr(
+                    module, "COMMAND_CATEGORY_TITLE", category_name.capitalize()
+                ),
+                description=getattr(module, "__doc__", ""),
+            )
+
+        category = self.categories[category_name]
+        if module not in category.modules:
+            category.modules.append(module)
+
+        return category

diff --git a/tests/unit/test_commands.py b/tests/unit/test_commands.py
index 2cdf8701a69b..57de732a626b 100644
--- a/tests/unit/test_commands.py
+++ b/tests/unit/test_commands.py
@@ -193,7 +193,7 @@ def test_import_mock_commands_module():
     registry = CommandRegistry()
     mock_commands_module = "tests.mocks.mock_commands"
-    registry.import_commands(mock_commands_module)
+    registry.import_command_module(mock_commands_module)
 
     assert "function_based" in registry
     assert registry.commands["function_based"].name == "function_based"
@@ -219,7 +219,7 @@ def test_import_temp_command_file_module(tmp_path: Path):
     sys.path.append(str(tmp_path))
     temp_commands_module = "mock_commands"
-    registry.import_commands(temp_commands_module)
+    registry.import_command_module(temp_commands_module)
 
     # Remove the temp directory from sys.path
     sys.path.remove(str(tmp_path))
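The import flow above now hinges on module-level metadata: import_command_module() refuses any module that does not define COMMAND_CATEGORY, and groups everything it registers under a CommandCategory. A rough usage sketch (illustrative only, not part of the patch):

    from autogpt.models.command_registry import CommandRegistry

    registry = CommandRegistry()
    # autogpt/commands/system.py defines COMMAND_CATEGORY = "system" (see above)
    registry.import_command_module("autogpt.commands.system")

    # Commands land both in the flat mapping and in their category
    category = registry.categories["system"]
    assert all(cmd.name in registry.commands for cmd in category.commands)

    # A module without COMMAND_CATEGORY is rejected
    registry.import_command_module("json")  # raises ValueError

This stricter import also breaks the mock command modules used by the test suite, and the plain list defaults on the CommandCategory dataclass are rejected outright by Python ("ValueError: mutable default ... is not allowed") — both of which the next patch fixes.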
"function_based", From 2b8152444843ec1746f9c293cc118c823c1ca8fb Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Fri, 28 Jul 2023 01:01:36 +0200 Subject: [PATCH 03/20] Clean up prompt generation * Rename Performance Evaluations to Best Practices * Move specification of response format from system prompt to Agent.construct_base_prompt * Clean up PromptGenerator class --- autogpt/agents/agent.py | 6 +- autogpt/agents/base.py | 13 ++- autogpt/config/ai_config.py | 64 ++++++++----- autogpt/config/prompt_config.py | 2 +- autogpt/prompts/generator.py | 144 ++++++++++++---------------- autogpt/prompts/prompt.py | 8 +- prompt_settings.yaml | 2 +- tests/unit/test_prompt_config.py | 22 ++--- tests/unit/test_prompt_generator.py | 21 ++-- 9 files changed, 138 insertions(+), 144 deletions(-) diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py index 563c682385c6..fa20ea587f1c 100644 --- a/autogpt/agents/agent.py +++ b/autogpt/agents/agent.py @@ -293,10 +293,10 @@ def execute_command( # Handle non-native commands (e.g. from plugins) for command in agent.ai_config.prompt_generator.commands: if ( - command_name == command["label"].lower() - or command_name == command["name"].lower() + command_name == command.label.lower() + or command_name == command.name.lower() ): - return command["function"](**arguments) + return command.function(**arguments) raise RuntimeError( f"Cannot execute '{command_name}': unknown command." diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py index e6b24be12ba8..e7c00a4475bf 100644 --- a/autogpt/agents/base.py +++ b/autogpt/agents/base.py @@ -1,8 +1,11 @@ from __future__ import annotations +import json from abc import ABCMeta, abstractmethod from typing import TYPE_CHECKING, Any, Optional +from autogpt.json_utils.utilities import llm_response_schema + if TYPE_CHECKING: from autogpt.config import AIConfig, Config @@ -155,7 +158,15 @@ def construct_base_prompt( prompt = ChatSequence.for_model( self.llm.name, - [Message("system", self.system_prompt)] + prepend_messages, + [ + Message("system", self.system_prompt), + Message( + "system", + "Respond with only valid JSON conforming to the following schema: \n" + f"{json.dumps(llm_response_schema(self.config))}\n", + ), + ] + + prepend_messages, ) # Reserve tokens for messages to be appended later, if any diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py index b47740f6a8d8..632bc4b34f04 100644 --- a/autogpt/config/ai_config.py +++ b/autogpt/config/ai_config.py @@ -1,7 +1,4 @@ -# sourcery skip: do-not-use-staticmethod -""" -A module that contains the AIConfig class object that contains the configuration -""" +"""A module that contains the AIConfig class object that contains the configuration""" from __future__ import annotations import platform @@ -15,6 +12,8 @@ from autogpt.models.command_registry import CommandRegistry from autogpt.prompts.generator import PromptGenerator + from .config import Config + class AIConfig: """ @@ -104,7 +103,7 @@ def save(self, ai_settings_file: str | Path) -> None: yaml.dump(config, file, allow_unicode=True) def construct_full_prompt( - self, config, prompt_generator: Optional[PromptGenerator] = None + self, config: Config, prompt_generator: Optional[PromptGenerator] = None ) -> str: """ Returns a prompt to the user with the class information in an organized fashion. @@ -117,26 +116,27 @@ def construct_full_prompt( including the ai_name, ai_role, ai_goals, and api_budget. 
""" - prompt_start = ( - "Your decisions must always be made independently without" - " seeking user assistance. Play to your strengths as an LLM and pursue" - " simple strategies with no legal complications." - "" - ) - from autogpt.prompts.prompt import build_default_prompt_generator + prompt_generator = prompt_generator or self.prompt_generator if prompt_generator is None: prompt_generator = build_default_prompt_generator(config) - prompt_generator.goals = self.ai_goals - prompt_generator.name = self.ai_name - prompt_generator.role = self.ai_role - prompt_generator.command_registry = self.command_registry + prompt_generator.command_registry = self.command_registry + self.prompt_generator = prompt_generator + for plugin in config.plugins: if not plugin.can_handle_post_prompt(): continue prompt_generator = plugin.post_prompt(prompt_generator) + # Construct full prompt + full_prompt_parts = [ + f"You are {self.ai_name}, {self.ai_role.rstrip('.')}.", + "Your decisions must always be made independently without seeking " + "user assistance. Play to your strengths as an LLM and pursue " + "simple strategies with no legal complications.", + ] + if config.execute_local_commands: # add OS info to prompt os_name = platform.system() @@ -146,14 +146,28 @@ def construct_full_prompt( else distro.name(pretty=True) ) - prompt_start += f"\nThe OS you are running on is: {os_info}" + full_prompt_parts.append(f"The OS you are running on is: {os_info}") - # Construct full prompt - full_prompt = f"You are {prompt_generator.name}, {prompt_generator.role}\n{prompt_start}\n\nGOALS:\n\n" - for i, goal in enumerate(self.ai_goals): - full_prompt += f"{i+1}. {goal}\n" + if self.ai_goals: + full_prompt_parts += "\n".join( + [ + "## Goals", + "In service of the user, you have the following goals:", + *[f"{i+1}. {goal}" for i, goal in enumerate(self.ai_goals)], + ] + ) + + additional_constraints: list[str] = [] if self.api_budget > 0.0: - full_prompt += f"\nIt takes money to let you run. Your API budget is ${self.api_budget:.3f}" - self.prompt_generator = prompt_generator - full_prompt += f"\n\n{prompt_generator.generate_prompt_string(config)}" - return full_prompt + additional_constraints.append( + f"It takes money to let you run. 
" + f"Your API budget is ${self.api_budget:.3f}" + ) + + full_prompt_parts.append( + prompt_generator.generate_prompt_string( + additional_constraints=additional_constraints + ) + ) + + return "\n\n".join(full_prompt_parts).strip("\n") diff --git a/autogpt/config/prompt_config.py b/autogpt/config/prompt_config.py index 793bb4440437..055e7897b16f 100644 --- a/autogpt/config/prompt_config.py +++ b/autogpt/config/prompt_config.py @@ -44,4 +44,4 @@ def __init__(self, prompt_settings_file: str) -> None: self.constraints = config_params.get("constraints", []) self.resources = config_params.get("resources", []) - self.performance_evaluations = config_params.get("performance_evaluations", []) + self.best_practices = config_params.get("best_practices", []) diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index bc836f30c593..a8217953dbbf 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -1,11 +1,8 @@ """ A module for generating custom prompt strings.""" from __future__ import annotations -import json -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, TypedDict - -from autogpt.config import Config -from autogpt.json_utils.utilities import llm_response_schema +from dataclasses import dataclass +from typing import TYPE_CHECKING, Callable, Optional if TYPE_CHECKING: from autogpt.models.command_registry import CommandRegistry @@ -17,34 +14,33 @@ class PromptGenerator: resources, and performance evaluations. """ - class Command(TypedDict): + @dataclass + class Command: label: str name: str params: dict[str, str] function: Optional[Callable] + def __str__(self) -> str: + """Returns a string representation of the command.""" + params_string = ", ".join( + f'"{key}": "{value}"' for key, value in self.params.items() + ) + return f'{self.label}: "{self.name}", params: ({params_string})' + constraints: list[str] commands: list[Command] resources: list[str] - performance_evaluation: list[str] + best_practices: list[str] command_registry: CommandRegistry | None - # TODO: replace with AIConfig - name: str - role: str - goals: list[str] - def __init__(self): self.constraints = [] self.commands = [] self.resources = [] - self.performance_evaluation = [] + self.best_practices = [] self.command_registry = None - self.name = "Bob" - self.role = "AI" - self.goals = [] - def add_constraint(self, constraint: str) -> None: """ Add a constraint to the constraints list. @@ -75,31 +71,15 @@ def add_command( function (callable, optional): A callable function to be called when the command is executed. Defaults to None. """ - command_params = {name: type for name, type in params.items()} - - command: PromptGenerator.Command = { - "label": command_label, - "name": command_name, - "params": command_params, - "function": function, - } - self.commands.append(command) - - def _generate_command_string(self, command: Dict[str, Any]) -> str: - """ - Generate a formatted string representation of a command. - - Args: - command (dict): A dictionary containing command information. - - Returns: - str: The formatted command string. 
- """ - params_string = ", ".join( - f'"{key}": "{value}"' for key, value in command["params"].items() + self.commands.append( + PromptGenerator.Command( + label=command_label, + name=command_name, + params={name: type for name, type in params.items()}, + function=function, + ) ) - return f'{command["label"]}: "{command["name"]}", params: {params_string}' def add_resource(self, resource: str) -> None: """ @@ -110,71 +90,67 @@ def add_resource(self, resource: str) -> None: """ self.resources.append(resource) - def add_performance_evaluation(self, evaluation: str) -> None: + def add_best_practice(self, best_practice: str) -> None: """ - Add a performance evaluation item to the performance_evaluation list. + Add an item to the list of best practices. Args: - evaluation (str): The evaluation item to be added. + best_practice (str): The best practice item to be added. """ - self.performance_evaluation.append(evaluation) + self.best_practices.append(best_practice) - def _generate_numbered_list(self, items: List[Any], item_type="list") -> str: + def _generate_numbered_list(self, items: list[str], start_at: int = 1) -> str: """ - Generate a numbered list from given items based on the item_type. + Generate a numbered list containing the given items. Args: items (list): A list of items to be numbered. - item_type (str, optional): The type of items in the list. - Defaults to 'list'. + start_at (int, optional): The number to start the sequence with; defaults to 1. Returns: str: The formatted numbered list. """ - if item_type == "command": - command_strings = [] - if self.command_registry: - command_strings += [ - str(item) - for item in self.command_registry.commands.values() - if item.enabled - ] - # terminate command is added manually - command_strings += [self._generate_command_string(item) for item in items] - return "\n".join(f"{i+1}. {item}" for i, item in enumerate(command_strings)) - else: - return "\n".join(f"{i+1}. {item}" for i, item in enumerate(items)) - - def generate_prompt_string(self, config: Config) -> str: + return "\n".join(f"{i}. {item}" for i, item in enumerate(items, start_at)) + + def generate_prompt_string( + self, + *, + additional_constraints: list[str] = [], + additional_resources: list[str] = [], + additional_best_practices: list[str] = [], + ) -> str: """ Generate a prompt string based on the constraints, commands, resources, - and performance evaluations. + and best practices. Returns: str: The generated prompt string. 
""" + return ( - f"Constraints:\n{self._generate_numbered_list(self.constraints)}\n\n" - f"{generate_commands(self, config)}" - f"Resources:\n{self._generate_numbered_list(self.resources)}\n\n" - "Performance Evaluation:\n" - f"{self._generate_numbered_list(self.performance_evaluation)}\n\n" - "Respond with only valid JSON conforming to the following schema: \n" - f"{json.dumps(llm_response_schema(config))}\n" + "## Constraints\n" + "You operate within the following constraints:\n" + f"{self._generate_numbered_list(self.constraints + additional_constraints)}\n\n" + "## Commands\n" + "You have access to the following commands:\n" + f"{self._generate_commands()}\n\n" + "## Resources\n" + "You can leverage access to the following resources:\n" + f"{self._generate_numbered_list(self.resources + additional_resources)}\n\n" + "## Best practices\n" + f"{self._generate_numbered_list(self.best_practices + additional_best_practices)}" ) + def _generate_commands(self) -> str: + command_strings = [] + if self.command_registry: + command_strings += [ + str(cmd) + for cmd in self.command_registry.commands.values() + if cmd.enabled + ] -def generate_commands(self, config: Config) -> str: - """ - Generate a prompt string based on the constraints, commands, resources, - and performance evaluations. + # Add commands from plugins etc. + command_strings += [str(cmd) for cmd in self.commands] - Returns: - str: The generated prompt string. - """ - if config.openai_functions: - return "" - return ( - "Commands:\n" - f"{self._generate_numbered_list(self.commands, item_type='command')}\n\n" - ) + return self._generate_numbered_list(command_strings) diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py index b64f11f599a2..d40a0d328807 100644 --- a/autogpt/prompts/prompt.py +++ b/autogpt/prompts/prompt.py @@ -8,7 +8,7 @@ def build_default_prompt_generator(config: Config) -> PromptGenerator: """ This function generates a prompt string that includes various constraints, - commands, resources, and performance evaluations. + commands, resources, and best practices. Returns: str: The generated prompt string. @@ -28,8 +28,8 @@ def build_default_prompt_generator(config: Config) -> PromptGenerator: for resource in prompt_config.resources: prompt_generator.add_resource(resource) - # Add performance evaluations to the PromptGenerator object - for performance_evaluation in prompt_config.performance_evaluations: - prompt_generator.add_performance_evaluation(performance_evaluation) + # Add best practices to the PromptGenerator object + for best_practice in prompt_config.best_practices: + prompt_generator.add_best_practice(best_practice) return prompt_generator diff --git a/prompt_settings.yaml b/prompt_settings.yaml index 342d67b9ebb0..8ec5302feda4 100644 --- a/prompt_settings.yaml +++ b/prompt_settings.yaml @@ -9,7 +9,7 @@ resources: [ 'Long Term memory management.', 'File output.' 
] -performance_evaluations: [ +best_practices: [ 'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.', 'Constructively self-criticize your big-picture behavior constantly.', 'Reflect on past decisions and strategies to refine your approach.', diff --git a/tests/unit/test_prompt_config.py b/tests/unit/test_prompt_config.py index 4616db971b35..b83efd0d5209 100644 --- a/tests/unit/test_prompt_config.py +++ b/tests/unit/test_prompt_config.py @@ -18,10 +18,10 @@ def test_prompt_config_loading(tmp_path): - A test resource - Another test resource - A third test resource -performance_evaluations: -- A test performance evaluation -- Another test performance evaluation -- A third test performance evaluation +best_practices: +- A test best-practice +- Another test best-practice +- A third test best-practice """ prompt_settings_file = tmp_path / "test_prompt_settings.yaml" prompt_settings_file.write_text(yaml_content) @@ -36,13 +36,7 @@ def test_prompt_config_loading(tmp_path): assert prompt_config.resources[0] == "A test resource" assert prompt_config.resources[1] == "Another test resource" assert prompt_config.resources[2] == "A third test resource" - assert len(prompt_config.performance_evaluations) == 3 - assert prompt_config.performance_evaluations[0] == "A test performance evaluation" - assert ( - prompt_config.performance_evaluations[1] - == "Another test performance evaluation" - ) - assert ( - prompt_config.performance_evaluations[2] - == "A third test performance evaluation" - ) + assert len(prompt_config.best_practices) == 3 + assert prompt_config.best_practices[0] == "A test best-practice" + assert prompt_config.best_practices[1] == "Another test best-practice" + assert prompt_config.best_practices[2] == "A third test best-practice" diff --git a/tests/unit/test_prompt_generator.py b/tests/unit/test_prompt_generator.py index 44147e6dbce8..d1b08f1a041a 100644 --- a/tests/unit/test_prompt_generator.py +++ b/tests/unit/test_prompt_generator.py @@ -20,13 +20,12 @@ def test_add_command(): params = {"arg1": "value1", "arg2": "value2"} generator = PromptGenerator() generator.add_command(command_label, command_name, params) - command = { + assert generator.commands[0].__dict__ == { "label": command_label, "name": command_name, "params": params, "function": None, } - assert command in generator.commands def test_add_resource(): @@ -39,18 +38,18 @@ def test_add_resource(): assert resource in generator.resources -def test_add_performance_evaluation(): +def test_add_best_practice(): """ - Test if the add_performance_evaluation() method adds an evaluation to the generator's - performance_evaluation list. + Test if the add_best_practice() method adds a best practice to the generator's + best_practices list. """ - evaluation = "Evaluation1" + practice = "Practice1" generator = PromptGenerator() - generator.add_performance_evaluation(evaluation) - assert evaluation in generator.performance_evaluation + generator.add_best_practice(practice) + assert practice in generator.best_practices -def test_generate_prompt_string(config): +def test_generate_prompt_string(): """ Test if the generate_prompt_string() method generates a prompt string with all the added constraints, commands, resources, and evaluations. 
@@ -82,10 +81,10 @@ def test_generate_prompt_string(config): for resource in resources: generator.add_resource(resource) for evaluation in evaluations: - generator.add_performance_evaluation(evaluation) + generator.add_best_practice(evaluation) # Generate the prompt string and verify its correctness - prompt_string = generator.generate_prompt_string(config) + prompt_string = generator.generate_prompt_string() assert prompt_string is not None # Check if all constraints, commands, resources, and evaluations are present in the prompt string From 38c6fb6a076890ff63b8611c3a1ce81d361c8bd2 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Fri, 28 Jul 2023 01:17:40 +0200 Subject: [PATCH 04/20] Add debug logging to AIConfig autogeneration --- autogpt/app/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/autogpt/app/setup.py b/autogpt/app/setup.py index f2b52916cfe7..cb6073adc0dd 100644 --- a/autogpt/app/setup.py +++ b/autogpt/app/setup.py @@ -83,6 +83,7 @@ def prompt_user( "Falling back to manual mode.", speak_text=True, ) + logger.debug(f"Error during AIConfig generation: {e}") return generate_aiconfig_manual(config) From 031519b142ed59956299e129e2ff8e0cad6b4938 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Sat, 29 Jul 2023 18:33:48 +0200 Subject: [PATCH 05/20] Clarify prompting and add support for multiple thought processes to Agent --- autogpt/agents/base.py | 124 ++++++++++++++++++++++++++++------ autogpt/config/ai_config.py | 20 +++--- autogpt/llm/utils/__init__.py | 3 + autogpt/prompts/prompt.py | 6 +- prompt_settings.yaml | 3 +- 5 files changed, 123 insertions(+), 33 deletions(-) diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py index e7c00a4475bf..bf43b3769823 100644 --- a/autogpt/agents/base.py +++ b/autogpt/agents/base.py @@ -1,10 +1,8 @@ from __future__ import annotations -import json +import re from abc import ABCMeta, abstractmethod -from typing import TYPE_CHECKING, Any, Optional - -from autogpt.json_utils.utilities import llm_response_schema +from typing import TYPE_CHECKING, Any, Literal, Optional if TYPE_CHECKING: from autogpt.config import AIConfig, Config @@ -26,6 +24,8 @@ class BaseAgent(metaclass=ABCMeta): """Base class for all Auto-GPT agents.""" + ThoughtProcessID = Literal["one-shot"] + def __init__( self, ai_config: AIConfig, @@ -94,6 +94,7 @@ def __init__( def think( self, instruction: Optional[str] = None, + thought_process_id: ThoughtProcessID = "one-shot", ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]: """Runs the agent for one cycle. 
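With the PromptGenerator cleanup from PATCH 03 in place, the generator can be exercised in isolation; a minimal sketch (illustrative only, not part of the patch series — the items are invented):

    from autogpt.prompts.generator import PromptGenerator

    generator = PromptGenerator()
    generator.add_constraint("No user assistance")
    generator.add_resource("Internet access for searches and information gathering.")
    generator.add_best_practice("Constructively self-criticize your big-picture behavior constantly.")
    generator.add_command("Task Complete", "task_complete", {"reason": "string"})

    print(
        generator.generate_prompt_string(
            additional_constraints=["It takes money to let you run. Your API budget is $0.050"]
        )
    )

The output is now organized into "## Constraints", "## Commands", "## Resources" and "## Best practices" sections, and caller-supplied items are merged in through the keyword-only additional_* parameters instead of being appended to the finished string, as AIConfig.construct_full_prompt previously did.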
From 031519b142ed59956299e129e2ff8e0cad6b4938 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Sat, 29 Jul 2023 18:33:48 +0200
Subject: [PATCH 05/20] Clarify prompting and add support for multiple thought
 processes to Agent

---
 autogpt/agents/base.py        | 124 ++++++++++++++++++++++------
 autogpt/config/ai_config.py   |  20 +++--
 autogpt/llm/utils/__init__.py |   3 +
 autogpt/prompts/prompt.py     |   6 +-
 prompt_settings.yaml          |   3 +-
 5 files changed, 123 insertions(+), 33 deletions(-)

diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py
index e7c00a4475bf..bf43b3769823 100644
--- a/autogpt/agents/base.py
+++ b/autogpt/agents/base.py
@@ -1,10 +1,8 @@
 from __future__ import annotations
 
-import json
+import re
 from abc import ABCMeta, abstractmethod
-from typing import TYPE_CHECKING, Any, Optional
-
-from autogpt.json_utils.utilities import llm_response_schema
+from typing import TYPE_CHECKING, Any, Literal, Optional
 
 if TYPE_CHECKING:
     from autogpt.config import AIConfig, Config
@@ -26,6 +24,8 @@ class BaseAgent(metaclass=ABCMeta):
     """Base class for all Auto-GPT agents."""
 
+    ThoughtProcessID = Literal["one-shot"]
+
     def __init__(
         self,
         ai_config: AIConfig,
@@ -94,6 +94,7 @@ def __init__(
     def think(
         self,
         instruction: Optional[str] = None,
+        thought_process_id: ThoughtProcessID = "one-shot",
     ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
         """Runs the agent for one cycle.
@@ -106,8 +107,8 @@ def think(
 
         instruction = instruction or self.default_cycle_instruction
 
-        prompt: ChatSequence = self.construct_prompt(instruction)
-        prompt = self.on_before_think(prompt, instruction)
+        prompt: ChatSequence = self.construct_prompt(instruction, thought_process_id)
+        prompt = self.on_before_think(prompt, thought_process_id, instruction)
         raw_response = create_chat_completion(
             prompt,
             self.config,
@@ -117,7 +118,7 @@ def think(
         )
         self.cycle_count += 1
 
-        return self.on_response(raw_response, prompt, instruction)
+        return self.on_response(raw_response, thought_process_id, prompt, instruction)
 
     @abstractmethod
     def execute(
@@ -140,6 +141,7 @@ def execute(
 
     def construct_base_prompt(
         self,
+        thought_process_id: ThoughtProcessID,
         prepend_messages: list[Message] = [],
         append_messages: list[Message] = [],
         reserve_tokens: int = 0,
@@ -158,15 +160,7 @@ def construct_base_prompt(
 
         prompt = ChatSequence.for_model(
             self.llm.name,
-            [
-                Message("system", self.system_prompt),
-                Message(
-                    "system",
-                    "Respond with only valid JSON conforming to the following schema: \n"
-                    f"{json.dumps(llm_response_schema(self.config))}\n",
-                ),
-            ]
-            + prepend_messages,
+            [Message("system", self.system_prompt)] + prepend_messages,
         )
 
         # Reserve tokens for messages to be appended later, if any
@@ -189,7 +183,11 @@ def construct_base_prompt(
 
         return prompt
 
-    def construct_prompt(self, cycle_instruction: str) -> ChatSequence:
+    def construct_prompt(
+        self,
+        cycle_instruction: str,
+        thought_process_id: ThoughtProcessID,
+    ) -> ChatSequence:
         """Constructs and returns a prompt with the following structure:
         1. System prompt
         2. Message history of the agent, truncated & prepended with running summary as needed
@@ -206,14 +204,86 @@ def construct_prompt(
         cycle_instruction_tlength = count_message_tokens(
             cycle_instruction_msg, self.llm.name
         )
-        prompt = self.construct_base_prompt(reserve_tokens=cycle_instruction_tlength)
+
+        append_messages: list[Message] = []
+
+        response_format_instr = self.response_format_instruction(thought_process_id)
+        if response_format_instr:
+            append_messages.append(Message("system", response_format_instr))
+
+        prompt = self.construct_base_prompt(
+            thought_process_id,
+            append_messages=append_messages,
+            reserve_tokens=cycle_instruction_tlength,
+        )
 
         # ADD user input message ("triggering prompt")
         prompt.append(cycle_instruction_msg)
 
         return prompt
 
-    def on_before_think(self, prompt: ChatSequence, instruction: str) -> ChatSequence:
+    # This can be expanded to support multiple types of (inter)actions within an agent
+    def response_format_instruction(self, thought_process_id: ThoughtProcessID) -> str:
+        if thought_process_id != "one-shot":
+            raise NotImplementedError(f"Unknown thought process '{thought_process_id}'")
+
+        RESPONSE_FORMAT_WITH_COMMAND = """```ts
+        interface Response {
+            thoughts: {
+                // Thoughts
+                text: string;
+                reasoning: string;
+                // Short markdown-style bullet list that conveys the long-term plan
+                plan: string;
+                // Constructive self-criticism
+                criticism: string;
+                // Summary of thoughts to say to the user
+                speak: string;
+            };
+            command: {
+                name: string;
+                args: Record<string, any>;
+            };
+        }
+        ```"""
+
+        RESPONSE_FORMAT_WITHOUT_COMMAND = """```ts
+        interface Response {
+            thoughts: {
+                // Thoughts
+                text: string;
+                reasoning: string;
+                // Short markdown-style bullet list that conveys the long-term plan
+                plan: string;
+                // Constructive self-criticism
+                criticism: string;
+                // Summary of thoughts to say to the user
+                speak: string;
+            };
+        }
+        ```"""
+
+        response_format = re.sub(
+            r"\n\s+",
+            "\n",
+            RESPONSE_FORMAT_WITHOUT_COMMAND
+            if self.config.openai_functions
+            else RESPONSE_FORMAT_WITH_COMMAND,
+        )
+
+        use_functions = self.config.openai_functions and self.command_registry.commands
+        return (
+            f"Respond strictly with JSON{', and also specify a command to use through a function_call' if use_functions else ''}. "
+            "The JSON should be compatible with the TypeScript type `Response` from the following:\n"
+            f"{response_format}\n"
+        )
+
+    def on_before_think(
+        self,
+        prompt: ChatSequence,
+        thought_process_id: ThoughtProcessID,
+        instruction: str,
+    ) -> ChatSequence:
         """Called after constructing the prompt but before executing it.
 
         Calls the `on_planning` hook of any enabled and capable plugins, adding their
@@ -248,7 +318,11 @@ def on_before_think(self, prompt: ChatSequence, instruction: str) -> ChatSequenc
         return prompt
 
     def on_response(
-        self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
+        self,
+        llm_response: ChatModelResponse,
+        thought_process_id: ThoughtProcessID,
+        prompt: ChatSequence,
+        instruction: str,
     ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
         """Called upon receiving a response from the chat model.
 
@@ -271,7 +345,9 @@ def on_response(
         )  # FIXME: support function calls
 
         try:
-            return self.parse_and_process_response(llm_response, prompt, instruction)
+            return self.parse_and_process_response(
+                llm_response, thought_process_id, prompt, instruction
+            )
         except SyntaxError as e:
             logger.error(f"Response could not be parsed: {e}")
             # TODO: tune this message
@@ -286,7 +362,11 @@ def on_response(
 
     @abstractmethod
     def parse_and_process_response(
-        self, llm_response: ChatModelResponse, prompt: ChatSequence, instruction: str
+        self,
+        llm_response: ChatModelResponse,
+        thought_process_id: ThoughtProcessID,
+        prompt: ChatSequence,
+        instruction: str,
    ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]:
         """Validate, parse & process the LLM's response.

diff --git a/autogpt/config/ai_config.py b/autogpt/config/ai_config.py
index 632bc4b34f04..ce26e23dd48b 100644
--- a/autogpt/config/ai_config.py
+++ b/autogpt/config/ai_config.py
@@ -148,15 +148,6 @@ def construct_full_prompt(
 
             full_prompt_parts.append(f"The OS you are running on is: {os_info}")
 
-        if self.ai_goals:
-            full_prompt_parts += "\n".join(
-                [
-                    "## Goals",
-                    "In service of the user, you have the following goals:",
-                    *[f"{i+1}. {goal}" for i, goal in enumerate(self.ai_goals)],
-                ]
-            )
-
         additional_constraints: list[str] = []
         if self.api_budget > 0.0:
             additional_constraints.append(
@@ -170,4 +161,15 @@ def construct_full_prompt(
             )
         )
 
+        if self.ai_goals:
+            full_prompt_parts.append(
+                "\n".join(
+                    [
+                        "## Goals",
+                        "For your task, you must fulfill the following goals:",
+                        *[f"{i+1}. {goal}" for i, goal in enumerate(self.ai_goals)],
+                    ]
+                )
+            )
+
         return "\n\n".join(full_prompt_parts).strip("\n")

diff --git a/autogpt/llm/utils/__init__.py b/autogpt/llm/utils/__init__.py
index e433476ec0be..9eceae0db9e5 100644
--- a/autogpt/llm/utils/__init__.py
+++ b/autogpt/llm/utils/__init__.py
@@ -154,6 +154,9 @@ def create_chat_completion(
             function.schema for function in functions
         ]
 
+    # Print full prompt to debug log
+    logger.debug(prompt.dump())
+
     response = iopenai.create_chat_completion(
         messages=prompt.raw(),
         **chat_completion_kwargs,

diff --git a/autogpt/prompts/prompt.py b/autogpt/prompts/prompt.py
index d40a0d328807..627b6c50f189 100644
--- a/autogpt/prompts/prompt.py
+++ b/autogpt/prompts/prompt.py
@@ -2,7 +2,11 @@
 from autogpt.config.prompt_config import PromptConfig
 from autogpt.prompts.generator import PromptGenerator
 
-DEFAULT_TRIGGERING_PROMPT = "Determine exactly one command to use, and respond using the JSON schema specified previously:"
+DEFAULT_TRIGGERING_PROMPT = (
+    "Determine exactly one command to use based on the given goals "
+    "and the progress you have made so far, "
+    "and respond using the JSON schema specified previously:"
+)

diff --git a/prompt_settings.yaml b/prompt_settings.yaml
index 8ec5302feda4..a83ca6225bba 100644
--- a/prompt_settings.yaml
+++ b/prompt_settings.yaml
@@ -7,7 +7,8 @@ constraints: [
 resources: [
     'Internet access for searches and information gathering.',
     'Long Term memory management.',
-    'File output.'
+    'File output.',
+    'Command execution'
 ]
 best_practices: [
     'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.',
"evaluate"] + + def __init__( + self, + ai_config: AIConfig, + command_registry: CommandRegistry, + memory: VectorMemory, + triggering_prompt: str, + config: Config, + cycle_budget: Optional[int] = None, + ): + super().__init__( + ai_config=ai_config, + command_registry=command_registry, + config=config, + default_cycle_instruction=triggering_prompt, + cycle_budget=cycle_budget, + ) + + self.memory = memory + """VectorMemoryProvider used to manage the agent's context (TODO)""" + + self.workspace = Workspace(config.workspace_path, config.restrict_to_workspace) + """Workspace that the agent has access to, e.g. for reading/writing files.""" + + self.created_at = datetime.now().strftime("%Y%m%d_%H%M%S") + """Timestamp the agent was created; only used for structured debug logging.""" + + self.log_cycle_handler = LogCycleHandler() + """LogCycleHandler for structured debug logging.""" + + self.action_history = ActionHistory() + + self.plan: list[str] = [] + """List of steps that the Agent plans to take""" + + def construct_base_prompt( + self, + thought_process_id: ThoughtProcessID, + prepend_messages: list[Message] = [], + append_messages: list[Message] = [], + reserve_tokens: int = 0, + ) -> ChatSequence: + match thought_process_id: + case "plan" | "action": + # Add the current plan to the prompt, if any + if self.plan: + plan_section = [ + "## Plan", + "To complete your task, you have made the following plan:", + ] + plan_section += [f"{i}. {s}" for i, s in enumerate(self.plan, 1)] + + # Add the actions so far to the prompt + if self.action_history: + plan_section += [ + "\n### Progress", + "So far, you have executed the following actions based on the plan:", + ] + for i, cycle in enumerate(self.action_history, 1): + if not (cycle.action and cycle.result): + logger.warn(f"Incomplete action in history: {cycle}") + continue + + result_description = ( + "successful" + if cycle.result.success + else f"not successful, with the following message: {cycle.result.reason}" + ) + plan_section.append( + f"{i}. You executed `{cycle.action.format_call()}`: " + f"the result was {result_description}." 
+ ) + + prepend_messages.append(Message("system", "\n".join(plan_section))) + + case "evaluate": + pass + case _: + raise NotImplementedError( + f"Unknown thought process '{thought_process_id}'" + ) + + prompt = ChatSequence.for_model( + self.llm.name, + [Message("system", self.system_prompt)] + prepend_messages, + ) + + # No message history; this makes it easier to develop & debug the prompt + + if append_messages: + prompt.extend(append_messages) + + return prompt + + def on_before_think(self, *args, **kwargs) -> ChatSequence: + prompt = super().on_before_think(*args, **kwargs) + + self.log_cycle_handler.log_count_within_cycle = 0 + self.log_cycle_handler.log_cycle( + self.ai_config.ai_name, + self.created_at, + self.cycle_count, + self.history.raw(), + FULL_MESSAGE_HISTORY_FILE_NAME, + ) + self.log_cycle_handler.log_cycle( + self.ai_config.ai_name, + self.created_at, + self.cycle_count, + prompt.raw(), + CURRENT_CONTEXT_FILE_NAME, + ) + return prompt + + def execute( + self, + command_name: str | None, + command_args: dict[str, str] | None, + user_input: str | None, + ) -> str: + # Execute command + if command_name is not None and command_name.lower().startswith("error"): + result = f"Could not execute command: {command_name}{command_args}" + elif command_name == "human_feedback": + result = f"Human feedback: {user_input}" + self.log_cycle_handler.log_cycle( + self.ai_config.ai_name, + self.created_at, + self.cycle_count, + user_input, + USER_INPUT_FILE_NAME, + ) + + else: + for plugin in self.config.plugins: + if not plugin.can_handle_pre_command(): + continue + command_name, arguments = plugin.pre_command(command_name, command_args) + command_result = execute_command( + command_name=command_name, + arguments=command_args, + agent=self, + ) + result = f"Command {command_name} returned: " f"{command_result}" + + result_tlength = count_string_tokens(str(command_result), self.llm.name) + memory_tlength = count_string_tokens( + str(self.history.summary_message()), self.llm.name + ) + if result_tlength + memory_tlength > self.send_token_limit: + result = f"Failure: command {command_name} returned too much output. \ + Do not execute this command again with the same arguments." 
+ + for plugin in self.config.plugins: + if not plugin.can_handle_post_command(): + continue + result = plugin.post_command(command_name, result) + # Check if there's a result from the command append it to the message + if result is None: + self.history.add("system", "Unable to execute command", "action_result") + else: + self.history.add("system", result, "action_result") + + return result + + def parse_and_process_response( + self, llm_response: ChatModelResponse, *args, **kwargs + ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]: + if not llm_response.content: + raise SyntaxError("Assistant response has no text content") + + assistant_reply_dict = extract_dict_from_response(llm_response.content) + + valid, errors = validate_dict(assistant_reply_dict, self.config) + if not valid: + raise SyntaxError( + "Validation of response failed:\n " + + ";\n ".join([str(e) for e in errors]) + ) + + for plugin in self.config.plugins: + if not plugin.can_handle_post_planning(): + continue + assistant_reply_dict = plugin.post_planning(assistant_reply_dict) + + response = None, None, assistant_reply_dict + + # Print Assistant thoughts + if assistant_reply_dict != {}: + # Get command name and arguments + try: + command_name, arguments = extract_command( + assistant_reply_dict, llm_response, self.config + ) + response = command_name, arguments, assistant_reply_dict + except Exception as e: + logger.error("Error: \n", str(e)) + + self.log_cycle_handler.log_cycle( + self.ai_config.ai_name, + self.created_at, + self.cycle_count, + assistant_reply_dict, + NEXT_ACTION_FILE_NAME, + ) + return response + + +def extract_command( + assistant_reply_json: dict, assistant_reply: ChatModelResponse, config: Config +) -> tuple[str, dict[str, str]]: + """Parse the response and return the command name and arguments + + Args: + assistant_reply_json (dict): The response object from the AI + assistant_reply (ChatModelResponse): The model response from the AI + config (Config): The config object + + Returns: + tuple: The command name and arguments + + Raises: + json.decoder.JSONDecodeError: If the response is not valid JSON + + Exception: If any other error occurs + """ + if config.openai_functions: + if assistant_reply.function_call is None: + return "Error:", {"message": "No 'function_call' in assistant reply"} + assistant_reply_json["command"] = { + "name": assistant_reply.function_call.name, + "args": json.loads(assistant_reply.function_call.arguments), + } + try: + if "command" not in assistant_reply_json: + return "Error:", {"message": "Missing 'command' object in JSON"} + + if not isinstance(assistant_reply_json, dict): + return ( + "Error:", + { + "message": f"The previous message sent was not a dictionary {assistant_reply_json}" + }, + ) + + command = assistant_reply_json["command"] + if not isinstance(command, dict): + return "Error:", {"message": "'command' object is not a dictionary"} + + if "name" not in command: + return "Error:", {"message": "Missing 'name' field in 'command' object"} + + command_name = command["name"] + + # Use an empty dictionary if 'args' field is not present in 'command' object + arguments = command.get("args", {}) + + return command_name, arguments + except json.decoder.JSONDecodeError: + return "Error:", {"message": "Invalid JSON"} + # All other errors, return "Error: + error message" + except Exception as e: + return "Error:", {"message": str(e)} + + +def execute_command( + command_name: str, + arguments: dict[str, str], + agent: PlanningAgent, +) -> Any: + """Execute 
the command and return the result + + Args: + command_name (str): The name of the command to execute + arguments (dict): The arguments for the command + agent (Agent): The agent that is executing the command + + Returns: + str: The result of the command + """ + try: + # Execute a native command with the same name or alias, if it exists + if command := agent.command_registry.get_command(command_name): + return command(**arguments, agent=agent) + + # Handle non-native commands (e.g. from plugins) + for command in agent.ai_config.prompt_generator.commands: + if ( + command_name == command.label.lower() + or command_name == command.name.lower() + ): + return command.function(**arguments) + + raise RuntimeError( + f"Cannot execute '{command_name}': unknown command." + " Do not try to use this command again." + ) + except Exception as e: + return f"Error: {str(e)}" diff --git a/autogpt/memory/agent_history.py b/autogpt/memory/agent_history.py new file mode 100644 index 000000000000..9e5cfff85b04 --- /dev/null +++ b/autogpt/memory/agent_history.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Literal + + +@dataclass +class Action: + name: str + args: dict[str, Any] + reasoning: str + + def format_call(self) -> str: + return f"{self.name}({', '.join([f'{a}={repr(v)}' for a, v in self.args.items()])})" + + +@dataclass +class ActionSuccessResult: + success: Literal[True] + results: Any + + +@dataclass +class ActionErrorResult: + success: Literal[False] + reason: str + + +ActionResult = ActionSuccessResult | ActionErrorResult + + +class ActionHistory: + """Utility container for an action history""" + + @dataclass + class CycleRecord: + action: Action | None + result: ActionResult | None + + cursor: int + cycles: list[CycleRecord] + + def __init__(self, cycles: list[CycleRecord] = []): + self.cycles = cycles + self.cursor = len(self.cycles) + + @property + def current_record(self) -> CycleRecord | None: + if self.cursor == len(self): + return None + return self[self.cursor] + + def __getitem__(self, key: int) -> CycleRecord: + return self.cycles[key] + + def __iter__(self): + return iter(self.cycles) + + def __len__(self): + return len(self.cycles) + + def __bool__(self): + return len(self.cycles) > 0 + + def register_action(self, action: Action) -> None: + if not self.current_record: + self.cycles.append(self.CycleRecord(None, None)) + assert self.current_record + elif self.current_record.action: + raise ValueError("Action for current cycle already set") + + self.current_record.action = action + + def register_result(self, result: ActionResult) -> None: + if not self.current_record: + raise RuntimeError("Cannot register result for cycle without action") + elif self.current_record.result: + raise ValueError("Result for current cycle already set") + + self.current_record.result = result From 3d7bdd72334f1b76ceecadfd718059e6485f2508 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Sun, 30 Jul 2023 21:32:25 +0200 Subject: [PATCH 07/20] Disable message history by default on BaseAgent --- autogpt/agents/agent.py | 3 +++ autogpt/agents/base.py | 32 ++++++++++++++++++-------------- autogpt/agents/planning_agent.py | 22 +++++++--------------- 3 files changed, 28 insertions(+), 29 deletions(-) diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py index fa20ea587f1c..daa2be627abd 100644 --- a/autogpt/agents/agent.py +++ b/autogpt/agents/agent.py @@ -97,6 +97,9 @@ def construct_base_prompt(self, *args, **kwargs) -> 
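The new ActionHistory enforces a strict action → result cadence per cycle. A quick sketch of the intended usage (illustrative, not part of the patch):

    from autogpt.memory.agent_history import Action, ActionHistory, ActionSuccessResult

    history = ActionHistory()
    history.register_action(
        Action(name="web_search", args={"query": "AutoGPT"}, reasoning="Need background info")
    )
    history.register_result(ActionSuccessResult(success=True, results="..."))

    # Registering a result with no pending action is rejected
    empty = ActionHistory()
    empty.register_result(ActionSuccessResult(success=True, results=None))  # raises RuntimeError

Note that ActionHistory(cycles=[]) uses a mutable default argument, so every instance constructed without arguments shares one list — a cousin of the mutable-default pitfall that PATCH 02 fixed in CommandCategory.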
From 3d7bdd72334f1b76ceecadfd718059e6485f2508 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Sun, 30 Jul 2023 21:32:25 +0200
Subject: [PATCH 07/20] Disable message history by default on BaseAgent

---
 autogpt/agents/agent.py          |  3 +++
 autogpt/agents/base.py           | 32 ++++++++++++++++++--------------
 autogpt/agents/planning_agent.py | 22 +++++++---------------
 3 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py
index fa20ea587f1c..daa2be627abd 100644
--- a/autogpt/agents/agent.py
+++ b/autogpt/agents/agent.py
@@ -97,6 +97,9 @@ def construct_base_prompt(self, *args, **kwargs) -> ChatSequence:
             kwargs["append_messages"] = []
         kwargs["append_messages"].append(budget_msg)
 
+        # Include message history in base prompt
+        kwargs["with_message_history"] = True
+
         return super().construct_base_prompt(*args, **kwargs)

diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py
index bf43b3769823..5d36e0f477f0 100644
--- a/autogpt/agents/base.py
+++ b/autogpt/agents/base.py
@@ -145,6 +145,7 @@ def construct_base_prompt(
         prepend_messages: list[Message] = [],
         append_messages: list[Message] = [],
         reserve_tokens: int = 0,
+        with_message_history: bool = False,
     ) -> ChatSequence:
         """Constructs and returns a prompt with the following structure:
         1. System prompt
@@ -163,20 +164,23 @@ def construct_base_prompt(
             [Message("system", self.system_prompt)] + prepend_messages,
         )
 
-        # Reserve tokens for messages to be appended later, if any
-        reserve_tokens += self.history.max_summary_tlength
-        if append_messages:
-            reserve_tokens += count_message_tokens(append_messages, self.llm.name)
-
-        # Fill message history, up to a margin of reserved_tokens.
-        # Trim remaining historical messages and add them to the running summary.
-        history_start_index = len(prompt)
-        trimmed_history = add_history_upto_token_limit(
-            prompt, self.history, self.send_token_limit - reserve_tokens
-        )
-        if trimmed_history:
-            new_summary_msg, _ = self.history.trim_messages(list(prompt), self.config)
-            prompt.insert(history_start_index, new_summary_msg)
+        if with_message_history:
+            # Reserve tokens for messages to be appended later, if any
+            reserve_tokens += self.history.max_summary_tlength
+            if append_messages:
+                reserve_tokens += count_message_tokens(append_messages, self.llm.name)
+
+            # Fill message history, up to a margin of reserved_tokens.
+            # Trim remaining historical messages and add them to the running summary.
+            history_start_index = len(prompt)
+            trimmed_history = add_history_upto_token_limit(
+                prompt, self.history, self.send_token_limit - reserve_tokens
+            )
+            if trimmed_history:
+                new_summary_msg, _ = self.history.trim_messages(
+                    list(prompt), self.config
+                )
+                prompt.insert(history_start_index, new_summary_msg)
 
         if append_messages:
             prompt.extend(append_messages)

diff --git a/autogpt/agents/planning_agent.py b/autogpt/agents/planning_agent.py
index 2865723189cd..6e9907bce48b 100644
--- a/autogpt/agents/planning_agent.py
+++ b/autogpt/agents/planning_agent.py
@@ -70,12 +70,12 @@ def __init__(
         """List of steps that the Agent plans to take"""
 
     def construct_base_prompt(
-        self,
-        thought_process_id: ThoughtProcessID,
-        prepend_messages: list[Message] = [],
-        append_messages: list[Message] = [],
-        reserve_tokens: int = 0,
+        self, thought_process_id: ThoughtProcessID, **kwargs
     ) -> ChatSequence:
+        prepend_messages = kwargs["prepend_messages"] = kwargs.get(
+            "prepend_messages", []
+        )
+
         match thought_process_id:
             case "plan" | "action":
                 # Add the current plan to the prompt, if any
@@ -116,18 +116,10 @@ def construct_base_prompt(
                     f"Unknown thought process '{thought_process_id}'"
                 )
 
-        prompt = ChatSequence.for_model(
-            self.llm.name,
-            [Message("system", self.system_prompt)] + prepend_messages,
+        return super().construct_base_prompt(
+            thought_process_id=thought_process_id, **kwargs
         )
 
-        # No message history; this makes it easier to develop & debug the prompt
-
-        if append_messages:
-            prompt.extend(append_messages)
-
-        return prompt
-
     def on_before_think(self, *args, **kwargs) -> ChatSequence:
         prompt = super().on_before_think(*args, **kwargs)
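Message history is now opt-in per agent class: a subclass that wants the trimmed history and running summary injects the flag into the kwargs it forwards, exactly as Agent.construct_base_prompt does above. The pattern in isolation (hypothetical subclass, for illustration):

    class VerboseAgent(BaseAgent):
        def construct_base_prompt(self, *args, **kwargs) -> ChatSequence:
            # Opt in to history; BaseAgent reserves tokens and inserts the
            # running-summary message when trimming is needed.
            kwargs["with_message_history"] = True
            return super().construct_base_prompt(*args, **kwargs)

PlanningAgent simply does not set the flag, so it gets the history-free base prompt — which also lets it drop its own copy of the prompt-assembly code in favor of super().construct_base_prompt().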
ThoughtProcessOutput: """Runs the agent for one cycle. Params: @@ -323,7 +324,7 @@ def on_response( thought_process_id: ThoughtProcessID, prompt: ChatSequence, instruction: str, - ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]: + ) -> ThoughtProcessOutput: """Called upon receiving a response from the chat model. Adds the last/newest message in the prompt and the response to `history`, @@ -367,7 +368,7 @@ def parse_and_process_response( thought_process_id: ThoughtProcessID, prompt: ChatSequence, instruction: str, - ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]: + ) -> ThoughtProcessOutput: """Validate, parse & process the LLM's response. Must be implemented by derivative classes: no base implementation is provided, diff --git a/autogpt/command_decorator.py b/autogpt/command_decorator.py index d082d9bf5d7f..64d2967b517e 100644 --- a/autogpt/command_decorator.py +++ b/autogpt/command_decorator.py @@ -2,7 +2,7 @@ from typing import Any, Callable, Optional, TypedDict from autogpt.config import Config -from autogpt.models.command import Command, CommandParameter +from autogpt.models.command import Command, CommandOutput, CommandParameter # Unique identifier for auto-gpt commands AUTO_GPT_COMMAND_IDENTIFIER = "auto_gpt_command" @@ -21,10 +21,10 @@ def command( enabled: bool | Callable[[Config], bool] = True, disabled_reason: Optional[str] = None, aliases: list[str] = [], -) -> Callable[..., Any]: +) -> Callable[..., CommandOutput]: """The command decorator is used to create Command objects from ordinary functions.""" - def decorator(func: Callable[..., Any]) -> Command: + def decorator(func: Callable[..., CommandOutput]) -> Command: typed_parameters = [ CommandParameter( name=param_name, diff --git a/autogpt/models/command.py b/autogpt/models/command.py index 614697861efb..5f105d9d0c4f 100644 --- a/autogpt/models/command.py +++ b/autogpt/models/command.py @@ -4,6 +4,8 @@ from .command_parameter import CommandParameter +CommandOutput = Any + class Command: """A class representing a command. @@ -18,7 +20,7 @@ def __init__( self, name: str, description: str, - method: Callable[..., Any], + method: Callable[..., CommandOutput], parameters: list[CommandParameter], enabled: bool | Callable[[Config], bool] = True, disabled_reason: Optional[str] = None, From b72bc8743c1cc890e68b341133641d7e72e2acaf Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 31 Jul 2023 00:59:33 +0200 Subject: [PATCH 09/20] Fix interrupts in main.py --- autogpt/app/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/autogpt/app/main.py b/autogpt/app/main.py index f8ac3ca4b0a6..1efaf6a43e7a 100644 --- a/autogpt/app/main.py +++ b/autogpt/app/main.py @@ -174,7 +174,7 @@ def run_auto_gpt( run_interaction_loop(agent) -def _get_cycle_budget(continuous_mode: bool, continuous_limit: int) -> int | None: +def _get_cycle_budget(continuous_mode: bool, continuous_limit: int) -> int | float: # Translate from the continuous_mode/continuous_limit config # to a cycle_budget (maximum number of cycles to run without checking in with the # user) and a count of cycles_remaining before we check in.. @@ -217,10 +217,9 @@ def run_interaction_loop( def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None: nonlocal cycle_budget, cycles_remaining, spinner - if cycles_remaining in [0, 1, math.inf]: + if cycles_remaining in [0, 1]: logger.typewriter_log( - "Interrupt signal received. 
Stopping continuous command execution " - "immediately.", + "Interrupt signal received. Stopping Auto-GPT immediately.", Fore.RED, ) sys.exit() From 5b4c33a0a18e12a152711ec4cf274a049f81938d Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 31 Jul 2023 01:08:15 +0200 Subject: [PATCH 10/20] Use custom exceptions and clean up exception/error handling --- autogpt/agents/agent.py | 183 ++++++++++++++++------------ autogpt/agents/base.py | 17 +-- autogpt/agents/utils/exceptions.py | 56 +++++++++ autogpt/app/main.py | 72 ++++++----- autogpt/commands/execute_code.py | 28 +++-- autogpt/commands/file_operations.py | 55 ++++----- autogpt/commands/git_operations.py | 6 +- autogpt/commands/web_search.py | 7 +- autogpt/commands/web_selenium.py | 14 ++- autogpt/models/agent_actions.py | 45 +++++++ 10 files changed, 318 insertions(+), 165 deletions(-) create mode 100644 autogpt/agents/utils/exceptions.py create mode 100644 autogpt/models/agent_actions.py diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py index bafbce46d7db..a06b39b2590f 100644 --- a/autogpt/agents/agent.py +++ b/autogpt/agents/agent.py @@ -3,7 +3,7 @@ import json import time from datetime import datetime -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Optional if TYPE_CHECKING: from autogpt.config import AIConfig, Config @@ -11,6 +11,12 @@ from autogpt.memory.vector import VectorMemory from autogpt.models.command_registry import CommandRegistry +from autogpt.agents.utils.exceptions import ( + AgentException, + CommandExecutionError, + InvalidAgentResponseError, + UnknownCommandError, +) from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict from autogpt.llm.api_manager import ApiManager from autogpt.llm.base import Message @@ -23,10 +29,16 @@ USER_INPUT_FILE_NAME, LogCycleHandler, ) +from autogpt.models.agent_actions import ( + ActionErrorResult, + ActionInterruptedByHuman, + ActionResult, + ActionSuccessResult, +) from autogpt.models.command import CommandOutput from autogpt.workspace import Workspace -from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName +from .base import BaseAgent class Agent(BaseAgent): @@ -122,15 +134,14 @@ def on_before_think(self, *args, **kwargs) -> ChatSequence: def execute( self, - command_name: str | None, - command_args: dict[str, str] | None, - user_input: str | None, - ) -> str: - # Execute command - if command_name is not None and command_name.lower().startswith("error"): - result = f"Could not execute command: {command_name}{command_args}" - elif command_name == "human_feedback": - result = f"Human feedback: {user_input}" + command_name: str, + command_args: dict[str, str] = {}, + user_input: str = "", + ) -> ActionResult: + result: ActionResult + + if command_name == "human_feedback": + result = ActionInterruptedByHuman(user_input) self.log_cycle_handler.log_cycle( self.ai_config.ai_name, self.created_at, @@ -144,30 +155,51 @@ def execute( if not plugin.can_handle_pre_command(): continue command_name, arguments = plugin.pre_command(command_name, command_args) - command_result = execute_command( - command_name=command_name, - arguments=command_args, - agent=self, - ) - result = f"Command {command_name} returned: " f"{command_result}" - result_tlength = count_string_tokens(str(command_result), self.llm.name) + try: + return_value = execute_command( + command_name=command_name, + arguments=command_args, + agent=self, + ) + result = ActionSuccessResult(return_value) + except AgentException as e: + result = 
ActionErrorResult(e.message, e) + + result_tlength = count_string_tokens(str(result), self.llm.name) memory_tlength = count_string_tokens( str(self.history.summary_message()), self.llm.name ) if result_tlength + memory_tlength > self.send_token_limit: - result = f"Failure: command {command_name} returned too much output. \ - Do not execute this command again with the same arguments." + result = ActionErrorResult( + reason=f"Command {command_name} returned too much output. " + "Do not execute this command again with the same arguments." + ) for plugin in self.config.plugins: if not plugin.can_handle_post_command(): continue - result = plugin.post_command(command_name, result) + if result.status == "success": + result.results = plugin.post_command(command_name, result.results) + elif result.status == "error": + result.reason = plugin.post_command(command_name, result.reason) + # Check if there's a result from the command append it to the message - if result is None: - self.history.add("system", "Unable to execute command", "action_result") - else: - self.history.add("system", result, "action_result") + if result.status == "success": + self.history.add( + "system", + f"Command {command_name} returned: {result.results}", + "action_result", + ) + elif result.status == "error": + message = f"Command {command_name} failed: {result.reason}" + if ( + result.error + and isinstance(result.error, AgentException) + and result.error.hint + ): + message = message.rstrip(".") + f". {result.error.hint}" + self.history.add("system", message, "action_result") return result @@ -175,34 +207,29 @@ def parse_and_process_response( self, llm_response: ChatModelResponse, *args, **kwargs ) -> Agent.ThoughtProcessOutput: if not llm_response.content: - raise SyntaxError("Assistant response has no text content") + raise InvalidAgentResponseError("Assistant response has no text content") - assistant_reply_dict = extract_dict_from_response(llm_response.content) - - valid, errors = validate_dict(assistant_reply_dict, self.config) - if not valid: - raise SyntaxError( - "Validation of response failed:\n " - + ";\n ".join([str(e) for e in errors]) - ) + response_content = llm_response.content for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): continue - assistant_reply_dict = plugin.post_planning(assistant_reply_dict) + response_content = plugin.post_planning(response_content) - response = None, None, assistant_reply_dict + assistant_reply_dict = extract_dict_from_response(response_content) - # Print Assistant thoughts - if assistant_reply_dict != {}: - # Get command name and arguments - try: - command_name, arguments = extract_command( - assistant_reply_dict, llm_response, self.config - ) - response = command_name, arguments, assistant_reply_dict - except Exception as e: - logger.error("Error: \n", str(e)) + _, errors = validate_dict(assistant_reply_dict, self.config) + if errors: + raise InvalidAgentResponseError( + "Validation of response failed:\n " + + ";\n ".join([str(e) for e in errors]) + ) + + # Get command name and arguments + command_name, arguments = extract_command( + assistant_reply_dict, llm_response, self.config + ) + response = command_name, arguments, assistant_reply_dict self.log_cycle_handler.log_cycle( self.ai_config.ai_name, @@ -234,29 +261,26 @@ def extract_command( """ if config.openai_functions: if assistant_reply.function_call is None: - return "Error:", {"message": "No 'function_call' in assistant reply"} + raise InvalidAgentResponseError("No 'function_call' in assistant 
reply") assistant_reply_json["command"] = { "name": assistant_reply.function_call.name, "args": json.loads(assistant_reply.function_call.arguments), } try: - if "command" not in assistant_reply_json: - return "Error:", {"message": "Missing 'command' object in JSON"} - if not isinstance(assistant_reply_json, dict): - return ( - "Error:", - { - "message": f"The previous message sent was not a dictionary {assistant_reply_json}" - }, + raise InvalidAgentResponseError( + f"The previous message sent was not a dictionary {assistant_reply_json}" ) + if "command" not in assistant_reply_json: + raise InvalidAgentResponseError("Missing 'command' object in JSON") + command = assistant_reply_json["command"] if not isinstance(command, dict): - return "Error:", {"message": "'command' object is not a dictionary"} + raise InvalidAgentResponseError("'command' object is not a dictionary") if "name" not in command: - return "Error:", {"message": "Missing 'name' field in 'command' object"} + raise InvalidAgentResponseError("Missing 'name' field in 'command' object") command_name = command["name"] @@ -264,11 +288,12 @@ def extract_command( arguments = command.get("args", {}) return command_name, arguments + except json.decoder.JSONDecodeError: - return "Error:", {"message": "Invalid JSON"} - # All other errors, return "Error: + error message" + raise InvalidAgentResponseError("Invalid JSON") + except Exception as e: - return "Error:", {"message": str(e)} + raise InvalidAgentResponseError(str(e)) def execute_command( @@ -286,22 +311,28 @@ def execute_command( Returns: str: The result of the command """ - try: - # Execute a native command with the same name or alias, if it exists - if command := agent.command_registry.get_command(command_name): + # Execute a native command with the same name or alias, if it exists + if command := agent.command_registry.get_command(command_name): + try: return command(**arguments, agent=agent) - - # Handle non-native commands (e.g. from plugins) - for command in agent.ai_config.prompt_generator.commands: - if ( - command_name == command.label.lower() - or command_name == command.name.lower() - ): + except AgentException: + raise + except Exception as e: + raise CommandExecutionError(str(e)) + + # Handle non-native commands (e.g. from plugins) + for command in agent.ai_config.prompt_generator.commands: + if ( + command_name == command.label.lower() + or command_name == command.name.lower() + ): + try: return command.function(**arguments) + except AgentException: + raise + except Exception as e: + raise CommandExecutionError(str(e)) - raise RuntimeError( - f"Cannot execute '{command_name}': unknown command." - " Do not try to use this command again." - ) - except Exception as e: - return f"Error: {str(e)}" + raise UnknownCommandError( + f"Cannot execute command '{command_name}': unknown command." 
+ ) diff --git a/autogpt/agents/base.py b/autogpt/agents/base.py index 58bf882a375f..52ce285a451b 100644 --- a/autogpt/agents/base.py +++ b/autogpt/agents/base.py @@ -9,11 +9,13 @@ from autogpt.models.command_registry import CommandRegistry +from autogpt.agents.utils.exceptions import InvalidAgentResponseError from autogpt.llm.base import ChatModelResponse, ChatSequence, Message from autogpt.llm.providers.openai import OPEN_AI_CHAT_MODELS, get_openai_command_specs from autogpt.llm.utils import count_message_tokens, create_chat_completion from autogpt.logs import logger from autogpt.memory.message_history import MessageHistory +from autogpt.models.agent_actions import ActionResult from autogpt.prompts.prompt import DEFAULT_TRIGGERING_PROMPT CommandName = str @@ -25,7 +27,7 @@ class BaseAgent(metaclass=ABCMeta): """Base class for all Auto-GPT agents.""" ThoughtProcessID = Literal["one-shot"] - ThoughtProcessOutput = tuple[CommandName | None, CommandArgs | None, AgentThoughts] + ThoughtProcessOutput = tuple[CommandName, CommandArgs, AgentThoughts] def __init__( self, @@ -124,10 +126,10 @@ def think( @abstractmethod def execute( self, - command_name: str | None, - command_args: dict[str, str] | None, - user_input: str | None, - ) -> str: + command_name: str, + command_args: dict[str, str] = {}, + user_input: str = "", + ) -> ActionResult: """Executes the given command, if any, and returns the agent's response. Params: @@ -349,15 +351,14 @@ def on_response( return self.parse_and_process_response( llm_response, thought_process_id, prompt, instruction ) - except SyntaxError as e: - logger.error(f"Response could not be parsed: {e}") + except InvalidAgentResponseError as e: # TODO: tune this message self.history.add( "system", f"Your response could not be parsed: {e}" "\n\nRemember to only respond using the specified format above!", ) - return None, None, {} + raise # TODO: update memory/context diff --git a/autogpt/agents/utils/exceptions.py b/autogpt/agents/utils/exceptions.py new file mode 100644 index 000000000000..d6f2d74e9261 --- /dev/null +++ b/autogpt/agents/utils/exceptions.py @@ -0,0 +1,56 @@ +from typing import Optional + + +class AgentException(Exception): + """Base class for specific exceptions relevant in the execution of Agents""" + + message: str + + hint: Optional[str] = None + """A hint which can be passed to the LLM to reduce reoccurrence of this error""" + + def __init__(self, message: str, *args): + self.message = message + super().__init__(message, *args) + + +class ConfigurationError(AgentException): + """Error caused by invalid, incompatible or otherwise incorrect configuration""" + + +class InvalidAgentResponseError(AgentException): + """The LLM deviated from the prescribed response format""" + + +class UnknownCommandError(AgentException): + """The AI tried to use an unknown command""" + + hint = "Do not try to use this command again." 
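The `hint` attribute is the interesting part of this hierarchy: when a command fails, `Agent.execute` appends the hint to the error message that is written back into the LLM's history. A minimal standalone sketch of that flow, using reduced re-declarations of the classes above rather than the real autogpt modules:

```python
# Standalone sketch; these two classes mirror (in reduced form) the exception
# hierarchy defined in autogpt/agents/utils/exceptions.py above.
from typing import Optional


class AgentException(Exception):
    message: str
    hint: Optional[str] = None  # fed back to the LLM to discourage a repeat

    def __init__(self, message: str, *args):
        self.message = message
        super().__init__(message, *args)


class UnknownCommandError(AgentException):
    hint = "Do not try to use this command again."


def feedback_for(command_name: str, error: AgentException) -> str:
    # Mirrors how Agent.execute composes the "action_result" history message.
    message = f"Command {command_name} failed: {error.message}"
    if error.hint:
        message = message.rstrip(".") + f". {error.hint}"
    return message


print(feedback_for("fly", UnknownCommandError("Cannot execute command 'fly': unknown command.")))
# -> Command fly failed: Cannot execute command 'fly': unknown command. Do not try to use this command again.
```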
+ + +class DuplicateOperationError(AgentException): + """The proposed operation has already been executed""" + + +class CommandExecutionError(AgentException): + """An error occurred when trying to execute the command""" + + +class InvalidArgumentError(CommandExecutionError): + """The command received an invalid argument""" + + +class OperationNotAllowedError(CommandExecutionError): + """The agent is not allowed to execute the proposed operation""" + + +class AccessDeniedError(CommandExecutionError): + """The operation failed because access to a required resource was denied""" + + +class CodeExecutionError(CommandExecutionError): + """The operation (an attempt to run arbitrary code) returned an error""" + + +class TooMuchOutputError(CommandExecutionError): + """The operation generated more output than what the Agent can process""" diff --git a/autogpt/app/main.py b/autogpt/app/main.py index 1efaf6a43e7a..b3d5090961fd 100644 --- a/autogpt/app/main.py +++ b/autogpt/app/main.py @@ -11,6 +11,7 @@ from colorama import Fore, Style from autogpt.agents import Agent, AgentThoughts, CommandArgs, CommandName +from autogpt.agents.utils.exceptions import InvalidAgentResponseError from autogpt.app.configurator import create_config from autogpt.app.setup import prompt_user from autogpt.commands import COMMAND_CATEGORIES @@ -243,6 +244,9 @@ def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None: # Application Main Loop # ######################### + # Keep track of consecutive failures of the agent + consecutive_failures = 0 + while cycles_remaining > 0: logger.debug(f"Cycle budget: {cycle_budget}; remaining: {cycles_remaining}") @@ -251,7 +255,20 @@ def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None: ######## # Have the agent determine the next action to take. with spinner: - command_name, command_args, assistant_reply_dict = agent.think() + try: + command_name, command_args, assistant_reply_dict = agent.think() + except InvalidAgentResponseError as e: + logger.warn(f"The agent's thoughts could not be parsed: {e}") + consecutive_failures += 1 + if consecutive_failures >= 3: + logger.error( + f"The agent failed to output valid thoughts {consecutive_failures} " + "times in a row. Terminating..." + ) + sys.exit() + continue + + consecutive_failures = 0 ############### # Update User # @@ -297,7 +314,7 @@ def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None: else: # user_feedback == UserFeedback.TEXT command_name = "human_feedback" else: - user_input = None + user_input = "" # First log new-line so user can differentiate sections better in console logger.typewriter_log("\n") if cycles_remaining != math.inf: @@ -314,19 +331,25 @@ def graceful_agent_interrupt(signum: int, frame: Optional[FrameType]) -> None: # and then having the decrement set it to 0, exiting the application. 
if command_name != "human_feedback": cycles_remaining -= 1 + + if not command_name: + continue + result = agent.execute(command_name, command_args, user_input) - if result is not None: - logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result) - else: - logger.typewriter_log("SYSTEM: ", Fore.YELLOW, "Unable to execute command") + if result.status == "success": + logger.typewriter_log("SYSTEM: ", Fore.YELLOW, result.results) + elif result.status == "error": + logger.warn( + f"Command {command_name} returned an error: {result.error or result.reason}" + ) def update_user( config: Config, ai_config: AIConfig, - command_name: CommandName | None, - command_args: CommandArgs | None, + command_name: CommandName, + command_args: CommandArgs, assistant_reply_dict: AgentThoughts, ) -> None: """Prints the assistant's thoughts and the next command to the user. @@ -341,30 +364,17 @@ def update_user( print_assistant_thoughts(ai_config.ai_name, assistant_reply_dict, config) - if command_name is not None: - if config.speak_mode: - say_text(f"I want to execute {command_name}", config) + if config.speak_mode: + say_text(f"I want to execute {command_name}", config) - # First log new-line so user can differentiate sections better in console - logger.typewriter_log("\n") - logger.typewriter_log( - "NEXT ACTION: ", - Fore.CYAN, - f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} " - f"ARGUMENTS = {Fore.CYAN}{command_args}{Style.RESET_ALL}", - ) - elif command_name.lower().startswith("error"): - logger.typewriter_log( - "ERROR: ", - Fore.RED, - f"The Agent failed to select an action. " f"Error message: {command_name}", - ) - else: - logger.typewriter_log( - "NO ACTION SELECTED: ", - Fore.RED, - f"The Agent failed to select an action.", - ) + # First log new-line so user can differentiate sections better in console + logger.typewriter_log("\n") + logger.typewriter_log( + "NEXT ACTION: ", + Fore.CYAN, + f"COMMAND = {Fore.CYAN}{remove_ansi_escape(command_name)}{Style.RESET_ALL} " + f"ARGUMENTS = {Fore.CYAN}{command_args}{Style.RESET_ALL}", + ) def get_user_feedback( diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py index 30e1e27ea8ca..a835d6d4d753 100644 --- a/autogpt/commands/execute_code.py +++ b/autogpt/commands/execute_code.py @@ -12,6 +12,13 @@ from docker.models.containers import Container as DockerContainer from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import ( + AccessDeniedError, + CodeExecutionError, + CommandExecutionError, + InvalidArgumentError, + OperationNotAllowedError, +) from autogpt.command_decorator import command from autogpt.config import Config from autogpt.logs import logger @@ -60,7 +67,9 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str: # so sanitization must be done here to prevent path traversal. 
file_path = agent.workspace.get_path(code_dir / name) if not file_path.is_relative_to(code_dir): - return "Error: 'name' argument resulted in path traversal, operation aborted" + raise AccessDeniedError( + "'name' argument resulted in path traversal, operation aborted" + ) try: with open(file_path, "w+", encoding="utf-8") as f: @@ -68,7 +77,7 @@ def execute_python_code(code: str, name: str, agent: Agent) -> str: return execute_python_file(str(file_path), agent) except Exception as e: - return f"Error: {str(e)}" + raise CommandExecutionError(*e.args) @command( @@ -97,12 +106,12 @@ def execute_python_file(filename: str, agent: Agent) -> str: ) if not filename.endswith(".py"): - return "Error: Invalid file type. Only .py files are allowed." + raise InvalidArgumentError("Invalid file type. Only .py files are allowed.") file_path = Path(filename) if not file_path.is_file(): # Mimic the response that you get from the command line so that it's easier to identify - return ( + raise InvalidArgumentError( f"python: can't open file '{filename}': [Errno 2] No such file or directory" ) @@ -119,7 +128,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: if result.returncode == 0: return result.stdout else: - return f"Error: {result.stderr}" + raise CodeExecutionError(result.stderr) logger.debug("Auto-GPT is not running in a Docker container") try: @@ -178,10 +187,7 @@ def execute_python_file(filename: str, agent: Agent) -> str: logger.warn( "Could not run the script in a container. If you haven't already, please install Docker https://docs.docker.com/get-docker/" ) - return f"Error: {str(e)}" - - except Exception as e: - return f"Error: {str(e)}" + raise CommandExecutionError(f"Could not run the script in a container: {e}") def validate_command(command: str, config: Config) -> bool: @@ -231,7 +237,7 @@ def execute_shell(command_line: str, agent: Agent) -> str: """ if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") - return "Error: This Shell Command is not allowed." + raise OperationNotAllowedError("This shell command is not allowed.") current_dir = Path.cwd() # Change dir into workspace if necessary @@ -278,7 +284,7 @@ def execute_shell_popen(command_line, agent: Agent) -> str: """ if not validate_command(command_line, agent.config): logger.info(f"Command '{command_line}' not allowed") - return "Error: This Shell Command is not allowed." 
+ raise OperationNotAllowedError("This shell command is not allowed.") current_dir = os.getcwd() # Change dir into workspace if necessary diff --git a/autogpt/commands/file_operations.py b/autogpt/commands/file_operations.py index 41da057e3788..afd1651eca6c 100644 --- a/autogpt/commands/file_operations.py +++ b/autogpt/commands/file_operations.py @@ -13,6 +13,7 @@ from typing import Generator, Literal from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import DuplicateOperationError from autogpt.command_decorator import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, VectorMemory @@ -151,17 +152,14 @@ def read_file(filename: str, agent: Agent) -> str: Returns: str: The contents of the file """ - try: - content = read_textual_file(filename, logger) + content = read_textual_file(filename, logger) - # TODO: invalidate/update memory when file is edited - file_memory = MemoryItem.from_text_file(content, filename, agent.config) - if len(file_memory.chunks) > 1: - return file_memory.summary + # TODO: invalidate/update memory when file is edited + file_memory = MemoryItem.from_text_file(content, filename, agent.config) + if len(file_memory.chunks) > 1: + return file_memory.summary - return content - except Exception as e: - return f"Error: {str(e)}" + return content def ingest_file( @@ -220,16 +218,14 @@ def write_to_file(filename: str, text: str, agent: Agent) -> str: """ checksum = text_checksum(text) if is_duplicate_operation("write", filename, agent, checksum): - return "Error: File has already been updated." - try: - directory = os.path.dirname(filename) - os.makedirs(directory, exist_ok=True) - with open(filename, "w", encoding="utf-8") as f: - f.write(text) - log_operation("write", filename, agent, checksum) - return "File written to successfully." - except Exception as err: - return f"Error: {err}" + raise DuplicateOperationError("File has already been updated.") + + directory = os.path.dirname(filename) + os.makedirs(directory, exist_ok=True) + with open(filename, "w", encoding="utf-8") as f: + f.write(text) + log_operation("write", filename, agent, checksum) + return "File written to successfully." @sanitize_path_arg("filename") @@ -246,20 +242,17 @@ def append_to_file( Returns: str: A message indicating success or failure """ - try: - directory = os.path.dirname(filename) - os.makedirs(directory, exist_ok=True) - with open(filename, "a", encoding="utf-8") as f: - f.write(text) + directory = os.path.dirname(filename) + os.makedirs(directory, exist_ok=True) + with open(filename, "a", encoding="utf-8") as f: + f.write(text) - if should_log: - with open(filename, "r", encoding="utf-8") as f: - checksum = text_checksum(f.read()) - log_operation("append", filename, agent, checksum=checksum) + if should_log: + with open(filename, "r", encoding="utf-8") as f: + checksum = text_checksum(f.read()) + log_operation("append", filename, agent, checksum=checksum) - return "Text appended successfully." - except Exception as err: - return f"Error: {err}" + return "Text appended successfully." 
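The `DuplicateOperationError` raised by `write_to_file` above depends on checksumming content and consulting a log of prior operations. A simplified standalone sketch of that guard, with an in-memory dict standing in for the persisted file-operations log:

```python
# Standalone sketch of the checksum-based duplicate detection behind the
# DuplicateOperationError raised by write_to_file above; an in-memory dict
# stands in for the persisted operations log.
import hashlib


class DuplicateOperationError(Exception):
    """Reduced stand-in for the exception class added in this patch series."""


_operations_log: dict[tuple[str, str], str] = {}  # (operation, filename) -> checksum


def text_checksum(text: str) -> str:
    return hashlib.md5(text.encode("utf-8")).hexdigest()


def write_file(filename: str, text: str) -> str:
    checksum = text_checksum(text)
    if _operations_log.get(("write", filename)) == checksum:
        # Writing identical content twice is a no-op; refuse it loudly.
        raise DuplicateOperationError("File has already been updated.")
    # (The real command writes to disk here.)
    _operations_log[("write", filename)] = checksum
    return "File written to successfully."


print(write_file("a.txt", "hello"))  # File written to successfully.
try:
    write_file("a.txt", "hello")
except DuplicateOperationError as e:
    print(f"Blocked duplicate write: {e}")
```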
@command( diff --git a/autogpt/commands/git_operations.py b/autogpt/commands/git_operations.py index f7f8186be161..917beaabe069 100644 --- a/autogpt/commands/git_operations.py +++ b/autogpt/commands/git_operations.py @@ -6,6 +6,7 @@ from git.repo import Repo from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import CommandExecutionError from autogpt.command_decorator import command from autogpt.url_utils.validators import validate_url @@ -50,6 +51,7 @@ def clone_repository(url: str, clone_path: str, agent: Agent) -> str: ) try: Repo.clone_from(url=auth_repo_url, to_path=clone_path) - return f"""Cloned {url} to {clone_path}""" except Exception as e: - return f"Error: {str(e)}" + raise CommandExecutionError(f"Could not clone repo: {e}") + + return f"""Cloned {url} to {clone_path}""" diff --git a/autogpt/commands/web_search.py b/autogpt/commands/web_search.py index 49712049d472..483833604600 100644 --- a/autogpt/commands/web_search.py +++ b/autogpt/commands/web_search.py @@ -12,6 +12,7 @@ from duckduckgo_search import DDGS from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import CommandExecutionError, ConfigurationError from autogpt.command_decorator import command DUCKDUCKGO_MAX_ATTEMPTS = 3 @@ -119,9 +120,11 @@ def google(query: str, agent: Agent, num_results: int = 8) -> str | list[str]: ) == 403 and "invalid API key" in error_details.get("error", {}).get( "message", "" ): - return "Error: The provided Google API key is invalid or missing." + raise ConfigurationError( + "The provided Google API key is invalid or missing." + ) else: - return f"Error: {e}" + raise CommandExecutionError(f"An unexpected error occurred: {e}") # google_result can be a list or a string depending on the search results # Return the list of search result URLs diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 2d978494a9d3..8f20d561b7f0 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -32,6 +32,7 @@ from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import CommandExecutionError from autogpt.command_decorator import command from autogpt.logs import logger from autogpt.memory.vector import MemoryItem, get_memory @@ -72,17 +73,22 @@ def browse_website(url: str, question: str, agent: Agent) -> str: # These errors are often quite long and include lots of context. # Just grab the first line. 
msg = e.msg.split("\n")[0] - return f"Error: {msg}" + raise CommandExecutionError(msg) add_header(driver) - summary = summarize_memorize_webpage(url, text, question, agent, driver) + summary = ( + summarize_memorize_webpage(url, text, question, agent, driver) if text else None + ) links = scrape_links_with_selenium(driver, url) # Limit links to 5 if len(links) > 5: links = links[:5] close_browser(driver) - return f"Answer gathered from website: {summary}\n\nLinks: {links}" + if summary: + return f"Answer gathered from website: {summary}\n\nLinks: {links}" + else: + return f"Website did not contain any text.\n\nLinks: {links}" def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]: @@ -229,7 +235,7 @@ def summarize_memorize_webpage( str: The summary of the text """ if not text: - return "Error: No text to summarize" + raise ValueError("No text to summarize") text_length = len(text) logger.info(f"Text length: {text_length} characters") diff --git a/autogpt/models/agent_actions.py b/autogpt/models/agent_actions.py new file mode 100644 index 000000000000..82f219b2bc65 --- /dev/null +++ b/autogpt/models/agent_actions.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Literal, Optional + + +@dataclass +class Action: + name: str + args: dict[str, Any] + reasoning: str + + def format_call(self) -> str: + return f"{self.name}({', '.join([f'{a}={repr(v)}' for a, v in self.args.items()])})" + + +@dataclass +class ActionSuccessResult: + results: Any + status: Literal["success"] = "success" + + def __str__(self) -> str: + return f"Action succeeded and returned: `{self.results}`" + + +@dataclass +class ActionErrorResult: + reason: str + error: Optional[Exception] = None + status: Literal["error"] = "error" + + def __str__(self) -> str: + return f"Action failed: `{self.reason}`" + + +@dataclass +class ActionInterruptedByHuman: + feedback: str + status: Literal["interrupted_by_human"] = "interrupted_by_human" + + def __str__(self) -> str: + return f'The user interrupted the action with the following feedback: "{self.feedback}"' + + +ActionResult = ActionSuccessResult | ActionErrorResult | ActionInterruptedByHuman From 67efb0c9ac3ffe3d3f28d4ed7612f4639ab61489 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 31 Jul 2023 01:29:31 +0200 Subject: [PATCH 11/20] Remove duplicate agent_history.py --- autogpt/memory/agent_history.py | 80 --------------------------------- 1 file changed, 80 deletions(-) delete mode 100644 autogpt/memory/agent_history.py diff --git a/autogpt/memory/agent_history.py b/autogpt/memory/agent_history.py deleted file mode 100644 index 9e5cfff85b04..000000000000 --- a/autogpt/memory/agent_history.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import Any, Literal - - -@dataclass -class Action: - name: str - args: dict[str, Any] - reasoning: str - - def format_call(self) -> str: - return f"{self.name}({', '.join([f'{a}={repr(v)}' for a, v in self.args.items()])})" - - -@dataclass -class ActionSuccessResult: - success: Literal[True] - results: Any - - -@dataclass -class ActionErrorResult: - success: Literal[False] - reason: str - - -ActionResult = ActionSuccessResult | ActionErrorResult - - -class ActionHistory: - """Utility container for an action history""" - - @dataclass - class CycleRecord: - action: Action | None - result: ActionResult | None - - cursor: int - cycles: list[CycleRecord] - - def __init__(self, 
cycles: list[CycleRecord] = []): - self.cycles = cycles - self.cursor = len(self.cycles) - - @property - def current_record(self) -> CycleRecord | None: - if self.cursor == len(self): - return None - return self[self.cursor] - - def __getitem__(self, key: int) -> CycleRecord: - return self.cycles[key] - - def __iter__(self): - return iter(self.cycles) - - def __len__(self): - return len(self.cycles) - - def __bool__(self): - return len(self.cycles) > 0 - - def register_action(self, action: Action) -> None: - if not self.current_record: - self.cycles.append(self.CycleRecord(None, None)) - assert self.current_record - elif self.current_record.action: - raise ValueError("Action for current cycle already set") - - self.current_record.action = action - - def register_result(self, result: ActionResult) -> None: - if not self.current_record: - raise RuntimeError("Cannot register result for cycle without action") - elif self.current_record.result: - raise ValueError("Result for current cycle already set") - - self.current_record.result = result From bcfdbfab7b982eb7da11f751efe599c8df325aff Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 31 Jul 2023 01:38:30 +0200 Subject: [PATCH 12/20] Update PlanningAgent from upstream --- autogpt/agents/planning_agent.py | 196 ++++++++++++++++++------------- 1 file changed, 112 insertions(+), 84 deletions(-) diff --git a/autogpt/agents/planning_agent.py b/autogpt/agents/planning_agent.py index 6e9907bce48b..c200ccd6f27b 100644 --- a/autogpt/agents/planning_agent.py +++ b/autogpt/agents/planning_agent.py @@ -10,8 +10,13 @@ from autogpt.memory.vector import VectorMemory from autogpt.models.command_registry import CommandRegistry +from autogpt.agents.utils.exceptions import ( + AgentException, + CommandExecutionError, + InvalidAgentResponseError, + UnknownCommandError, +) from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict -from autogpt.llm.api_manager import ApiManager from autogpt.llm.base import Message from autogpt.llm.utils import count_string_tokens from autogpt.logs import logger @@ -22,6 +27,14 @@ USER_INPUT_FILE_NAME, LogCycleHandler, ) +from autogpt.models.agent_actions import ( + ActionErrorResult, + ActionHistory, + ActionInterruptedByHuman, + ActionResult, + ActionSuccessResult, +) +from autogpt.models.command import CommandOutput from autogpt.memory.agent_history import ActionHistory from autogpt.workspace import Workspace @@ -97,14 +110,9 @@ def construct_base_prompt( logger.warn(f"Incomplete action in history: {cycle}") continue - result_description = ( - "successful" - if cycle.result.success - else f"not successful, with the following message: {cycle.result.reason}" - ) plan_section.append( - f"{i}. You executed `{cycle.action.format_call()}`: " - f"the result was {result_description}." + f"{i}. You executed the command `{cycle.action.format_call()}`, " + f"which gave the result `{cycle.result}`." 
) prepend_messages.append(Message("system", "\n".join(plan_section))) @@ -142,15 +150,14 @@ def on_before_think(self, *args, **kwargs) -> ChatSequence: def execute( self, - command_name: str | None, - command_args: dict[str, str] | None, - user_input: str | None, - ) -> str: - # Execute command - if command_name is not None and command_name.lower().startswith("error"): - result = f"Could not execute command: {command_name}{command_args}" - elif command_name == "human_feedback": - result = f"Human feedback: {user_input}" + command_name: str, + command_args: dict[str, str] = {}, + user_input: str = "", + ) -> ActionResult: + result: ActionResult + + if command_name == "human_feedback": + result = ActionInterruptedByHuman(user_input) self.log_cycle_handler.log_cycle( self.ai_config.ai_name, self.created_at, @@ -164,65 +171,82 @@ def execute( if not plugin.can_handle_pre_command(): continue command_name, arguments = plugin.pre_command(command_name, command_args) - command_result = execute_command( - command_name=command_name, - arguments=command_args, - agent=self, - ) - result = f"Command {command_name} returned: " f"{command_result}" - result_tlength = count_string_tokens(str(command_result), self.llm.name) + try: + return_value = execute_command( + command_name=command_name, + arguments=command_args, + agent=self, + ) + + result = ActionSuccessResult(return_value) + except AgentException as e: + result = ActionErrorResult(e.message, e) + + result_tlength = count_string_tokens(str(result), self.llm.name) memory_tlength = count_string_tokens( str(self.history.summary_message()), self.llm.name ) if result_tlength + memory_tlength > self.send_token_limit: - result = f"Failure: command {command_name} returned too much output. \ - Do not execute this command again with the same arguments." + result = ActionErrorResult( + reason=f"Command {command_name} returned too much output. " + "Do not execute this command again with the same arguments." + ) for plugin in self.config.plugins: if not plugin.can_handle_post_command(): continue - result = plugin.post_command(command_name, result) + if result.status == "success": + result.results = plugin.post_command(command_name, result.results) + elif result.status == "error": + result.reason = plugin.post_command(command_name, result.reason) + # Check if there's a result from the command append it to the message - if result is None: - self.history.add("system", "Unable to execute command", "action_result") - else: - self.history.add("system", result, "action_result") + if result.status == "success": + self.history.add( + "system", + f"Command {command_name} returned: {result.results}", + "action_result", + ) + elif result.status == "error": + message = f"Command {command_name} failed: {result.reason}" + if ( + result.error + and isinstance(result.error, AgentException) + and result.error.hint + ): + message = message.rstrip(".") + f". 
{result.error.hint}" + self.history.add("system", message, "action_result") return result def parse_and_process_response( self, llm_response: ChatModelResponse, *args, **kwargs - ) -> tuple[CommandName | None, CommandArgs | None, AgentThoughts]: + ) -> PlanningAgent.ThoughtProcessOutput: if not llm_response.content: - raise SyntaxError("Assistant response has no text content") - - assistant_reply_dict = extract_dict_from_response(llm_response.content) + raise InvalidAgentResponseError("Assistant response has no text content") - valid, errors = validate_dict(assistant_reply_dict, self.config) - if not valid: - raise SyntaxError( - "Validation of response failed:\n " - + ";\n ".join([str(e) for e in errors]) - ) + response_content = llm_response.content for plugin in self.config.plugins: if not plugin.can_handle_post_planning(): continue - assistant_reply_dict = plugin.post_planning(assistant_reply_dict) + response_content = plugin.post_planning(response_content) - response = None, None, assistant_reply_dict + assistant_reply_dict = extract_dict_from_response(response_content) - # Print Assistant thoughts - if assistant_reply_dict != {}: - # Get command name and arguments - try: - command_name, arguments = extract_command( - assistant_reply_dict, llm_response, self.config - ) - response = command_name, arguments, assistant_reply_dict - except Exception as e: - logger.error("Error: \n", str(e)) + _, errors = validate_dict(assistant_reply_dict, self.config) + if errors: + raise InvalidAgentResponseError( + "Validation of response failed:\n " + + ";\n ".join([str(e) for e in errors]) + ) + + # Get command name and arguments + command_name, arguments = extract_command( + assistant_reply_dict, llm_response, self.config + ) + response = command_name, arguments, assistant_reply_dict self.log_cycle_handler.log_cycle( self.ai_config.ai_name, @@ -254,29 +278,26 @@ def extract_command( """ if config.openai_functions: if assistant_reply.function_call is None: - return "Error:", {"message": "No 'function_call' in assistant reply"} + raise InvalidAgentResponseError("No 'function_call' in assistant reply") assistant_reply_json["command"] = { "name": assistant_reply.function_call.name, "args": json.loads(assistant_reply.function_call.arguments), } try: - if "command" not in assistant_reply_json: - return "Error:", {"message": "Missing 'command' object in JSON"} - if not isinstance(assistant_reply_json, dict): - return ( - "Error:", - { - "message": f"The previous message sent was not a dictionary {assistant_reply_json}" - }, + raise InvalidAgentResponseError( + f"The previous message sent was not a dictionary {assistant_reply_json}" ) + if "command" not in assistant_reply_json: + raise InvalidAgentResponseError("Missing 'command' object in JSON") + command = assistant_reply_json["command"] if not isinstance(command, dict): - return "Error:", {"message": "'command' object is not a dictionary"} + raise InvalidAgentResponseError("'command' object is not a dictionary") if "name" not in command: - return "Error:", {"message": "Missing 'name' field in 'command' object"} + raise InvalidAgentResponseError("Missing 'name' field in 'command' object") command_name = command["name"] @@ -284,18 +305,19 @@ def extract_command( arguments = command.get("args", {}) return command_name, arguments + except json.decoder.JSONDecodeError: - return "Error:", {"message": "Invalid JSON"} - # All other errors, return "Error: + error message" + raise InvalidAgentResponseError("Invalid JSON") + except Exception as e: - return 
"Error:", {"message": str(e)} + raise InvalidAgentResponseError(str(e)) def execute_command( command_name: str, arguments: dict[str, str], agent: PlanningAgent, -) -> Any: +) -> CommandOutput: """Execute the command and return the result Args: @@ -306,22 +328,28 @@ def execute_command( Returns: str: The result of the command """ - try: - # Execute a native command with the same name or alias, if it exists - if command := agent.command_registry.get_command(command_name): + # Execute a native command with the same name or alias, if it exists + if command := agent.command_registry.get_command(command_name): + try: return command(**arguments, agent=agent) - - # Handle non-native commands (e.g. from plugins) - for command in agent.ai_config.prompt_generator.commands: - if ( - command_name == command.label.lower() - or command_name == command.name.lower() - ): + except AgentException: + raise + except Exception as e: + raise CommandExecutionError(str(e)) + + # Handle non-native commands (e.g. from plugins) + for command in agent.ai_config.prompt_generator.commands: + if ( + command_name == command.label.lower() + or command_name == command.name.lower() + ): + try: return command.function(**arguments) + except AgentException: + raise + except Exception as e: + raise CommandExecutionError(str(e)) - raise RuntimeError( - f"Cannot execute '{command_name}': unknown command." - " Do not try to use this command again." - ) - except Exception as e: - return f"Error: {str(e)}" + raise UnknownCommandError( + f"Cannot execute command '{command_name}': unknown command." + ) From 191c3efe33a46ccb396665daae962db7271385e2 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Mon, 31 Jul 2023 02:44:57 +0200 Subject: [PATCH 13/20] WIP: Support for dynamic in-prompt context --- autogpt/agents/agent.py | 11 ++ autogpt/agents/planning_agent.py | 198 ++++++++++--------------------- autogpt/agents/utils/context.py | 23 ++++ autogpt/models/agent_actions.py | 53 ++++++++- autogpt/models/command.py | 4 +- autogpt/models/context_item.py | 76 ++++++++++++ 6 files changed, 226 insertions(+), 139 deletions(-) create mode 100644 autogpt/agents/utils/context.py create mode 100644 autogpt/models/context_item.py diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py index 2ebd6e48c67d..1cbc79681157 100644 --- a/autogpt/agents/agent.py +++ b/autogpt/agents/agent.py @@ -36,6 +36,7 @@ ActionSuccessResult, ) from autogpt.models.command import CommandOutput +from autogpt.models.context_item import ContextItem from autogpt.workspace import Workspace from .base import BaseAgent @@ -165,6 +166,16 @@ def execute( arguments=command_args, agent=self, ) + + # Intercept ContextItem if one is returned by the command + if type(return_value) == tuple and isinstance( + return_value[1], ContextItem + ): + # self.context.add(return_value[1]) + # return_value = return_value[0] + # HACK: use content of ContextItem as return value, for legacy support + return_value = return_value[1] + result = ActionSuccessResult(return_value) except AgentException as e: result = ActionErrorResult(e.message, e) diff --git a/autogpt/agents/planning_agent.py b/autogpt/agents/planning_agent.py index c200ccd6f27b..28f83be22b93 100644 --- a/autogpt/agents/planning_agent.py +++ b/autogpt/agents/planning_agent.py @@ -1,8 +1,7 @@ from __future__ import annotations -import json from datetime import datetime -from typing import TYPE_CHECKING, Any, Literal, Optional +from typing import TYPE_CHECKING, Literal, Optional if TYPE_CHECKING: from autogpt.config import 
AIConfig, Config @@ -10,12 +9,7 @@ from autogpt.memory.vector import VectorMemory from autogpt.models.command_registry import CommandRegistry -from autogpt.agents.utils.exceptions import ( - AgentException, - CommandExecutionError, - InvalidAgentResponseError, - UnknownCommandError, -) +from autogpt.agents.utils.exceptions import AgentException, InvalidAgentResponseError from autogpt.json_utils.utilities import extract_dict_from_response, validate_dict from autogpt.llm.base import Message from autogpt.llm.utils import count_string_tokens @@ -34,13 +28,12 @@ ActionResult, ActionSuccessResult, ) -from autogpt.models.command import CommandOutput -from autogpt.memory.agent_history import ActionHistory +from autogpt.models.context_item import ContextItem from autogpt.workspace import Workspace -from .base import AgentThoughts, BaseAgent, CommandArgs, CommandName - -PLANNING_AGENT_SYSTEM_PROMPT = """You are an AI agent named {ai_name}""" +from .agent import execute_command, extract_command +from .base import BaseAgent +from .utils.context import AgentContext class PlanningAgent(BaseAgent): @@ -79,6 +72,9 @@ def __init__( self.action_history = ActionHistory() + self.context = AgentContext() + """Dynamic segment of the prompt, to provide the LLM with relevant context""" + self.plan: list[str] = [] """List of steps that the Agent plans to take""" @@ -89,35 +85,53 @@ def construct_base_prompt( "prepend_messages", [] ) - match thought_process_id: - case "plan" | "action": - # Add the current plan to the prompt, if any - if self.plan: - plan_section = [ - "## Plan", - "To complete your task, you have made the following plan:", - ] - plan_section += [f"{i}. {s}" for i, s in enumerate(self.plan, 1)] - - # Add the actions so far to the prompt - if self.action_history: - plan_section += [ - "\n### Progress", - "So far, you have executed the following actions based on the plan:", - ] - for i, cycle in enumerate(self.action_history, 1): - if not (cycle.action and cycle.result): - logger.warn(f"Incomplete action in history: {cycle}") - continue - - plan_section.append( - f"{i}. You executed the command `{cycle.action.format_call()}`, " - f"which gave the result `{cycle.result}`." - ) - - prepend_messages.append(Message("system", "\n".join(plan_section))) + # Add the current plan to the prompt, if any + if self.plan: + plan_section = [ + "## Plan", + "To complete your task, you have composed the following plan:", + ] + plan_section += [f"{i}. {s}" for i, s in enumerate(self.plan, 1)] + + # Add the actions so far to the prompt + if self.action_history: + plan_section += [ + "\n### Progress", + "So far, you have executed the following actions based on the plan:", + ] + for i, cycle in enumerate(self.action_history, 1): + if not (cycle.action and cycle.result): + logger.warn(f"Incomplete action in history: {cycle}") + continue + + plan_section.append( + f"{i}. You executed the command `{cycle.action.format_call()}`, " + f"which gave the result `{cycle.result}`." + ) + + prepend_messages.append(Message("system", "\n".join(plan_section))) + + if self.context: + context_section = [ + "## Context", + "Below is information that may be relevant to your task. These take up " + "part of your working memory, which is limited, so when a context item is " + "no longer relevant for your plan, use the `close_context_item` command to " + "free up some memory." 
+ "\n", + self.context.format_numbered(), + ] + prepend_messages.append(Message("system", "\n".join(context_section))) + match thought_process_id: + case "plan": + # TODO: add planning instructions; details about what to pay attention to when planning + pass + case "action": + # TODO: need to insert the functions here again? + pass case "evaluate": + # TODO: insert latest action (with reasoning) + result + evaluation instructions pass case _: raise NotImplementedError( @@ -179,6 +193,13 @@ def execute( agent=self, ) + # Intercept ContextItem if one is returned by the command + if type(return_value) == tuple and isinstance( + return_value[1], ContextItem + ): + self.context.add(return_value[1]) + return_value = return_value[0] + result = ActionSuccessResult(return_value) except AgentException as e: result = ActionErrorResult(e.message, e) @@ -256,100 +277,3 @@ def parse_and_process_response( NEXT_ACTION_FILE_NAME, ) return response - - -def extract_command( - assistant_reply_json: dict, assistant_reply: ChatModelResponse, config: Config -) -> tuple[str, dict[str, str]]: - """Parse the response and return the command name and arguments - - Args: - assistant_reply_json (dict): The response object from the AI - assistant_reply (ChatModelResponse): The model response from the AI - config (Config): The config object - - Returns: - tuple: The command name and arguments - - Raises: - json.decoder.JSONDecodeError: If the response is not valid JSON - - Exception: If any other error occurs - """ - if config.openai_functions: - if assistant_reply.function_call is None: - raise InvalidAgentResponseError("No 'function_call' in assistant reply") - assistant_reply_json["command"] = { - "name": assistant_reply.function_call.name, - "args": json.loads(assistant_reply.function_call.arguments), - } - try: - if not isinstance(assistant_reply_json, dict): - raise InvalidAgentResponseError( - f"The previous message sent was not a dictionary {assistant_reply_json}" - ) - - if "command" not in assistant_reply_json: - raise InvalidAgentResponseError("Missing 'command' object in JSON") - - command = assistant_reply_json["command"] - if not isinstance(command, dict): - raise InvalidAgentResponseError("'command' object is not a dictionary") - - if "name" not in command: - raise InvalidAgentResponseError("Missing 'name' field in 'command' object") - - command_name = command["name"] - - # Use an empty dictionary if 'args' field is not present in 'command' object - arguments = command.get("args", {}) - - return command_name, arguments - - except json.decoder.JSONDecodeError: - raise InvalidAgentResponseError("Invalid JSON") - - except Exception as e: - raise InvalidAgentResponseError(str(e)) - - -def execute_command( - command_name: str, - arguments: dict[str, str], - agent: PlanningAgent, -) -> CommandOutput: - """Execute the command and return the result - - Args: - command_name (str): The name of the command to execute - arguments (dict): The arguments for the command - agent (Agent): The agent that is executing the command - - Returns: - str: The result of the command - """ - # Execute a native command with the same name or alias, if it exists - if command := agent.command_registry.get_command(command_name): - try: - return command(**arguments, agent=agent) - except AgentException: - raise - except Exception as e: - raise CommandExecutionError(str(e)) - - # Handle non-native commands (e.g. 
from plugins) - for command in agent.ai_config.prompt_generator.commands: - if ( - command_name == command.label.lower() - or command_name == command.name.lower() - ): - try: - return command.function(**arguments) - except AgentException: - raise - except Exception as e: - raise CommandExecutionError(str(e)) - - raise UnknownCommandError( - f"Cannot execute command '{command_name}': unknown command." - ) diff --git a/autogpt/agents/utils/context.py b/autogpt/agents/utils/context.py new file mode 100644 index 000000000000..23cf26b36731 --- /dev/null +++ b/autogpt/agents/utils/context.py @@ -0,0 +1,23 @@ +from autogpt.models.context_item import ContextItem + + +class AgentContext: + items: list[ContextItem] + + def __init__(self, items: list[ContextItem] = []): + self.items = items + + def __bool__(self) -> bool: + return len(self.items) > 0 + + def add(self, item: ContextItem) -> None: + self.items.append(item) + + def close(self, index: int) -> None: + self.items.pop(index - 1) + + def clear(self) -> None: + self.items.clear() + + def format_numbered(self) -> str: + return "\n\n".join([f"{i}. {c}" for i, c in enumerate(self.items, 1)]) diff --git a/autogpt/models/agent_actions.py b/autogpt/models/agent_actions.py index 82f219b2bc65..2062e2d4be34 100644 --- a/autogpt/models/agent_actions.py +++ b/autogpt/models/agent_actions.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any, Literal, Optional +from typing import Any, Iterator, Literal, Optional @dataclass @@ -43,3 +43,54 @@ def __str__(self) -> str: ActionResult = ActionSuccessResult | ActionErrorResult | ActionInterruptedByHuman + + +class ActionHistory: + """Utility container for an action history""" + + @dataclass + class CycleRecord: + action: Action | None + result: ActionResult | None + + cursor: int + cycles: list[CycleRecord] + + def __init__(self, cycles: list[CycleRecord] = []): + self.cycles = cycles + self.cursor = len(self.cycles) + + @property + def current_record(self) -> CycleRecord | None: + if self.cursor == len(self): + return None + return self[self.cursor] + + def __getitem__(self, key: int) -> CycleRecord: + return self.cycles[key] + + def __iter__(self) -> Iterator[CycleRecord]: + return iter(self.cycles) + + def __len__(self) -> int: + return len(self.cycles) + + def __bool__(self) -> bool: + return len(self.cycles) > 0 + + def register_action(self, action: Action) -> None: + if not self.current_record: + self.cycles.append(self.CycleRecord(None, None)) + assert self.current_record + elif self.current_record.action: + raise ValueError("Action for current cycle already set") + + self.current_record.action = action + + def register_result(self, result: ActionResult) -> None: + if not self.current_record: + raise RuntimeError("Cannot register result for cycle without action") + elif self.current_record.result: + raise ValueError("Result for current cycle already set") + + self.current_record.result = result diff --git a/autogpt/models/command.py b/autogpt/models/command.py index 5f105d9d0c4f..6cd3364c6b0d 100644 --- a/autogpt/models/command.py +++ b/autogpt/models/command.py @@ -3,8 +3,10 @@ from autogpt.config import Config from .command_parameter import CommandParameter +from .context_item import ContextItem -CommandOutput = Any +CommandReturnValue = Any +CommandOutput = CommandReturnValue | tuple[CommandReturnValue, ContextItem] class Command: diff --git a/autogpt/models/context_item.py b/autogpt/models/context_item.py new file mode 100644 index 
From b9d15b6bda9873277a497ff1424b1e1582792e14 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Mon, 31 Jul 2023 02:54:52 +0200
Subject: [PATCH 14/20] WIP: response formats for PlanningAgent three-stage cycle

---
 autogpt/agents/planning_agent.py | 95 ++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 4 deletions(-)

diff --git a/autogpt/agents/planning_agent.py b/autogpt/agents/planning_agent.py
index 28f83be22b93..bfe6cd2c1b3b 100644
--- a/autogpt/agents/planning_agent.py
+++ b/autogpt/agents/planning_agent.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import re
 from datetime import datetime
 from typing import TYPE_CHECKING, Literal, Optional
 
@@ -16,7 +17,6 @@
 from autogpt.logs import logger
 from autogpt.logs.log_cycle import (
     CURRENT_CONTEXT_FILE_NAME,
-    FULL_MESSAGE_HISTORY_FILE_NAME,
     NEXT_ACTION_FILE_NAME,
     USER_INPUT_FILE_NAME,
     LogCycleHandler,
@@ -142,6 +142,89 @@ def construct_base_prompt(
             thought_process_id=thought_process_id, **kwargs
         )
 
+    def response_format_instruction(self, thought_process_id: ThoughtProcessID) -> str:
+        match thought_process_id:
+            case "plan":
+                # TODO: add planning instructions; details about what to pay attention to when planning
+                response_format = f"""```ts
+                interface Response {{
+                    thoughts: {{
+                        // Thoughts
+                        text: string;
+                        // A short logical explanation about how the action is part of the earlier composed plan
+                        reasoning: string;
+                        // Constructive self-criticism
+                        criticism: string;
+                    }};
+                    // A plan to achieve the goals with the available resources and/or commands.
+                    plan: Array<{{
+                        // An actionable subtask
+                        subtask: string;
+                        // Criterion to determine whether the subtask has been completed
+                        completed_if: string;
+                    }}>;
+                }}
+                ```"""
+                pass
+            case "action":
+                # TODO: need to insert the functions here again?
+                response_format = """```ts
+                interface Response {
+                    thoughts: {
+                        // Thoughts
+                        text: string;
+                        // A short logical explanation about how the action is part of the earlier composed plan
+                        reasoning: string;
+                        // Constructive self-criticism
+                        criticism: string;
+                    };
+                    // The action to take, from the earlier specified list of commands
+                    command: {
+                        name: string;
+                        args: Record<string, any>;
+                    };
+                }
+                ```"""
+                pass
+            case "evaluate":
+                # TODO: insert latest action (with reasoning) + result + evaluation instructions
+                response_format = f"""```ts
+                interface Response {{
+                    thoughts: {{
+                        // Thoughts
+                        text: string;
+                        reasoning: string;
+                        // Constructive self-criticism
+                        criticism: string;
+                    }};
+                    result_evaluation: {{
+                        // A short logical explanation of why the given partial result does or does not complete the corresponding subtask
+                        reasoning: string;
+                        // Whether the current subtask has been completed
+                        completed: boolean;
+                        // An estimate of the progress (0.0 - 1.0) that has been made on the subtask with the actions that have been taken so far
+                        progress: number;
+                    }};
+                }}
+                ```"""
+                pass
+            case _:
+                raise NotImplementedError(
+                    f"Unknown thought process '{thought_process_id}'"
+                )
+
+        response_format = re.sub(
+            r"\n\s+",
+            "\n",
+            response_format,
+        )
+
+        return (
+            f"Respond strictly with JSON. The JSON should be compatible with "
+            "the TypeScript type `Response` from the following:\n"
+            f"{response_format}\n"
+        )
+
     def on_before_think(self, *args, **kwargs) -> ChatSequence:
         prompt = super().on_before_think(*args, **kwargs)
 
@@ -150,8 +233,8 @@ def on_before_think(self, *args, **kwargs) -> ChatSequence:
             self.ai_config.ai_name,
             self.created_at,
             self.cycle_count,
-            self.history.raw(),
-            FULL_MESSAGE_HISTORY_FILE_NAME,
+            self.action_history.cycles,
+            "action_history.json",
         )
         self.log_cycle_handler.log_cycle(
             self.ai_config.ai_name,
@@ -242,7 +325,11 @@ def execute(
         return result
 
     def parse_and_process_response(
-        self, llm_response: ChatModelResponse, *args, **kwargs
+        self,
+        llm_response: ChatModelResponse,
+        thought_process_id: ThoughtProcessID,
+        *args,
+        **kwargs,
    ) -> PlanningAgent.ThoughtProcessOutput:
         if not llm_response.content:
             raise InvalidAgentResponseError("Assistant response has no text content")
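The `re.sub(r"\n\s+", "\n", ...)` call flattens the indentation of these format blocks before they reach the model. A rough sketch of how a "plan" response would then be consumed — field names are taken from the interface above; the validation itself is illustrative, not part of the patch:

```python
import json

# A hypothetical model reply matching the "plan" Response interface.
raw_reply = """
{
    "thoughts": {"text": "...", "reasoning": "...", "criticism": "..."},
    "plan": [
        {"subtask": "Collect requirements", "completed_if": "requirements.md exists"}
    ]
}
"""

reply = json.loads(raw_reply)
for step in reply["plan"]:
    # Each subtask carries its own completion criterion, which the later
    # "evaluate" stage scores via `completed` and `progress`.
    assert {"subtask", "completed_if"} <= step.keys()
```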
+ response_format = """```ts + interface Response { + thoughts: { + // Thoughts + text: string; + // A short logical explanation about how the action is part of the earlier composed plan + reasoning: string; + // Constructive self-criticism + criticism: string; + }; + // The action to take, from the earlier specified list of commands + command: { + name: string; + args: Record; + }; + } + ```""" + pass + case "evaluate": + # TODO: insert latest action (with reasoning) + result + evaluation instructions + response_format = f"""```ts + interface Response {{ + thoughts: {{ + // Thoughts + text: string; + reasoning: string; + // Constructive self-criticism + criticism: string; + }}; + result_evaluation: {{ + // A short logical explanation of why the given partial result does or does not complete the corresponding subtask + reasoning: string; + // Whether the current subtask has been completed + completed: boolean; + // An estimate of the progress (0.0 - 1.0) that has been made on the subtask with the actions that have been taken so far + progress: float; + }}; + }} + ```""" + pass + case _: + raise NotImplementedError( + f"Unknown thought process '{thought_process_id}'" + ) + + response_format = re.sub( + r"\n\s+", + "\n", + response_format, + ) + + return ( + f"Respond strictly with JSON. The JSON should be compatible with " + "the TypeScript type `Response` from the following:\n" + f"{response_format}\n" + ) + def on_before_think(self, *args, **kwargs) -> ChatSequence: prompt = super().on_before_think(*args, **kwargs) @@ -150,8 +233,8 @@ def on_before_think(self, *args, **kwargs) -> ChatSequence: self.ai_config.ai_name, self.created_at, self.cycle_count, - self.history.raw(), - FULL_MESSAGE_HISTORY_FILE_NAME, + self.action_history.cycles, + "action_history.json", ) self.log_cycle_handler.log_cycle( self.ai_config.ai_name, @@ -242,7 +325,11 @@ def execute( return result def parse_and_process_response( - self, llm_response: ChatModelResponse, *args, **kwargs + self, + llm_response: ChatModelResponse, + thought_process_id: ThoughtProcessID, + *args, + **kwargs, ) -> PlanningAgent.ThoughtProcessOutput: if not llm_response.content: raise InvalidAgentResponseError("Assistant response has no text content") From 548888a37e77ed379eb6bfbbdde931d61b340461 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Wed, 9 Aug 2023 18:53:41 +0200 Subject: [PATCH 15/20] Remove browsing overlay & separate browsing from extraction code --- autogpt/commands/web_selenium.py | 132 +++++++++++++++---------------- autogpt/js/overlay.js | 29 ------- 2 files changed, 65 insertions(+), 96 deletions(-) delete mode 100644 autogpt/js/overlay.js diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py index 5afc478560e7..b0bbae3f697b 100644 --- a/autogpt/commands/web_selenium.py +++ b/autogpt/commands/web_selenium.py @@ -2,15 +2,13 @@ from __future__ import annotations -from autogpt.llm.utils.token_counter import count_string_tokens - COMMAND_CATEGORY = "web_browse" COMMAND_CATEGORY_TITLE = "Web Browsing" import logging from pathlib import Path from sys import platform -from typing import Optional, Type +from typing import TYPE_CHECKING, Optional, Type from bs4 import BeautifulSoup from selenium.common.exceptions import WebDriverException @@ -34,9 +32,13 @@ from webdriver_manager.firefox import GeckoDriverManager from webdriver_manager.microsoft import EdgeChromiumDriverManager as EdgeDriverManager -from autogpt.agents.agent import Agent +if TYPE_CHECKING: + from autogpt.config import Config + 
@@ -117,23 +163,23 @@ def scrape_text_with_selenium(url: str, agent: Agent) -> tuple[WebDriver, str]:
         "safari": SafariOptions,
     }
 
-    options: BrowserOptions = options_available[agent.config.selenium_web_browser]()
+    options: BrowserOptions = options_available[config.selenium_web_browser]()
     options.add_argument(
         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.5615.49 Safari/537.36"
     )
 
-    if agent.config.selenium_web_browser == "firefox":
-        if agent.config.selenium_headless:
+    if config.selenium_web_browser == "firefox":
+        if config.selenium_headless:
             options.headless = True
             options.add_argument("--disable-gpu")
         driver = FirefoxDriver(
             service=GeckoDriverService(GeckoDriverManager().install()), options=options
         )
-    elif agent.config.selenium_web_browser == "edge":
+    elif config.selenium_web_browser == "edge":
         driver = EdgeDriver(
             service=EdgeDriverService(EdgeDriverManager().install()), options=options
         )
-    elif agent.config.selenium_web_browser == "safari":
+    elif config.selenium_web_browser == "safari":
         # Requires a bit more setup on the users end
         # See https://developer.apple.com/documentation/webkit/testing_with_webdriver_in_safari
         driver = SafariDriver(options=options)
@@ -143,7 +189,7 @@
         options.add_argument("--remote-debugging-port=9222")
         options.add_argument("--no-sandbox")
 
-        if agent.config.selenium_headless:
+        if config.selenium_headless:
             options.add_argument("--headless=new")
             options.add_argument("--disable-gpu")
@@ -161,38 +207,7 @@
         EC.presence_of_element_located((By.TAG_NAME, "body"))
     )
 
-    # Get the HTML content directly from the browser's DOM
-    page_source = driver.execute_script("return document.body.outerHTML;")
-    soup = BeautifulSoup(page_source, "html.parser")
-
-    for script in soup(["script", "style"]):
-        script.extract()
-
-    text = soup.get_text()
-    lines = (line.strip() for line in text.splitlines())
-    chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
-    text = "\n".join(chunk for chunk in chunks if chunk)
-    return driver, text
-
-
-def scrape_links_with_selenium(driver: WebDriver, url: str) -> list[str]:
-    """Scrape links from a website using selenium
-
-    Args:
-        driver (WebDriver): The webdriver to use to scrape the links
-
-    Returns:
-        List[str]: The links scraped from the website
-    """
-    page_source = driver.page_source
-    soup = BeautifulSoup(page_source, "html.parser")
-
-    for script in soup(["script", "style"]):
-        script.extract()
-
-    hyperlinks = extract_hyperlinks(soup, url)
-
-    return format_hyperlinks(hyperlinks)
+    return driver
 
 
 def close_browser(driver: WebDriver) -> None:
@@ -207,23 +222,6 @@ def close_browser(driver: WebDriver) -> None:
     driver.quit()
 
 
-def add_header(driver: WebDriver) -> None:
-    """Add a header to the website
-
-    Args:
-        driver (WebDriver): The webdriver to use to add the header
-
-    Returns:
-        None
-    """
-    try:
-        with open(f"{FILE_DIR}/js/overlay.js", "r") as overlay_file:
-            overlay_script = overlay_file.read()
-        driver.execute_script(overlay_script)
-    except Exception as e:
-        print(f"Error executing overlay.js: {e}")
-
-
 def summarize_memorize_webpage(
     url: str,
     text: str,
diff --git a/autogpt/js/overlay.js b/autogpt/js/overlay.js
deleted file mode 100644
index 1c99c7267333..000000000000
--- a/autogpt/js/overlay.js
+++ /dev/null
@@ -1,29 +0,0 @@
-const overlay = document.createElement('div');
-Object.assign(overlay.style, {
-    position: 'fixed',
-    zIndex: 999999,
-    top: 0,
-    left: 0,
-    width: '100%',
-    height: '100%',
-    background: 'rgba(0, 0, 0, 0.7)',
-    color: '#fff',
-    fontSize: '24px',
-    fontWeight: 'bold',
-    display: 'flex',
-    justifyContent: 'center',
-    alignItems: 'center',
-});
-const textContent = document.createElement('div');
-Object.assign(textContent.style, {
-    textAlign: 'center',
-});
-textContent.textContent = 'AutoGPT Analyzing Page';
-overlay.appendChild(textContent);
-document.body.append(overlay);
-document.body.style.overflow = 'hidden';
-let dotCount = 0;
-setInterval(() => {
-    textContent.textContent = 'AutoGPT Analyzing Page' + '.'.repeat(dotCount);
-    dotCount = (dotCount + 1) % 4;
-}, 1000);
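After this split, a caller drives the browser lifecycle explicitly. A sketch of the decomposed flow — `config` is assumed to be a loaded `autogpt.config.Config` instance:

```python
from autogpt.commands.web_selenium import (
    close_browser,
    open_page_in_browser,
    scrape_links_with_selenium,
    scrape_text_with_selenium,
)

url = "https://example.com"
driver = open_page_in_browser(url, config)  # navigation only, no extraction
try:
    text = scrape_text_with_selenium(driver)         # extraction is separate...
    links = scrape_links_with_selenium(driver, url)  # ...and reuses the window
finally:
    close_browser(driver)  # always release the WebDriver
```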
From fdfc6c3844b6e17e4cc8aa80cb1ad9e8094c31c6 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Thu, 10 Aug 2023 15:46:45 +0200
Subject: [PATCH 16/20] Fix human feedback

---
 autogpt/agents/agent.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py
index a06b39b2590f..c5f5c06a67ab 100644
--- a/autogpt/agents/agent.py
+++ b/autogpt/agents/agent.py
@@ -142,6 +142,11 @@ def execute(
 
         if command_name == "human_feedback":
             result = ActionInterruptedByHuman(user_input)
+            self.history.add(
+                "user",
+                "I interrupted the execution of the command you proposed "
+                f"to give you some feedback: {user_input}",
+            )
             self.log_cycle_handler.log_cycle(
                 self.ai_config.ai_name,
                 self.created_at,
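With this change, interrupting a proposed command leaves a trace in the message history instead of silently discarding the feedback. A sketch of the resulting flow — the `execute` call signature is abbreviated and assumed:

```python
# Illustrative only: what the history sees after a human interruption.
user_input = "Use the API instead of scraping the site."
result = agent.execute("human_feedback", user_input=user_input)
# result is an ActionInterruptedByHuman, and agent.history now ends with a
# "user" message: "I interrupted the execution of the command you proposed
# to give you some feedback: Use the API instead of scraping the site."
```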
From 7b146a04a49da60edfb91aa51a9ade20dd02b559 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Thu, 10 Aug 2023 16:53:22 +0200
Subject: [PATCH 17/20] Fix tests

---
 autogpt/commands/execute_code.py       |  2 +-
 autogpt/commands/web_search.py         |  5 ++--
 autogpt/commands/web_selenium.py       | 11 ++++++++
 tests/integration/test_execute_code.py | 37 +++++++++++++-------------
 tests/integration/test_update_user.py  | 33 -----------------------
 tests/integration/test_web_selenium.py | 14 +++++-----
 tests/unit/test_file_operations.py     |  9 ++++---
 tests/unit/test_git_commands.py        |  6 ++---
 tests/unit/test_web_search.py          | 14 +++++-----
 9 files changed, 54 insertions(+), 77 deletions(-)
 delete mode 100644 tests/integration/test_update_user.py

diff --git a/autogpt/commands/execute_code.py b/autogpt/commands/execute_code.py
index 9e20c46796bd..baab41443888 100644
--- a/autogpt/commands/execute_code.py
+++ b/autogpt/commands/execute_code.py
@@ -111,7 +111,7 @@ def execute_python_file(filename: str, agent: Agent) -> str:
     file_path = Path(filename)
     if not file_path.is_file():
         # Mimic the response that you get from the command line so that it's easier to identify
-        raise InvalidArgumentError(
+        raise FileNotFoundError(
             f"python: can't open file '{filename}': [Errno 2] No such file or directory"
         )
 
diff --git a/autogpt/commands/web_search.py b/autogpt/commands/web_search.py
index 483833604600..df066fea5cb9 100644
--- a/autogpt/commands/web_search.py
+++ b/autogpt/commands/web_search.py
@@ -12,7 +12,7 @@
 from duckduckgo_search import DDGS
 
 from autogpt.agents.agent import Agent
-from autogpt.agents.utils.exceptions import CommandExecutionError, ConfigurationError
+from autogpt.agents.utils.exceptions import ConfigurationError
 from autogpt.command_decorator import command
 
 DUCKDUCKGO_MAX_ATTEMPTS = 3
@@ -123,8 +123,7 @@ def google(query: str, agent: Agent, num_results: int = 8) -> str | list[str]:
             raise ConfigurationError(
                 "The provided Google API key is invalid or missing."
             )
-        else:
-            raise CommandExecutionError(f"An unexpected error occurred: {e}")
+        raise
 
     # google_result can be a list or a string depending on the search results
     # Return the list of search result URLs
diff --git a/autogpt/commands/web_selenium.py b/autogpt/commands/web_selenium.py
index 2c77c57244e3..7c4780583077 100644
--- a/autogpt/commands/web_selenium.py
+++ b/autogpt/commands/web_selenium.py
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import re
+
 COMMAND_CATEGORY = "web_browse"
 COMMAND_CATEGORY_TITLE = "Web Browsing"
 
@@ -46,6 +48,10 @@
 LINKS_TO_RETURN = 20
 
 
+class BrowsingError(CommandExecutionError):
+    """An error occurred while trying to browse the page"""
+
+
 @command(
     "browse_website",
     "Browses a Website",
@@ -92,6 +98,11 @@ def browse_website(url: str, question: str, agent: Agent) -> str:
         # These errors are often quite long and include lots of context.
         # Just grab the first line.
         msg = e.msg.split("\n")[0]
+        if "net::" in msg:
+            raise BrowsingError(
+                f"A networking error occurred while trying to load the page: "
+                + re.sub(r"^unknown error: ", "", msg)
+            )
         raise CommandExecutionError(msg)
     finally:
         if driver:
'Hello {random_string}!'", agent) - assert "Error:" in result and "not allowed" in result + with pytest.raises(OperationNotAllowedError, match="not allowed"): + sut.execute_shell(f"echo 'Hello {random_string}!'", agent) def test_execute_shell_denylist_should_allow(agent: Agent, random_string: str): @@ -116,15 +117,14 @@ def test_execute_shell_denylist_should_allow(agent: Agent, random_string: str): result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Hello" in result and random_string in result - assert "Error" not in result def test_execute_shell_allowlist_should_deny(agent: Agent, random_string: str): agent.config.shell_command_control = sut.ALLOWLIST_CONTROL agent.config.shell_allowlist = ["cat"] - result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) - assert "Error:" in result and "not allowed" in result + with pytest.raises(OperationNotAllowedError, match="not allowed"): + sut.execute_shell(f"echo 'Hello {random_string}!'", agent) def test_execute_shell_allowlist_should_allow(agent: Agent, random_string: str): @@ -133,4 +133,3 @@ def test_execute_shell_allowlist_should_allow(agent: Agent, random_string: str): result = sut.execute_shell(f"echo 'Hello {random_string}!'", agent) assert "Hello" in result and random_string in result - assert "Error" not in result diff --git a/tests/integration/test_update_user.py b/tests/integration/test_update_user.py deleted file mode 100644 index bc9206317513..000000000000 --- a/tests/integration/test_update_user.py +++ /dev/null @@ -1,33 +0,0 @@ -from unittest.mock import MagicMock, patch - -from colorama import Fore - -from autogpt.app.main import update_user - - -def test_update_user_command_name_is_none() -> None: - # Mock necessary objects - config = MagicMock() - ai_config = MagicMock() - assistant_reply_dict = MagicMock() - - # Mock print_assistant_thoughts and logger.typewriter_log - with patch( - "autogpt.app.main.print_assistant_thoughts" - ) as mock_print_assistant_thoughts, patch( - "autogpt.app.main.logger.typewriter_log" - ) as mock_logger_typewriter_log: - # Test the update_user function with None command_name - update_user(config, ai_config, None, None, assistant_reply_dict) - - # Check that print_assistant_thoughts was called once - mock_print_assistant_thoughts.assert_called_once_with( - ai_config.ai_name, assistant_reply_dict, config - ) - - # Check that logger.typewriter_log was called once with expected arguments - mock_logger_typewriter_log.assert_called_once_with( - "NO ACTION SELECTED: ", - Fore.RED, - f"The Agent failed to select an action.", - ) diff --git a/tests/integration/test_web_selenium.py b/tests/integration/test_web_selenium.py index 43de2860eefb..15dcefa9ce9d 100644 --- a/tests/integration/test_web_selenium.py +++ b/tests/integration/test_web_selenium.py @@ -1,17 +1,17 @@ import pytest -from pytest_mock import MockerFixture from autogpt.agents.agent import Agent -from autogpt.commands.web_selenium import browse_website +from autogpt.commands.web_selenium import BrowsingError, browse_website @pytest.mark.vcr @pytest.mark.requires_openai_api_key -def test_browse_website(agent: Agent, patched_api_requestor: MockerFixture): +def test_browse_website_nonexistent_url(agent: Agent, patched_api_requestor: None): url = "https://barrel-roll.com" question = "How to execute a barrel roll" - response = browse_website(url, question, agent) - assert "error" in response.lower() - # Sanity check that the response is not too long - assert len(response) < 200 + with pytest.raises(BrowsingError, 
match=r"CONNECTION_CLOSED") as raised: + browse_website(url, question, agent) + + # Sanity check that the response is not too long + assert len(raised.exconly()) < 200 diff --git a/tests/unit/test_file_operations.py b/tests/unit/test_file_operations.py index b3f1fb8f8245..fbf9769ac392 100644 --- a/tests/unit/test_file_operations.py +++ b/tests/unit/test_file_operations.py @@ -13,6 +13,7 @@ import autogpt.commands.file_operations as file_ops from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import DuplicateOperationError from autogpt.config import Config from autogpt.memory.vector.memory_item import MemoryItem from autogpt.memory.vector.utils import Embedding @@ -199,8 +200,8 @@ def test_read_file( def test_read_file_not_found(agent: Agent): filename = "does_not_exist.txt" - content = file_ops.read_file(filename, agent=agent) - assert "Error:" in content and filename in content and "no such file" in content + with pytest.raises(FileNotFoundError): + file_ops.read_file(filename, agent=agent) def test_write_to_file_relative_path(test_file_name: Path, agent: Agent): @@ -236,8 +237,8 @@ def test_write_file_fails_if_content_exists(test_file_name: Path, agent: Agent): agent=agent, checksum=file_ops.text_checksum(new_content), ) - result = file_ops.write_to_file(str(test_file_name), new_content, agent=agent) - assert result == "Error: File has already been updated." + with pytest.raises(DuplicateOperationError): + file_ops.write_to_file(str(test_file_name), new_content, agent=agent) def test_write_file_succeeds_if_content_different( diff --git a/tests/unit/test_git_commands.py b/tests/unit/test_git_commands.py index 9f56a3840947..072c56f31eb0 100644 --- a/tests/unit/test_git_commands.py +++ b/tests/unit/test_git_commands.py @@ -3,6 +3,7 @@ from git.repo.base import Repo from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import CommandExecutionError from autogpt.commands.git_operations import clone_repository @@ -38,6 +39,5 @@ def test_clone_repository_error(workspace, mock_clone_from, agent: Agent): "clone", "fatal: repository not found", "" ) - result = clone_repository(url=url, clone_path=clone_path, agent=agent) - - assert "Error: " in result + with pytest.raises(CommandExecutionError): + clone_repository(url=url, clone_path=clone_path, agent=agent) diff --git a/tests/unit/test_web_search.py b/tests/unit/test_web_search.py index 790b1c2f6700..7b57b9fa832e 100644 --- a/tests/unit/test_web_search.py +++ b/tests/unit/test_web_search.py @@ -4,6 +4,7 @@ from googleapiclient.errors import HttpError from autogpt.agents.agent import Agent +from autogpt.agents.utils.exceptions import ConfigurationError from autogpt.commands.web_search import google, safe_google_results, web_search @@ -89,20 +90,19 @@ def test_google_official_search( @pytest.mark.parametrize( - "query, num_results, expected_output, http_code, error_msg", + "query, num_results, expected_error_type, http_code, error_msg", [ ( "invalid query", 3, - "Error: ', + HttpError, 400, "Invalid Value", ), ( "invalid API key", 3, - "Error: The provided Google API key is invalid or missing.", + ConfigurationError, 403, "invalid API key", ), @@ -111,7 +111,7 @@ def test_google_official_search( def test_google_official_search_errors( query, num_results, - expected_output, + expected_error_type, mock_googleapiclient, http_code, error_msg, @@ -132,5 +132,5 @@ def __init__(self, _status, _reason): ) mock_googleapiclient.side_effect = error - actual_output = google(query, agent=agent, 
-    actual_output = google(query, agent=agent, num_results=num_results)
-    assert actual_output == safe_google_results(expected_output)
+    with pytest.raises(expected_error_type):
+        google(query, agent=agent, num_results=num_results)
From fd1e868dbd8d0a16ddee75142f0e0b5a1d81aa10 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Mon, 14 Aug 2023 18:30:08 +0200
Subject: [PATCH 18/20] Include history in Agent prompt generation

---
 autogpt/agents/agent.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py
index 3a38fc53fac9..e8d05adc8e43 100644
--- a/autogpt/agents/agent.py
+++ b/autogpt/agents/agent.py
@@ -111,8 +111,8 @@ def construct_base_prompt(self, *args, **kwargs) -> ChatSequence:
                 kwargs["append_messages"] = []
             kwargs["append_messages"].append(budget_msg)
 
-            # Include message history in base prompt
-            kwargs["with_message_history"] = True
+        # Include message history in base prompt
+        kwargs["with_message_history"] = True
 
         return super().construct_base_prompt(*args, **kwargs)

From c89bf581527f0de24c2e85365e2c14c8da29e912 Mon Sep 17 00:00:00 2001
From: Reinier van der Leer
Date: Tue, 15 Aug 2023 16:03:53 +0200
Subject: [PATCH 19/20] Code improvements in agent.py

---
 autogpt/agents/agent.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/autogpt/agents/agent.py b/autogpt/agents/agent.py
index e8d05adc8e43..5283c3065bbe 100644
--- a/autogpt/agents/agent.py
+++ b/autogpt/agents/agent.py
@@ -176,15 +176,22 @@ def execute(
             if type(return_value) == tuple and isinstance(
                 return_value[1], ContextItem
             ):
-                # self.context.add(return_value[1])
+                context_item = return_value[1]
                 # return_value = return_value[0]
+                logger.debug(
+                    f"Command {command_name} returned a ContextItem: {context_item}"
+                )
+                # self.context.add(context_item)
+
                 # HACK: use content of ContextItem as return value, for legacy support
-                return_value = return_value[1]
+                return_value = context_item.content
 
             result = ActionSuccessResult(return_value)
         except AgentException as e:
             result = ActionErrorResult(e.message, e)
 
+        logger.debug(f"Command result: {result}")
+
         result_tlength = count_string_tokens(str(result), self.llm.name)
         memory_tlength = count_string_tokens(
             str(self.history.summary_message()), self.llm.name
@@ -212,12 +219,15 @@ def execute(
             )
         elif result.status == "error":
             message = f"Command {command_name} failed: {result.reason}"
+
+            # Append hint to the error message if the exception has a hint
             if (
                 result.error
                 and isinstance(result.error, AgentException)
                 and result.error.hint
             ):
                 message = message.rstrip(".") + f". {result.error.hint}"
+
             self.history.add("system", message, "action_result")
 
         return result
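The hint mechanism added here composes friendlier error messages for the LLM. A sketch of the intended behavior in plain Python — the `hint` attribute on `AgentException` is assumed from its use above:

```python
# Hypothetical failure message and hint, showing the rstrip-and-append logic.
message = "Command execute_python_file failed: [Errno 13] Permission denied."
hint = "Consider changing the file's permissions."

message = message.rstrip(".") + f". {hint}"
assert message == (
    "Command execute_python_file failed: [Errno 13] Permission denied. "
    "Consider changing the file's permissions."
)
```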
{result.error.hint}" + self.history.add("system", message, "action_result") return result From 319ad2e009bf40a6726ba7124ae271a6b17559f7 Mon Sep 17 00:00:00 2001 From: Reinier van der Leer Date: Sat, 19 Aug 2023 16:55:10 +0200 Subject: [PATCH 20/20] Add ask_user command and revise system prompt --- autogpt/commands/__init__.py | 1 + autogpt/commands/system.py | 4 +--- autogpt/commands/user_interaction.py | 28 ++++++++++++++++++++++++++++ autogpt/prompts/generator.py | 6 +++--- prompt_settings.yaml | 11 ++++------- 5 files changed, 37 insertions(+), 13 deletions(-) create mode 100644 autogpt/commands/user_interaction.py diff --git a/autogpt/commands/__init__.py b/autogpt/commands/__init__.py index 018f5b8fcfb6..939e66edee76 100644 --- a/autogpt/commands/__init__.py +++ b/autogpt/commands/__init__.py @@ -1,6 +1,7 @@ COMMAND_CATEGORIES = [ "autogpt.commands.execute_code", "autogpt.commands.file_operations", + "autogpt.commands.user_interaction", "autogpt.commands.web_search", "autogpt.commands.web_selenium", "autogpt.commands.system", diff --git a/autogpt/commands/system.py b/autogpt/commands/system.py index 08bfd5e57ea7..77100413a804 100644 --- a/autogpt/commands/system.py +++ b/autogpt/commands/system.py @@ -5,8 +5,6 @@ COMMAND_CATEGORY = "system" COMMAND_CATEGORY_TITLE = "System" -from typing import NoReturn - from autogpt.agents.agent import Agent from autogpt.command_decorator import command from autogpt.logs import logger @@ -23,7 +21,7 @@ } }, ) -def task_complete(reason: str, agent: Agent) -> NoReturn: +def task_complete(reason: str, agent: Agent) -> None: """ A function that takes in a string and exits the program diff --git a/autogpt/commands/user_interaction.py b/autogpt/commands/user_interaction.py new file mode 100644 index 000000000000..e741859c8d11 --- /dev/null +++ b/autogpt/commands/user_interaction.py @@ -0,0 +1,28 @@ +"""Commands to interact with the user""" + +from __future__ import annotations + +COMMAND_CATEGORY = "user_interaction" +COMMAND_CATEGORY_TITLE = "User Interaction" + +from autogpt.agents.agent import Agent +from autogpt.app.utils import clean_input +from autogpt.command_decorator import command + + +@command( + "ask_user", + ( + "If you need more details or information regarding the given goals," + " you can ask the user for input" + ), + { + "question": { + "type": "string", + "description": "The question or prompt to the user", + "required": True, + } + }, +) +def ask_user(question: str, agent: Agent) -> str: + return clean_input(agent.config, question) diff --git a/autogpt/prompts/generator.py b/autogpt/prompts/generator.py index a8217953dbbf..b1f661e1246b 100644 --- a/autogpt/prompts/generator.py +++ b/autogpt/prompts/generator.py @@ -131,12 +131,12 @@ def generate_prompt_string( "## Constraints\n" "You operate within the following constraints:\n" f"{self._generate_numbered_list(self.constraints + additional_constraints)}\n\n" - "## Commands\n" - "You have access to the following commands:\n" - f"{self._generate_commands()}\n\n" "## Resources\n" "You can leverage access to the following resources:\n" f"{self._generate_numbered_list(self.resources + additional_resources)}\n\n" + "## Commands\n" + "You have access to the following commands:\n" + f"{self._generate_commands()}\n\n" "## Best practices\n" f"{self._generate_numbered_list(self.best_practices + additional_best_practices)}" ) diff --git a/prompt_settings.yaml b/prompt_settings.yaml index a83ca6225bba..4cdc03127c60 100644 --- a/prompt_settings.yaml +++ b/prompt_settings.yaml @@ -1,14 +1,11 @@ 
@@ -1,14 +1,11 @@
 constraints: [
-  '~4000 word limit for short term memory. Your short term memory is short, so immediately save important information to files.',
-  'If you are unsure how you previously did something or want to recall past events, thinking about similar events will help you remember.',
-  'No user assistance',
-  'Exclusively use the commands listed below e.g. command_name'
+  'Exclusively use the commands listed below.',
+  'You can only act proactively, and are unable to start background jobs or set up webhooks for yourself. Take this into account when planning your actions.',
+  'You are unable to interact with physical objects. If this is absolutely necessary to fulfill a task or objective or to complete a step, you must ask the user to do it for you. If the user refuses this, and there is no other way to achieve your goals, you must terminate to avoid wasting time and energy.'
 ]
 resources: [
   'Internet access for searches and information gathering.',
-  'Long Term memory management.',
-  'File output.',
-  'Command execution'
+  'The ability to read and write files.',
 ]
 best_practices: [
   'Continuously review and analyze your actions to ensure you are performing to the best of your abilities.',
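The net effect of this final patch: user interaction becomes an explicit command, and the prompt sections are reordered so the command list follows the resources. A sketch of invoking the new command directly — `agent` is assumed to be an initialized `Agent`:

```python
from autogpt.commands.user_interaction import ask_user

reply = ask_user("Which environment should I deploy to?", agent=agent)
# clean_input() blocks on stdin; the user's reply becomes the command's
# result and flows back into the agent loop like any other command output.
```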