Significant-Gravitas · Pwuts · Mar 21, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
@@ -8,14 +8,14 @@
     LanguageModelClassification,
     PromptStrategy,
 )
-from autogpt.core.prompting.utils import json_loads
 from autogpt.core.resource.model_providers.schema import (
     AssistantChatMessage,
     ChatMessage,
     ChatModelProvider,
     CompletionModelFunction,
 )
 from autogpt.core.utils.json_schema import JSONSchema
+from autogpt.core.utils.json_utils import json_loads
 
 logger = logging.getLogger(__name__)
 

@@ -26,7 +26,7 @@
     CompletionModelFunction,
 )
 from autogpt.core.utils.json_schema import JSONSchema
-from autogpt.json_utils.utilities import extract_dict_from_response
+from autogpt.core.utils.json_utils import extract_dict_from_json, json_loads
 from autogpt.prompts.utils import format_numbered_list, indent
 
 
@@ -386,7 +386,7 @@ def parse_response_content(
                 else f" '{response.content}'"
             )
         )
-        assistant_reply_dict = extract_dict_from_response(response.content)
+        assistant_reply_dict = extract_dict_from_json(response.content)
         self.logger.debug(
             "Validating object extracted from LLM response:\n"
             f"{json.dumps(assistant_reply_dict, indent=4)}"
@@ -439,7 +439,7 @@ def extract_command(
             raise InvalidAgentResponseError("No 'tool_calls' in assistant reply")
         assistant_reply_json["command"] = {
             "name": assistant_reply.tool_calls[0].function.name,
-            "args": json.loads(assistant_reply.tool_calls[0].function.arguments),
+            "args": json_loads(assistant_reply.tool_calls[0].function.arguments),
         }
     try:
         if not isinstance(assistant_reply_json, dict):

@@ -4,13 +4,14 @@
 from autogpt.core.planning.schema import Task, TaskType
 from autogpt.core.prompting import PromptStrategy
 from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
-from autogpt.core.prompting.utils import json_loads, to_numbered_list
+from autogpt.core.prompting.utils import to_numbered_list
 from autogpt.core.resource.model_providers import (
     AssistantChatMessage,
     ChatMessage,
     CompletionModelFunction,
 )
 from autogpt.core.utils.json_schema import JSONSchema
+from autogpt.core.utils.json_utils import json_loads
 
 logger = logging.getLogger(__name__)
 

@@ -3,13 +3,13 @@
 from autogpt.core.configuration import SystemConfiguration, UserConfigurable
 from autogpt.core.prompting import PromptStrategy
 from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
-from autogpt.core.prompting.utils import json_loads
 from autogpt.core.resource.model_providers import (
     AssistantChatMessage,
     ChatMessage,
     CompletionModelFunction,
 )
 from autogpt.core.utils.json_schema import JSONSchema
+from autogpt.core.utils.json_utils import json_loads
 
 logger = logging.getLogger(__name__)
 

@@ -4,13 +4,14 @@
 from autogpt.core.planning.schema import Task
 from autogpt.core.prompting import PromptStrategy
 from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
-from autogpt.core.prompting.utils import json_loads, to_numbered_list
+from autogpt.core.prompting.utils import to_numbered_list
 from autogpt.core.resource.model_providers import (
     AssistantChatMessage,
     ChatMessage,
     CompletionModelFunction,
 )
 from autogpt.core.utils.json_schema import JSONSchema
+from autogpt.core.utils.json_utils import json_loads
 
 logger = logging.getLogger(__name__)
 

@@ -1,7 +1,3 @@
-import ast
-import json
-
-
 def to_numbered_list(
     items: list[str], no_items_response: str = "", **template_args
 ) -> str:
@@ -11,19 +7,3 @@ def to_numbered_list(
         )
     else:
         return no_items_response
-
-
-def json_loads(json_str: str):
-    # TODO: this is a hack function for now. We'll see what errors show up in testing.
-    #   Can hopefully just replace with a call to ast.literal_eval.
-    # Can't use json.loads because the function API still sometimes returns json strings
-    #   with minor issues like trailing commas.
-    try:
-        json_str = json_str[json_str.index("{") : json_str.rindex("}") + 1]
-        return ast.literal_eval(json_str)
-    except json.decoder.JSONDecodeError as e:
-        try:
-            print(f"json decode error {e}. trying literal eval")
-            return ast.literal_eval(json_str)
-        except Exception:
-            breakpoint()
@@ -38,6 +38,7 @@
     ModelTokenizer,
 )
 from autogpt.core.utils.json_schema import JSONSchema
+from autogpt.core.utils.json_utils import json_loads
 
 _T = TypeVar("_T")
 _P = ParamSpec("_P")
@@ -758,19 +759,18 @@ def _functions_compat_fix_kwargs(
 
 
 def _tool_calls_compat_extract_calls(response: str) -> Iterator[AssistantToolCall]:
-    import json
     import re
     import uuid
 
     logging.debug(f"Trying to extract tool calls from response:\n{response}")
 
     if response[0] == "[":
-        tool_calls: list[AssistantToolCallDict] = json.loads(response)
+        tool_calls: list[AssistantToolCallDict] = json_loads(response)
     else:
         block = re.search(r"```(?:tool_calls)?\n(.*)\n```\s*$", response, re.DOTALL)
         if not block:
             raise ValueError("Could not find tool_calls block in response")
-        tool_calls: list[AssistantToolCallDict] = json.loads(block.group(1))
+        tool_calls: list[AssistantToolCallDict] = json_loads(block.group(1))
 
     for t in tool_calls:
         t["id"] = str(uuid.uuid4())

@@ -0,0 +1,92 @@
+import io
+import logging
+import re
+from typing import Any
+
+import demjson3
+
+logger = logging.getLogger(__name__)
+
+
+def json_loads(json_str: str) -> Any:
+    """Parse a JSON string, tolerating minor syntax issues:
+    - Missing, extra and trailing commas
+    - Extraneous newlines and whitespace outside of string literals
+    - Inconsistent spacing after colons and commas
+    - Missing closing brackets or braces
+    - Numbers: binary, hex, octal, trailing and prefixed decimal points
+    - Different encodings
+    - Surrounding markdown code block
+    - Comments
+
+    Args:
+        json_str: The JSON string to parse.
+
+    Returns:
+        The parsed JSON object, same as built-in json.loads.
+
+    Raises:
+        ValueError: If no value could be decoded from the string.
+    """
+    # Remove possible code block
+    match = re.search(r"```(?:json|JSON)*([\s\S]*?)```", json_str)
+    if match:
+        json_str = match.group(1).strip()
+
+    errors_io = io.StringIO()
+    result = demjson3.decode(json_str, return_errors=True, write_errors=errors_io)
+    if errors := errors_io.getvalue():
+        logger.debug(f"JSON parse errors:\n{errors}")
+
+    # With return_errors=True, demjson3 reports a failed parse via sentinel values
+    # in `.object` rather than by returning None (getattr still tolerates None).
+    parsed = getattr(result, "object", demjson3.undefined)
+    if parsed is demjson3.syntax_error or parsed is demjson3.undefined:
+        raise ValueError(f"Failed to parse JSON string: {json_str}")
+    return parsed
+
+
+def extract_dict_from_json(json_str: str) -> dict[str, Any]:
+    """Extract a JSON object from text that may wrap it in a code fence or prose.
+
+    A ``` fenced block (optionally tagged json/JSON) takes priority; otherwise
+    the span from the first "{" to the last "}" is used, else the whole string.
+
+    Raises:
+        ValueError: if the parsed value is not a dict.
+    """
+    fence = re.search(r"```(?:json|JSON)*([\s\S]*?)```", json_str)
+    if fence:
+        json_str = fence.group(1).strip()
+    elif braces := re.search(r"{[\s\S]*}", json_str):
+        json_str = braces.group()
+
+    result = json_loads(json_str)
+    if isinstance(result, dict):
+        return result
+    raise ValueError(
+        f"Response '''{json_str}''' evaluated to non-dict value {repr(result)}"
+    )
+
+
+def extract_list_from_json(json_str: str) -> list[Any]:
+    """Extract a JSON array from text that may wrap it in a code fence or prose.
+
+    A ``` fenced block (optionally tagged json/JSON) takes priority; otherwise
+    the span from the first "[" to the last "]" is used, else the whole string.
+
+    Raises:
+        ValueError: if the parsed value is not a list.
+    """
+    fence = re.search(r"```(?:json|JSON)*([\s\S]*?)```", json_str)
+    if fence:
+        json_str = fence.group(1).strip()
+    elif brackets := re.search(r"\[[\s\S]*\]", json_str):
+        json_str = brackets.group()
+
+    result = json_loads(json_str)
+    if isinstance(result, list):
+        return result
+    raise ValueError(
+        f"Response '''{json_str}''' evaluated to non-list value {repr(result)}"
+    )
@@ -1,4 +1,5 @@
 """Text processing functions"""
+
 import logging
 import math
 from typing import Iterator, Optional, TypeVar
@@ -12,7 +13,7 @@
     ChatModelProvider,
     ModelTokenizer,
 )
-from autogpt.json_utils.utilities import extract_list_from_response
+from autogpt.core.utils.json_utils import extract_list_from_json
 
 logger = logging.getLogger(__name__)
 
@@ -161,9 +162,7 @@ async def _process_text(
             temperature=0.5,
             max_tokens=max_result_tokens,
             completion_parser=lambda s: (
-                extract_list_from_response(s.content)
-                if output_type is not str
-                else None
+                extract_list_from_json(s.content) if output_type is not str else None
             ),
         )
 

@@ -30,6 +30,7 @@ boto3 = "^1.33.6"
 charset-normalizer = "^3.1.0"
 click = "*"
 colorama = "^0.4.6"
+demjson3 = "^3.0.0"
 distro = "^1.8.0"
 docker = "*"
 duckduckgo-search = "^4.0.0"