Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(agent): Implement more tolerant json_loads function #7016

Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
LanguageModelClassification,
PromptStrategy,
)
from autogpt.core.prompting.utils import json_loads
from autogpt.core.resource.model_providers.schema import (
AssistantChatMessage,
ChatMessage,
ChatModelProvider,
CompletionModelFunction,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import json_loads

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
CompletionModelFunction,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.json_utils.utilities import extract_dict_from_response
from autogpt.core.utils.json_utils import extract_dict_from_json, json_loads
from autogpt.prompts.utils import format_numbered_list, indent


Expand Down Expand Up @@ -386,7 +386,7 @@ def parse_response_content(
else f" '{response.content}'"
)
)
assistant_reply_dict = extract_dict_from_response(response.content)
assistant_reply_dict = extract_dict_from_json(response.content)
self.logger.debug(
"Validating object extracted from LLM response:\n"
f"{json.dumps(assistant_reply_dict, indent=4)}"
Expand Down Expand Up @@ -439,7 +439,7 @@ def extract_command(
raise InvalidAgentResponseError("No 'tool_calls' in assistant reply")
assistant_reply_json["command"] = {
"name": assistant_reply.tool_calls[0].function.name,
"args": json.loads(assistant_reply.tool_calls[0].function.arguments),
"args": json_loads(assistant_reply.tool_calls[0].function.arguments),
}
try:
if not isinstance(assistant_reply_json, dict):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from autogpt.core.planning.schema import Task, TaskType
from autogpt.core.prompting import PromptStrategy
from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
from autogpt.core.prompting.utils import json_loads, to_numbered_list
from autogpt.core.prompting.utils import to_numbered_list
from autogpt.core.resource.model_providers import (
AssistantChatMessage,
ChatMessage,
CompletionModelFunction,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import json_loads

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
from autogpt.core.configuration import SystemConfiguration, UserConfigurable
from autogpt.core.prompting import PromptStrategy
from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
from autogpt.core.prompting.utils import json_loads
from autogpt.core.resource.model_providers import (
AssistantChatMessage,
ChatMessage,
CompletionModelFunction,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import json_loads

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from autogpt.core.planning.schema import Task
from autogpt.core.prompting import PromptStrategy
from autogpt.core.prompting.schema import ChatPrompt, LanguageModelClassification
from autogpt.core.prompting.utils import json_loads, to_numbered_list
from autogpt.core.prompting.utils import to_numbered_list
from autogpt.core.resource.model_providers import (
AssistantChatMessage,
ChatMessage,
CompletionModelFunction,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import json_loads

logger = logging.getLogger(__name__)

Expand Down
20 changes: 0 additions & 20 deletions autogpts/autogpt/autogpt/core/prompting/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
import ast
import json


def to_numbered_list(
items: list[str], no_items_response: str = "", **template_args
) -> str:
Expand All @@ -11,19 +7,3 @@ def to_numbered_list(
)
else:
return no_items_response


def json_loads(json_str: str):
    """Parse the outermost ``{...}`` object embedded in *json_str*.

    Everything before the first ``{`` and after the last ``}`` is discarded.
    The remaining text is parsed as strict JSON first; if that fails, it is
    evaluated as a Python literal, which tolerates minor issues such as
    trailing commas that the function API still sometimes returns.

    Args:
        json_str: Text containing a JSON object, possibly surrounded by
            other content.

    Returns:
        The parsed object.

    Raises:
        ValueError: If *json_str* contains no ``{`` or no ``}``, or the
            extracted text is not a valid literal.
        SyntaxError: If the extracted text is neither valid JSON nor valid
            Python literal syntax.
    """
    # str.index / str.rindex raise ValueError when a brace is missing.
    json_str = json_str[json_str.index("{") : json_str.rindex("}") + 1]
    try:
        # Strict JSON first, so that true / false / null are understood.
        return json.loads(json_str)
    except json.JSONDecodeError:
        # Lenient fallback for near-JSON (trailing commas, single quotes).
        # Errors from literal_eval propagate to the caller instead of
        # dropping into a debugger.
        return ast.literal_eval(json_str)
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
ModelTokenizer,
)
from autogpt.core.utils.json_schema import JSONSchema
from autogpt.core.utils.json_utils import json_loads

_T = TypeVar("_T")
_P = ParamSpec("_P")
Expand Down Expand Up @@ -758,19 +759,18 @@ def _functions_compat_fix_kwargs(


def _tool_calls_compat_extract_calls(response: str) -> Iterator[AssistantToolCall]:
import json
import re
import uuid

logging.debug(f"Trying to extract tool calls from response:\n{response}")

if response[0] == "[":
tool_calls: list[AssistantToolCallDict] = json.loads(response)
tool_calls: list[AssistantToolCallDict] = json_loads(response)
else:
block = re.search(r"```(?:tool_calls)?\n(.*)\n```\s*$", response, re.DOTALL)
if not block:
raise ValueError("Could not find tool_calls block in response")
tool_calls: list[AssistantToolCallDict] = json.loads(block.group(1))
tool_calls: list[AssistantToolCallDict] = json_loads(block.group(1))

for t in tool_calls:
t["id"] = str(uuid.uuid4())
Expand Down
92 changes: 92 additions & 0 deletions autogpts/autogpt/autogpt/core/utils/json_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import io
import logging
import re
from typing import Any

import demjson3

logger = logging.getLogger(__name__)


def json_loads(json_str: str) -> Any:
    """Parse a JSON string, tolerating minor syntax issues:
    - Missing, extra and trailing commas
    - Extraneous newlines and whitespace outside of string literals
    - Inconsistent spacing after colons and commas
    - Missing closing brackets or braces
    - Numbers: binary, hex, octal, trailing and prefixed decimal points
    - Different encodings
    - Surrounding markdown code block
    - Comments

    Args:
        json_str: The JSON string to parse.

    Returns:
        The parsed JSON object, same as built-in json.loads.

    Raises:
        ValueError: If no value could be parsed from the string.
    """
    # Remove possible code block
    pattern = r"```(?:json|JSON)*([\s\S]*?)```"
    match = re.search(pattern, json_str)

    if match:
        json_str = match.group(1).strip()

    # Collect parser diagnostics in memory so they can be logged instead of
    # being written to the default error stream.
    error_buffer = io.StringIO()
    json_result = demjson3.decode(
        json_str, return_errors=True, write_errors=error_buffer
    )

    parse_errors = error_buffer.getvalue()
    if parse_errors:
        logger.debug("JSON parse errors:\n%s", parse_errors)

    # With return_errors=True the decoder reports an unrecoverable parse
    # through a sentinel in `.object` rather than by returning None, so the
    # sentinels must be checked explicitly to avoid leaking them to callers.
    if (
        json_result is None
        or json_result.object is demjson3.undefined
        or json_result.object is demjson3.syntax_error
    ):
        raise ValueError(f"Failed to parse JSON string: {json_str}")

    return json_result.object


def extract_dict_from_json(json_str: str) -> dict[str, Any]:
    """Locate a JSON object inside *json_str* and return it as a dict.

    The first markdown code block (optionally tagged ``json``/``JSON``) is
    preferred. Without one, the widest ``{...}`` span in the text is used;
    if neither is found, the text is parsed as-is.

    Args:
        json_str: Raw text that is expected to contain a JSON object.

    Returns:
        The parsed dictionary.

    Raises:
        ValueError: If the parsed value is not a dict.
    """
    fenced = re.search(r"```(?:json|JSON)*([\s\S]*?)```", json_str)
    if fenced is not None:
        json_str = fenced.group(1).strip()
    else:
        # No code block: fall back to the outermost pair of braces.
        braced = re.search(r"{[\s\S]*}", json_str)
        if braced is not None:
            json_str = braced.group()

    parsed = json_loads(json_str)
    if isinstance(parsed, dict):
        return parsed
    raise ValueError(
        f"Response '''{json_str}''' evaluated to non-dict value {repr(parsed)}"
    )


def extract_list_from_json(json_str: str) -> list[Any]:
    """Locate a JSON array inside *json_str* and return it as a list.

    The first markdown code block (optionally tagged ``json``/``JSON``) is
    preferred. Without one, the widest ``[...]`` span in the text is used;
    if neither is found, the text is parsed as-is.

    Args:
        json_str: Raw text that is expected to contain a JSON array.

    Returns:
        The parsed list.

    Raises:
        ValueError: If the parsed value is not a list.
    """
    fenced = re.search(r"```(?:json|JSON)*([\s\S]*?)```", json_str)
    if fenced is not None:
        json_str = fenced.group(1).strip()
    else:
        # No code block: fall back to the outermost pair of square brackets.
        bracketed = re.search(r"\[[\s\S]*\]", json_str)
        if bracketed is not None:
            json_str = bracketed.group()

    parsed = json_loads(json_str)
    if isinstance(parsed, list):
        return parsed
    raise ValueError(
        f"Response '''{json_str}''' evaluated to non-list value {repr(parsed)}"
    )
Empty file.
55 changes: 0 additions & 55 deletions autogpts/autogpt/autogpt/json_utils/utilities.py

This file was deleted.

7 changes: 3 additions & 4 deletions autogpts/autogpt/autogpt/processing/text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Text processing functions"""

import logging
import math
from typing import Iterator, Optional, TypeVar
Expand All @@ -12,7 +13,7 @@
ChatModelProvider,
ModelTokenizer,
)
from autogpt.json_utils.utilities import extract_list_from_response
from autogpt.core.utils.json_utils import extract_list_from_json

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -161,9 +162,7 @@ async def _process_text(
temperature=0.5,
max_tokens=max_result_tokens,
completion_parser=lambda s: (
extract_list_from_response(s.content)
if output_type is not str
else None
extract_list_from_json(s.content) if output_type is not str else None
),
)

Expand Down
15 changes: 13 additions & 2 deletions autogpts/autogpt/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions autogpts/autogpt/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ boto3 = "^1.33.6"
charset-normalizer = "^3.1.0"
click = "*"
colorama = "^0.4.6"
demjson3 = "^3.0.0"
distro = "^1.8.0"
docker = "*"
duckduckgo-search = "^4.0.0"
Expand Down
Loading
Loading