fix(AI Agent Node): Fix tools agent when using memory and Anthropic m…

…odels (#10513)
n8n-io · Sep 4, 2024 · 746e7b8 · 746e7b8
1 parent 8b5c333
commit 746e7b8
Showing 1 changed file with 67 additions and 8 deletions.
diff --git a/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/execute.ts b/packages/@n8n/nodes-langchain/nodes/agents/Agent/agents/ToolsAgent/execute.ts
@@ -1,20 +1,20 @@
 import { BINARY_ENCODING, NodeConnectionType, NodeOperationError } from 'n8n-workflow';
 import type { IExecuteFunctions, INodeExecutionData } from 'n8n-workflow';
 
-import type { AgentAction, AgentFinish, AgentStep } from 'langchain/agents';
+import type { AgentAction, AgentFinish } from 'langchain/agents';
 import { AgentExecutor, createToolCallingAgent } from 'langchain/agents';
 import type { BaseChatMemory } from '@langchain/community/memory/chat_memory';
 import type { BaseMessagePromptTemplateLike } from '@langchain/core/prompts';
 import { ChatPromptTemplate } from '@langchain/core/prompts';
 import { omit } from 'lodash';
 import type { Tool } from '@langchain/core/tools';
 import { DynamicStructuredTool } from '@langchain/core/tools';
-import { RunnableSequence } from '@langchain/core/runnables';
 import type { ZodObject } from 'zod';
 import { z } from 'zod';
 import type { BaseOutputParser, StructuredOutputParser } from '@langchain/core/output_parsers';
 import { OutputFixingParser } from 'langchain/output_parsers';
 import { HumanMessage } from '@langchain/core/messages';
+import { RunnableSequence } from '@langchain/core/runnables';
 import {
 	isChatInstance,
 	getPromptInputByType,
@@ -93,7 +93,69 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
 	const tools = (await getConnectedTools(this, true, false)) as Array<DynamicStructuredTool | Tool>;
 	const outputParser = (await getOptionalOutputParsers(this))?.[0];
 	let structuredOutputParserTool: DynamicStructuredTool | undefined;
+	/**
+	 * Ensures consistent handling of outputs regardless of the model used,
+	 * providing a unified output format for further processing.
+	 *
+	 * This method is necessary to handle different output formats from various language models.
+	 * Specifically, it checks if the agent step is the final step (contains returnValues) and determines
+	 * if the output is a simple string (e.g., from OpenAI models) or an array of outputs (e.g., from Anthropic models).
+	 *
+	 * Examples:
+	 * 1. Anthropic model output:
+	 * ```json
+	 *    {
+	 *      "output": [
+	 *        {
+	 *          "index": 0,
+	 *          "type": "text",
+	 *          "text": "The result of the calculation is approximately 1001.8166..."
+	 *        }
+	 *      ]
+	 *    }
+	 *```
+	 * 2. OpenAI model output:
+	 * ```json
+	 *    {
+	 *      "output": "The result of the calculation is approximately 1001.82..."
+	 *    }
+	 * ```
+	 *
+	 * @param steps - The agent finish or agent action steps.
+	 * @returns The modified agent finish steps or the original steps.
+	 */
+	function handleAgentFinishOutput(steps: AgentFinish | AgentAction[]) {
+		// Check if the steps contain multiple outputs
+		type AgentMultiOutputFinish = AgentFinish & {
+			returnValues: { output: Array<{ text: string; type: string; index: number }> };
+		};
+		const agentFinishSteps = steps as AgentMultiOutputFinish | AgentFinish;
+
+		if (agentFinishSteps.returnValues) {
+			const isMultiOutput = Array.isArray(agentFinishSteps.returnValues?.output);
 
+			if (isMultiOutput) {
+				// Define the type for each item in the multi-output array
+				type MultiOutputItem = { index: number; type: string; text: string };
+				const multiOutputSteps = agentFinishSteps.returnValues.output as MultiOutputItem[];
+
+				// Check if all items in the multi-output array are of type 'text'
+				const isTextOnly = (multiOutputSteps ?? []).every((output) => 'text' in output);
+
+				if (isTextOnly) {
+					// If all items are of type 'text', merge them into a single string
+					agentFinishSteps.returnValues.output = multiOutputSteps
+						.map((output) => output.text)
+						.join('\n')
+						.trim();
+				}
+				return agentFinishSteps;
+			}
+		}
+
+		// If the steps do not contain multiple outputs, return them as is
+		return agentFinishSteps;
+	}
 	async function agentStepsParser(
 		steps: AgentFinish | AgentAction[],
 	): Promise<AgentFinish | AgentAction[]> {
@@ -112,7 +174,6 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
 				};
 			}
 		}
-
 		// If the steps are an AgentFinish and the outputParser is defined it must mean that the LLM didn't use `format_final_response` tool so we will parse the output manually
 		if (outputParser && typeof steps === 'object' && (steps as AgentFinish).returnValues) {
 			const finalResponse = (steps as AgentFinish).returnValues;
@@ -126,7 +187,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
 				log: 'Final response formatted',
 			};
 		}
-		return steps;
+		return handleAgentFinishOutput(steps);
 	}
 
 	if (outputParser) {
@@ -172,9 +233,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
 	});
 	agent.streamRunnable = false;
 
-	const runnableAgent = RunnableSequence.from<{
-		steps: AgentStep[];
-	}>([agent, agentStepsParser]);
+	const runnableAgent = RunnableSequence.from([agent, agentStepsParser]);
 
 	const executor = AgentExecutor.fromAgentAndTools({
 		agent: runnableAgent,
@@ -196,7 +255,7 @@ export async function toolsAgentExecute(this: IExecuteFunctions): Promise<INodeE
 			});
 
 			if (input === undefined) {
-				throw new NodeOperationError(this.getNode(), 'The ‘text parameter is empty.');
+				throw new NodeOperationError(this.getNode(), 'The ‘text‘ parameter is empty.');
 			}
 
 			// OpenAI doesn't allow empty tools array so we will provide a more user-friendly error message