[SIEM Rule Migration] Minor prompt improvements (elastic#201941)

## Summary Adds some minor improvements and modifies the format of some of the prompts used in the siem_migration process. A few specifics below: 1. Moved prebuilt rules matching to JSON output parsing, as it's less prone to issues on some models. 2. Moved some of the formatting of the prompts to focus on XML-like tags, as this often gives good results on multiple models. Also, context that is relevant to performing an action is moved to the system prompt, while context related to the question (and to the response) is kept in the human prompt. 3. Moved process_query to ChatPromptTemplate, and because of this moved the collection of ResourceContext outside of the prompt.
tkajtoch · Nov 27, 2024 · eb87889 · eb87889
1 parent 048d5eb
commit eb87889
Show file tree

Hide file tree

Showing 7 changed files with 187 additions and 139 deletions.
diff --git a/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts b/x-pack/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts
@@ -20,7 +20,7 @@ export function getRuleMigrationAgent({
   connectorId,
   logger,
 }: MigrateRuleGraphParams) {
-  const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({ model, prebuiltRulesMap, logger });
+  const matchPrebuiltRuleNode = getMatchPrebuiltRuleNode({ model, prebuiltRulesMap });
   const translationSubGraph = getTranslateRuleGraph({
     model,
     inferenceClient,

diff --git a/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts b/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts
@@ -5,17 +5,19 @@
  * 2.0.
  */
 
-import type { Logger } from '@kbn/core/server';
-import { StringOutputParser } from '@langchain/core/output_parsers';
+import { JsonOutputParser } from '@langchain/core/output_parsers';
 import type { ChatModel } from '../../../util/actions_client_chat';
-import type { GraphNode } from '../../types';
 import { filterPrebuiltRules, type PrebuiltRulesMapByName } from '../../../util/prebuilt_rules';
+import type { GraphNode } from '../../types';
 import { MATCH_PREBUILT_RULE_PROMPT } from './prompts';
 
 interface GetMatchPrebuiltRuleNodeParams {
   model: ChatModel;
   prebuiltRulesMap: PrebuiltRulesMapByName;
-  logger: Logger;
+}
+
+interface GetMatchedRuleResponse {
+  match: string;
 }
 
 export const getMatchPrebuiltRuleNode =
@@ -25,34 +27,32 @@ export const getMatchPrebuiltRuleNode =
     if (!mitreAttackIds?.length) {
       return {};
     }
+
     const filteredPrebuiltRulesMap = filterPrebuiltRules(prebuiltRulesMap, mitreAttackIds);
     if (filteredPrebuiltRulesMap.size === 0) {
       return {};
     }
 
-    const outputParser = new StringOutputParser();
+    const outputParser = new JsonOutputParser();
     const matchPrebuiltRule = MATCH_PREBUILT_RULE_PROMPT.pipe(model).pipe(outputParser);
 
-    const elasticSecurityRules = Array(filteredPrebuiltRulesMap.keys()).join('\n');
-    const response = await matchPrebuiltRule.invoke({
+    const elasticSecurityRules = [...filteredPrebuiltRulesMap.keys()].join('\n');
+    const response = (await matchPrebuiltRule.invoke({
       elasticSecurityRules,
       ruleTitle: state.original_rule.title,
-    });
-    const cleanResponse = response.trim();
-    if (cleanResponse === 'no_match') {
-      return {};
-    }
-
-    const result = filteredPrebuiltRulesMap.get(cleanResponse);
-    if (result != null) {
-      return {
-        elastic_rule: {
-          title: result.rule.name,
-          description: result.rule.description,
-          prebuilt_rule_id: result.rule.rule_id,
-          id: result.installedRuleId,
-        },
-      };
+    })) as GetMatchedRuleResponse;
+    if (response.match) {
+      const result = filteredPrebuiltRulesMap.get(response.match);
+      if (result != null) {
+        return {
+          elastic_rule: {
+            title: result.rule.name,
+            description: result.rule.description,
+            prebuilt_rule_id: result.rule.rule_id,
+            id: result.installedRuleId,
+          },
+        };
+      }
     }
 
     return {};

diff --git a/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts b/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts
@@ -11,25 +11,39 @@ export const MATCH_PREBUILT_RULE_PROMPT = ChatPromptTemplate.fromMessages([
     'system',
     `You are an expert assistant in Cybersecurity, your task is to help migrating a SIEM detection rule, from Splunk Security to Elastic Security.
 You will be provided with a Splunk Detection Rule name by the user, your goal is to try find an Elastic Detection Rule that covers the same threat, if any.
-The list of Elastic Detection Rules suggested is provided in the context below.
+Here are some context for you to reference for your task, read it carefully as you will get questions about it later:
 
-Guidelines:
-If there is no Elastic rule in the list that covers the same threat, answer only with the string: no_match
-If there is one Elastic rule in the list that covers the same threat, answer only with its name without any further explanation.
-If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
-
-<ELASTIC_DETECTION_RULE_NAMES>
+<context>
+<elastic_detection_rule_names>
 {elasticSecurityRules}
-</ELASTIC_DETECTION_RULE_NAMES>
+</elastic_detection_rule_names>
+</context>
 `,
   ],
   [
     'human',
-    `The Splunk Detection Rule is:
-<<SPLUNK_RULE_TITLE>>
+    `See the below description of the relevant splunk rule and try to match it with any of the elastic detection rules with similar names.     
+<splunk_rule_name>
 {ruleTitle}
-<<SPLUNK_RULE_TITLE>>
+</splunk_rule_name>
+
+<guidelines>
+- Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name. Do not reply with anything else.
+- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else.
+- If there is one Elastic rule in the list that covers the same threat, set the name of the matching rule as a value of the match key. Do not reply with anything else.
+- If there are multiple rules in the list that cover the same threat, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
+</guidelines>
+
+<example_response>
+U: <splunk_rule_name>
+Linux Auditd Add User Account Type
+</splunk_rule_name>
+A: Please find the match JSON object below:
+\`\`\`json
+{{"match": "Linux User Account Creation"}}
+\`\`\`
+</example_response>
 `,
   ],
-  ['ai', 'Please find the answer below:'],
+  ['ai', 'Please find the match JSON object below:'],
 ]);
diff --git a/...igrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts b/...igrations/rules/task/agent/sub_graphs/translate_rule/nodes/process_query/process_query.ts
@@ -10,7 +10,7 @@ import { isEmpty } from 'lodash/fp';
 import type { ChatModel } from '../../../../../util/actions_client_chat';
 import type { RuleResourceRetriever } from '../../../../../util/rule_resource_retriever';
 import type { GraphNode } from '../../types';
-import { getReplaceQueryResourcesPrompt } from './prompts';
+import { REPLACE_QUERY_RESOURCE_PROMPT, getResourcesContext } from './prompts';
 
 interface GetProcessQueryNodeParams {
   model: ChatModel;
@@ -25,9 +25,15 @@ export const getProcessQueryNode = ({
     let query = state.original_rule.query;
     const resources = await resourceRetriever.getResources(state.original_rule);
     if (!isEmpty(resources)) {
-      const replaceQueryResourcesPrompt = getReplaceQueryResourcesPrompt(state, resources);
-      const stringParser = new StringOutputParser();
-      query = await model.pipe(stringParser).invoke(replaceQueryResourcesPrompt);
+      const replaceQueryParser = new StringOutputParser();
+      const replaceQueryResourcePrompt =
+        REPLACE_QUERY_RESOURCE_PROMPT.pipe(model).pipe(replaceQueryParser);
+      const resourceContext = getResourcesContext(resources);
+      query = await replaceQueryResourcePrompt.invoke({
+        query: state.original_rule.query,
+        macros: resourceContext.macros,
+        lookup_tables: resourceContext.lists,
+      });
     }
     return { inline_query: query };
   };