From b6599227902c1ec13da98d104a1284ec048ee7d7 Mon Sep 17 00:00:00 2001 From: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> Date: Thu, 3 Oct 2024 20:42:06 +1000 Subject: [PATCH] [8.15] [Automatic Import] Fix Non-ecs compatible fields in grok processor (#194727) (#194792) # Backport This will backport the following commits from `main` to `8.15`: - [[Automatic Import] Fix Non-ecs compatible fields in grok processor (#194727)](https://github.com/elastic/kibana/pull/194727) ### Questions ? Please refer to the [Backport tool documentation](https://github.com/sqren/backport) Co-authored-by: Bharat Pasupula <123897612+bhapas@users.noreply.github.com> --- .../server/graphs/kv/constants.ts | 7 ++- .../server/graphs/kv/error.ts | 2 + .../server/graphs/kv/header.ts | 2 + .../server/graphs/kv/prompts.ts | 6 ++- .../server/graphs/kv/validate.ts | 51 +++++++++++-------- .../server/graphs/related/prompts.ts | 2 + .../server/graphs/unstructured/error.ts | 2 + .../server/graphs/unstructured/prompts.ts | 2 + .../server/graphs/unstructured/types.ts | 5 +- .../graphs/unstructured/unstructured.ts | 2 + .../graphs/unstructured/validate.test.ts | 12 +++-- .../server/graphs/unstructured/validate.ts | 11 ++-- .../server/templates/pipeline.yml.njk | 4 +- 13 files changed, 74 insertions(+), 34 deletions(-) diff --git a/x-pack/plugins/integration_assistant/server/graphs/kv/constants.ts b/x-pack/plugins/integration_assistant/server/graphs/kv/constants.ts index 92cc55841bb98..183898ec31354 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/kv/constants.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/kv/constants.ts @@ -17,12 +17,13 @@ export const KV_HEADER_EXAMPLE_ANSWER = { rfc: 'RFC2454', regex: '/(?:(d{4}[-]d{2}[-]d{2}[T]d{2}[:]d{2}[:]d{2}(?:.d{1,6})?(?:[+-]d{2}[:]d{2}|Z)?)|-)s(?:([w][wd.@-]*)|-)s(.*)$/', - grok_pattern: '%{WORD:key1}:%{WORD:value1};%{WORD:key2}:%{WORD:value2}:%{GREEDYDATA:message}', + grok_pattern: + '%{WORD:cisco.audit.key1}:%{WORD:cisco.audit.value1};%{WORD:cisco.audit.key2}:%{WORD:cisco.audit.value2}:%{GREEDYDATA:message}', }; export const KV_HEADER_ERROR_EXAMPLE_ANSWER = { grok_pattern: - '%{TIMESTAMP:timestamp}:%{WORD:value1};%{WORD:key2}:%{WORD:value2}:%{GREEDYDATA:message}', + '%{TIMESTAMP:cisco.audit.timestamp}:%{WORD:cisco.audit.value1};%{WORD:cisco.audit.key2}:%{WORD:cisco.audit.value2}:%{GREEDYDATA:message}', }; export const onFailure = { @@ -33,6 +34,8 @@ export const onFailure = { }, }; +export const removeProcessor = { remove: { field: 'message', ignore_missing: true } }; + export const COMMON_ERRORS = [ { error: 'field [message] does not contain value_split [=]', diff --git a/x-pack/plugins/integration_assistant/server/graphs/kv/error.ts b/x-pack/plugins/integration_assistant/server/graphs/kv/error.ts index b1b7c12a68d5a..dabaea0769442 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/kv/error.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/kv/error.ts @@ -46,6 +46,8 @@ export async function handleHeaderError({ const currentPattern = state.grokPattern; const pattern = await kvHeaderErrorGraph.invoke({ + packageName: state.packageName, + dataStreamName: state.dataStreamName, current_pattern: JSON.stringify(currentPattern, null, 2), errors: JSON.stringify(state.errors, null, 2), ex_answer: JSON.stringify(KV_HEADER_ERROR_EXAMPLE_ANSWER, null, 2), diff --git a/x-pack/plugins/integration_assistant/server/graphs/kv/header.ts b/x-pack/plugins/integration_assistant/server/graphs/kv/header.ts index 36d8968ab9e67..532bcfb9672c3 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/kv/header.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/kv/header.ts @@ -21,6 +21,8 @@ export async function handleHeader({ const pattern = await kvHeaderGraph.invoke({ samples: state.logSamples, + packageName: state.packageName, + dataStreamName: state.dataStreamName, ex_answer: JSON.stringify(KV_HEADER_EXAMPLE_ANSWER, null, 2), }); diff --git a/x-pack/plugins/integration_assistant/server/graphs/kv/prompts.ts b/x-pack/plugins/integration_assistant/server/graphs/kv/prompts.ts index 2ab1073a4ba8b..21889be26cfb2 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/kv/prompts.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/kv/prompts.ts @@ -79,8 +79,9 @@ Follow these steps to identify the header pattern: You ALWAYS follow these guidelines when writing your response: - - Do not parse the message part in the regex. Just the header part should be in regex nad grok_pattern. + - Do not parse the message part in the regex. Just the header part should be in regex and grok_pattern. - Make sure to map the remaining message body to \'message\' in grok pattern. + - Make sure to add \`{packageName}.{dataStreamName}\` as a prefix to each field in the pattern. Refer to example response. - Do not respond with anything except the processor as a JSON object enclosed with 3 backticks (\`), see example response above. Use strict JSON response format. @@ -121,8 +122,9 @@ Follow these steps to fix the errors in the header pattern: 4. Make sure the regex and grok pattern matches all the header information. Only the structured message body should be under GREEDYDATA in grok pattern. You ALWAYS follow these guidelines when writing your response: - - Do not parse the message part in the regex. Just the header part should be in regex nad grok_pattern. + - Do not parse the message part in the regex. Just the header part should be in regex and grok_pattern. - Make sure to map the remaining message body to \'message\' in grok pattern. + - Make sure to add \`{packageName}.{dataStreamName}\` as a prefix to each field in the pattern. Refer to example response. - Do not respond with anything except the processor as a JSON object enclosed with 3 backticks (\`), see example response above. Use strict JSON response format. diff --git a/x-pack/plugins/integration_assistant/server/graphs/kv/validate.ts b/x-pack/plugins/integration_assistant/server/graphs/kv/validate.ts index b0601de74aa5e..e130a69910076 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/kv/validate.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/kv/validate.ts @@ -10,10 +10,10 @@ import { ESProcessorItem } from '../../../common'; import type { KVState } from '../../types'; import type { HandleKVNodeParams } from './types'; import { testPipeline } from '../../util'; -import { onFailure } from './constants'; +import { onFailure, removeProcessor } from './constants'; import { createGrokProcessor } from '../../util/processors'; -interface KVResult { +interface StructuredLogResult { [packageName: string]: { [dataStreamName: string]: unknown }; } @@ -32,25 +32,24 @@ export async function handleKVValidate({ // Pick logSamples if there was no header detected. const samples = state.header ? state.kvLogMessages : state.logSamples; - - const { pipelineResults: kvOutputSamples, errors } = (await createJSONInput( - kvProcessor, - samples, - client, - state - )) as { pipelineResults: KVResult[]; errors: object[] }; - + const { errors } = await verifyKVProcessor(kvProcessor, samples, client); if (errors.length > 0) { return { errors, lastExecutedChain: 'kvValidate' }; } // Converts JSON Object into a string and parses it as a array of JSON strings - const jsonSamples = kvOutputSamples + const additionalProcessors = state.additionalProcessors; + additionalProcessors.push(kvProcessor[0]); + const samplesObject: StructuredLogResult[] = await buildJSONSamples( + state.logSamples, + additionalProcessors, + client + ); + + const jsonSamples = samplesObject .map((log) => log[packageName]) .map((log) => log[dataStreamName]) .map((log) => JSON.stringify(log)); - const additionalProcessors = state.additionalProcessors; - additionalProcessors.push(kvProcessor[0]); return { jsonSamples, @@ -89,15 +88,25 @@ export async function handleHeaderValidate({ }; } -async function createJSONInput( +async function verifyKVProcessor( kvProcessor: ESProcessorItem, formattedSamples: string[], - client: IScopedClusterClient, - state: KVState -): Promise<{ pipelineResults: object[]; errors: object[] }> { - // This processor removes the original message field in the JSON output - const removeProcessor = { remove: { field: 'message', ignore_missing: true } }; + client: IScopedClusterClient +): Promise<{ errors: object[] }> { + // This processor removes the original message field in the output const pipeline = { processors: [kvProcessor[0], removeProcessor], on_failure: [onFailure] }; - const { pipelineResults, errors } = await testPipeline(formattedSamples, pipeline, client); - return { pipelineResults, errors }; + const { errors } = await testPipeline(formattedSamples, pipeline, client); + return { errors }; +} + +async function buildJSONSamples( + samples: string[], + processors: object[], + client: IScopedClusterClient +): Promise { + const pipeline = { processors: [...processors, removeProcessor], on_failure: [onFailure] }; + const { pipelineResults } = (await testPipeline(samples, pipeline, client)) as { + pipelineResults: StructuredLogResult[]; + }; + return pipelineResults; } diff --git a/x-pack/plugins/integration_assistant/server/graphs/related/prompts.ts b/x-pack/plugins/integration_assistant/server/graphs/related/prompts.ts index 87947eb8763af..9fa50d5900806 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/related/prompts.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/related/prompts.ts @@ -33,6 +33,7 @@ For each pipeline result you find matching values that would fit any of the rela You ALWAYS follow these guidelines when writing your response: +- The \`message\` field may not be part of related fields. - You can use as many processor objects as needed to map all relevant pipeline result fields to any of the ECS related fields. - If no relevant fields or values are found that could be mapped confidently to any of the related fields, then respond with an empty array [] as valid JSON enclosed with 3 backticks (\`). - Do not respond with anything except the array of processors as a valid JSON objects enclosed with 3 backticks (\`), see example response below. @@ -79,6 +80,7 @@ Follow these steps to help resolve the current ingest pipeline issues: You ALWAYS follow these guidelines when writing your response: +- The \`message\` field may not be part of related fields. - Never use "split" in template values, only use the field name inside the triple brackets. If the error mentions "Improperly closed variable in query-template" then check each "value" field for any special characters and remove them. - If solving an error means removing the last processor in the list, then return an empty array [] as valid JSON enclosed with 3 backticks (\`). - Do not respond with anything except the complete updated array of processors as a valid JSON object enclosed with 3 backticks (\`), see example response below. diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/error.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/error.ts index d002dd19d5439..c353ae4d24c43 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/error.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/error.ts @@ -20,6 +20,8 @@ export async function handleUnstructuredError({ const currentPatterns = state.grokPatterns; const pattern = await grokErrorGraph.invoke({ + packageName: state.packageName, + dataStreamName: state.dataStreamName, current_pattern: JSON.stringify(currentPatterns, null, 2), errors: JSON.stringify(state.errors, null, 2), ex_answer: JSON.stringify(GROK_ERROR_EXAMPLE_ANSWER, null, 2), diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/prompts.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/prompts.ts index 5cf5c67135d53..7f19b2b0d28bc 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/prompts.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/prompts.ts @@ -39,6 +39,7 @@ export const GROK_MAIN_PROMPT = ChatPromptTemplate.fromMessages([ You ALWAYS follow these guidelines when writing your response: - Make sure to map the remaining message part to \'message\' in grok pattern. + - Make sure to add \`{packageName}.{dataStreamName}\` as a prefix to each field in the pattern. Refer to example response. - Do not respond with anything except the processor as a JSON object enclosed with 3 backticks (\`), see example response above. Use strict JSON response format. @@ -89,6 +90,7 @@ Follow these steps to help improve the grok patterns and apply it step by step: You ALWAYS follow these guidelines when writing your response: - Make sure to map the remaining message part to \'message\' in grok pattern. + - Make sure to add \`{packageName}.{dataStreamName}\` as a prefix to each field in the pattern. Refer to example response. - Do not respond with anything except the processor as a JSON object enclosed with 3 backticks (\`), see example response above. Use strict JSON response format. diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/types.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/types.ts index 218d3856cb661..8c1a32d5d74d1 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/types.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/types.ts @@ -25,7 +25,10 @@ export interface HandleUnstructuredNodeParams extends UnstructuredNodeParams { } export interface GrokResult { - [key: string]: unknown; grok_patterns: string[]; message: string; } + +export interface LogResult { + [packageName: string]: { [dataStreamName: string]: unknown }; +} diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/unstructured.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/unstructured.ts index 42186e796275f..c00d33a78b2d8 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/unstructured.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/unstructured.ts @@ -21,6 +21,8 @@ export async function handleUnstructured({ const samples = state.logSamples; const pattern = (await grokMainGraph.invoke({ + packageName: state.packageName, + dataStreamName: state.dataStreamName, samples: samples[0], ex_answer: JSON.stringify(GROK_EXAMPLE_ANSWER, null, 2), })) as GrokResult; diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.test.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.test.ts index 493834e3220f9..5251fa1a730a9 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.test.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.test.ts @@ -28,9 +28,15 @@ describe('Testing unstructured validation without errors', () => { const client = { asCurrentUser: { ingest: { - simulate: jest - .fn() - .mockReturnValue({ docs: [{ doc: { _source: { message: 'dummy data' } } }] }), + simulate: jest.fn().mockReturnValue({ + docs: [ + { + doc: { + _source: { testPackage: { testDatastream: { message: 'dummy data' } } }, + }, + }, + ], + }), }, }, } as unknown as IScopedClusterClient; diff --git a/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.ts b/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.ts index 043e38be0983f..eea7602b641d6 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/unstructured/validate.ts @@ -6,7 +6,7 @@ */ import type { UnstructuredLogState } from '../../types'; -import type { GrokResult, HandleUnstructuredNodeParams } from './types'; +import type { HandleUnstructuredNodeParams, LogResult } from './types'; import { testPipeline } from '../../util'; import { onFailure } from './constants'; import { createGrokProcessor } from '../../util/processors'; @@ -18,9 +18,11 @@ export async function handleUnstructuredValidate({ const grokPatterns = state.grokPatterns; const grokProcessor = createGrokProcessor(grokPatterns); const pipeline = { processors: grokProcessor, on_failure: [onFailure] }; + const packageName = state.packageName; + const dataStreamName = state.dataStreamName; const { pipelineResults, errors } = (await testPipeline(state.logSamples, pipeline, client)) as { - pipelineResults: GrokResult[]; + pipelineResults: LogResult[]; errors: object[]; }; @@ -28,7 +30,10 @@ export async function handleUnstructuredValidate({ return { errors, lastExecutedChain: 'unstructuredValidate' }; } - const jsonSamples: string[] = pipelineResults.map((entry) => JSON.stringify(entry)); + const jsonSamples = pipelineResults + .map((log) => log[packageName]) + .map((log) => log[dataStreamName]) + .map((log) => JSON.stringify(log)); const additionalProcessors = state.additionalProcessors; additionalProcessors.push(grokProcessor[0]); diff --git a/x-pack/plugins/integration_assistant/server/templates/pipeline.yml.njk b/x-pack/plugins/integration_assistant/server/templates/pipeline.yml.njk index d583d68c4b733..ba846dc50fba9 100644 --- a/x-pack/plugins/integration_assistant/server/templates/pipeline.yml.njk +++ b/x-pack/plugins/integration_assistant/server/templates/pipeline.yml.njk @@ -19,11 +19,11 @@ processors: field: originalMessage ignore_missing: true tag: remove_copied_message - if: 'ctx.event?.original != null'{% if log_format != 'unstructured' %} + if: 'ctx.event?.original != null' - remove: field: message ignore_missing: true - tag: remove_message{% endif %}{% if (log_format == 'json') or (log_format == 'ndjson') %} + tag: remove_message{% if (log_format == 'json') or (log_format == 'ndjson') %} - json: field: event.original tag: json_original