From 2a8b6d0a4490fbf09ccd3d03ab1b3a1fcfa9ec1c Mon Sep 17 00:00:00 2001
From: Ilya Nikokoshev
Date: Thu, 22 Aug 2024 01:25:12 +0000
Subject: [PATCH] [Automatic Import] Better recognize (ND)JSON formats and send samplesFormat to the backend (#190588)

## Summary

This adds a `samplesFormat` group to the API. This group is filled out by the frontend when parsing the provided samples and is used to set the log parsing specification for the produced integration. We check this parameter to add a toggle that supports multiline newline-delimited JSON in the filestream input.

## Release note

Automatic Import now supports the 'multiline newline-delimited JSON' log sample format for the Filestream input.

## Detailed Explanation

We add the optional `samplesFormat` group to the API, consisting of

- `name`,
- (optional) `multiline`,
- and (optional) `json_path`.

Example values of this parameter:

- `{ name: 'ndjson', multiline: false }` for newline-delimited JSON, known as [NDJSON](https://github.com/ndjson/ndjson-spec) (where each entry takes only one line)
- `{ name: 'ndjson', multiline: true }` for newline-delimited JSON where each entry can span multiple lines
- `{ name: 'json', json_path: [] }` for valid JSON with the structure `[{"key": "message1"}, {"key": "message2"}]`
- `{ name: 'json', json_path: ['events'] }` for valid JSON with the structure `{"events": [{"key": "message1"}, {"key": "message2"}]}`

The `json_path` parameter is only relevant for `name: 'json'` and refers to the path in the original JSON to the array representing the events to ingest. Currently only one level is recognized.

Not all combinations of a log format with an input type will work; more supported combinations, as well as better user feedback on unsupported combinations, will come later (see https://github.com/elastic/security-team/issues/10290). In this PR we add support for the multiline NDJSON format for the `filestream` input type.
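To illustrate the detection logic, here is a simplified sketch of the `parseNDJSON` helper this PR adds to `sample_logs_input.tsx`. In multiline mode the sample is split only at a newline that is immediately followed by `{`, so every event has to start with the curly brace in the first column; the sample strings below are made up for illustration.

```ts
// Simplified sketch of the NDJSON handling added in this PR (see parseNDJSON
// in sample_logs_input.tsx); the sample strings are illustrative only.
const parseNDJSON = (fileContent: string, multiline: boolean = false): unknown[] => {
  // In multiline mode, split only at a newline that starts a new JSON object.
  const separator = multiline ? /\n(?=\{)/ : '\n';
  return fileContent
    .split(separator)
    .filter((entry) => entry.trim() !== '') // Drop empty entries.
    .map((entry) => JSON.parse(entry)); // Each entry must be valid JSON on its own.
};

// Single-line NDJSON parses in either mode:
parseNDJSON('{"a": 1}\n{"b": 2}'); // => [{ a: 1 }, { b: 2 }]

// A pretty-printed entry parses only in multiline mode, which the frontend
// reports as samplesFormat: { name: 'ndjson', multiline: true }:
parseNDJSON('{"a": 1}\n{\n  "b": 2\n}', true); // => [{ a: 1 }, { b: 2 }]
```

When plain single-line NDJSON parsing fails, the frontend falls back first to JSON-array parsing and then to this multiline mode (see `parseLogsContent` in the diff below).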
This support comes in the form of a user-changeable toggle under "Advanced Settings" that is set to on when we detect the multiline NDJSON format.

--------- Co-authored-by: Marius Iversen Co-authored-by: Elastic Machine --- .../__jest__/fixtures/build_integration.ts | 1 + .../__jest__/fixtures/ecs_mapping.ts | 2 +- .../common/api/model/api_test.mock.ts | 1 + .../api/model/common_attributes.schema.yaml | 28 +++ .../common/api/model/common_attributes.ts | 32 +++- .../integration_assistant/common/index.ts | 1 + .../mocks/state.ts | 1 + .../sample_logs_input.test.tsx | 167 +++++++++++++++++- .../data_stream_step/sample_logs_input.tsx | 113 +++++++++--- .../steps/deploy_step/deploy_step.test.tsx | 1 + .../deploy_step/use_deploy_integration.ts | 7 + .../create_integration_assistant/types.ts | 3 +- .../server/graphs/ecs/graph.ts | 2 +- .../server/graphs/ecs/pipeline.ts | 2 +- .../server/integration_builder/data_stream.ts | 3 + .../routes/build_integration_routes.test.ts | 1 + .../server/templates/agent/filestream.yml.hbs | 7 + .../manifest/filestream_manifest.yml.njk | 10 ++ .../integration_assistant/server/types.ts | 2 +- 19 files changed, 350 insertions(+), 34 deletions(-) diff --git a/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts b/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts index 78228d5a4cbca..3161f06f8a6ae 100644 --- a/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts +++ b/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts @@ -42,6 +42,7 @@ export const testIntegration: Integration = { }, ], }, + samplesFormat: { name: 'ndjson', multiline: false }, }, ], }; diff --git a/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts b/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts index d9195f3eca1a9..112f9f2348dec 100644 --- a/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts +++ b/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts @@ -446,7 +446,7 @@ export const ecsTestState = { missingKeys: [], invalidEcsFields: [], results: { test: 'testresults' }, - logFormat: 'testlogformat', + samplesFormat: 'testsamplesFormat', ecsVersion: 'testversion', currentMapping: { test1: 'test1' }, lastExecutedChain: 'testchain', diff --git a/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts b/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts index 92208abd04832..e0205a231babd 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts +++ b/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts @@ -26,6 +26,7 @@ export const getDataStreamMock = (): DataStream => ({ ], rawSamples, pipeline: getPipelineMock(), + samplesFormat: { name: 'ndjson', multiline: false }, }); export const getIntegrationMock = (): Integration => ({ diff --git a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml index 6ded459c876a1..7839a2dd3eaf7 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml +++ b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml @@ -36,6 +36,30 @@ components: items: type: object + SamplesFormatName: + type: string + description: The name of the log samples format.
+ enum: + - ndjson + - json + + SamplesFormat: + type: object + description: Format of the provided log samples. + required: + - name + properties: + name: + $ref: "#/components/schemas/SamplesFormatName" + multiline: + type: boolean + description: For some formats, specifies whether the samples can be multiline. + json_path: + type: array + description: For a JSON format, describes how to get to the sample array from the root of the JSON. + items: + type: string + Pipeline: type: object description: The pipeline object. @@ -92,6 +116,7 @@ components: - rawSamples - pipeline - docs + - samplesFormat properties: name: type: string @@ -116,6 +141,9 @@ components: docs: $ref: "#/components/schemas/Docs" description: The documents of the dataStream. + samplesFormat: + $ref: "#/components/schemas/SamplesFormat" + description: The format of log samples in this dataStream. Integration: type: object diff --git a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts index 1c5bcf970a1b4..07d5323dc0969 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts +++ b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts @@ -45,6 +45,30 @@ export const Connector = z.string(); export type Docs = z.infer; export const Docs = z.array(z.object({}).passthrough()); +/** + * The name of the log samples format. + */ +export type SamplesFormatName = z.infer; +export const SamplesFormatName = z.enum(['ndjson', 'json']); +export type SamplesFormatNameEnum = typeof SamplesFormatName.enum; +export const SamplesFormatNameEnum = SamplesFormatName.enum; + +/** + * Format of the provided log samples. + */ +export type SamplesFormat = z.infer; +export const SamplesFormat = z.object({ + name: SamplesFormatName, + /** + * For some formats, specifies whether the samples can be multiline. + */ + multiline: z.boolean().optional(), + /** + * For a JSON format, describes how to get to the sample array from the root of the JSON. + */ + json_path: z.array(z.string()).optional(), +}); + /** * The pipeline object. */ @@ -128,6 +152,10 @@ export const DataStream = z.object({ * The documents of the dataStream. */ docs: Docs, + /** + * The format of log samples in this dataStream. + */ + samplesFormat: SamplesFormat, }); /** @@ -163,11 +191,11 @@ export const Integration = z.object({ export type LangSmithOptions = z.infer; export const LangSmithOptions = z.object({ /** - * The project name to use with tracing. + * The project name. */ projectName: z.string(), /** - * The api key for the project + * The apiKey to use for tracing. 
*/ apiKey: z.string(), }); diff --git a/x-pack/plugins/integration_assistant/common/index.ts b/x-pack/plugins/integration_assistant/common/index.ts index c49e2825d8206..6a473d976fa88 100644 --- a/x-pack/plugins/integration_assistant/common/index.ts +++ b/x-pack/plugins/integration_assistant/common/index.ts @@ -22,6 +22,7 @@ export type { Integration, Pipeline, Docs, + SamplesFormat, } from './api/model/common_attributes'; export type { ESProcessorItem } from './api/model/processor_attributes'; diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts index 3b356c39dea7f..aa310f034290c 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts @@ -420,6 +420,7 @@ export const mockState: State = { dataStreamDescription: 'Mocked Data Stream Description', inputTypes: ['filestream'], logsSampleParsed: rawSamples, + samplesFormat: { name: 'ndjson', multiline: false }, }, isGenerating: false, result, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx index a137933afed3f..4c15aa8a4785c 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx @@ -8,7 +8,7 @@ import React from 'react'; import { act, fireEvent, render, waitFor, type RenderResult } from '@testing-library/react'; import { TestProvider } from '../../../../../mocks/test_provider'; -import { SampleLogsInput } from './sample_logs_input'; +import { parseNDJSON, parseJSONArray, SampleLogsInput } from './sample_logs_input'; import { ActionsProvider } from '../../state'; import { mockActions } from '../../mocks/state'; import { mockServices } from '../../../../../services/mocks/services'; @@ -27,6 +27,119 @@ const changeFile = async (input: HTMLElement, file: File) => { }); }; +const simpleNDJSON = `{"message":"test message 1"}\n{"message":"test message 2"}`; +const multilineNDJSON = `{"message":"test message 1"}\n\n{\n "message":\n "test message 2"\n}\n\n`; +const splitNDJSON = simpleNDJSON.split('\n'); +const complexEventsJSON = `{"events":[\n{"message":"test message 1"},\n{"message":"test message 2"}\n]}`; +const nonIdentifierLikeKeyInJSON = `{"1event":[\n{"message":"test message 1"},\n{"message":"test message 2"}\n]}`; + +describe('parseNDJSON', () => { + const content = [{ message: 'test message 1' }, { message: 'test message 2' }]; + const validNDJSONWithSpaces = `{"message":"test message 1"} + {"message":"test message 2"}`; + const singlelineArray = '[{"message":"test message 1"}, {"message":"test message 2"}]'; + const multilineArray = '[{"message":"test message 1"},\n{"message":"test message 2"}]'; + + it('should parse valid NDJSON', () => { + expect(parseNDJSON(simpleNDJSON, false)).toEqual(content); + 
expect(parseNDJSON(simpleNDJSON, true)).toEqual(content); + }); + + it('should parse valid NDJSON with extra spaces in single-line mode', () => { + expect(parseNDJSON(validNDJSONWithSpaces, false)).toEqual(content); + }); + + it('should not parse valid NDJSON with extra spaces in multiline mode', () => { + expect(() => parseNDJSON(validNDJSONWithSpaces, true)).toThrow(); + }); + + it('should not parse multiline NDJSON in single-line mode', () => { + expect(() => parseNDJSON(multilineNDJSON, false)).toThrow(); + }); + + it('should parse multiline NDJSON in multiline mode', () => { + expect(parseNDJSON(multilineNDJSON, true)).toEqual(content); + }); + + it('should parse single-line JSON Array', () => { + expect(parseNDJSON(singlelineArray, false)).toEqual([content]); + expect(parseNDJSON(singlelineArray, true)).toEqual([content]); + }); + + it('should not parse a multi-line JSON Array', () => { + expect(() => parseNDJSON(multilineArray, false)).toThrow(); + expect(() => parseNDJSON(multilineArray, true)).toThrow(); + }); + + it('should parse single-line JSON with one entry', () => { + const fileContent = '{"message":"test message 1"}'; + expect(parseNDJSON(fileContent)).toEqual([{ message: 'test message 1' }]); + }); + + it('should handle empty content', () => { + expect(parseNDJSON(' ', false)).toEqual([]); + expect(parseNDJSON(' ', true)).toEqual([]); + }); + + it('should handle empty lines in file content', () => { + const fileContent = '\n\n{"message":"test message 1"}\n\n{"message":"test message 2"}\n\n'; + expect(parseNDJSON(fileContent, false)).toEqual(content); + expect(parseNDJSON(fileContent, true)).toEqual(content); + }); +}); + +describe('parseJSONArray', () => { + const content = [{ message: 'test message 1' }, { message: 'test message 2' }]; + const singlelineArray = '[{"message":"test message 1"},{"message":"test message 2"}]'; + const multilineArray = '[{"message":"test message 1"},\n{"message":"test message 2"}]'; + const multilineWithSpacesArray = + ' [ \n\n{"message": "test message 1"},\n{"message" :\n\n"test message 2"}\n]\n'; + const malformedJSON = '[{"message":"test message 1"}'; + + it('should parse valid JSON array', () => { + const expected = { + entries: content, + pathToEntries: [], + errorNoArrayFound: false, + }; + expect(parseJSONArray(singlelineArray)).toEqual(expected); + expect(parseJSONArray(multilineArray)).toEqual(expected); + expect(parseJSONArray(multilineWithSpacesArray)).toEqual(expected); + }); + + it('should parse valid JSON object with array entries', () => { + const expected = { + entries: content, + pathToEntries: ['events'], + errorNoArrayFound: false, + }; + expect(parseJSONArray(complexEventsJSON)).toEqual(expected); + }); + + it('should pass even if the JSON object with array entries does not have an identifier-like key', () => { + const expected = { + entries: content, + pathToEntries: ['1event'], + errorNoArrayFound: false, + }; + expect(parseJSONArray(nonIdentifierLikeKeyInJSON)).toEqual(expected); + }); + + it('should return error for JSON that does not contain an array', () => { + const fileContent = '{"records" : {"message": "test message 1"}}'; + const expected = { + entries: [], + pathToEntries: [], + errorNoArrayFound: true, + }; + expect(parseJSONArray(fileContent)).toEqual(expected); + }); + + it('should throw an error for invalid JSON object', () => { + expect(() => parseJSONArray(malformedJSON)).toThrow(); + }); +}); + describe('SampleLogsInput', () => { let result: RenderResult; let input: HTMLElement; @@ -49,6 +162,7 @@
describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: logsSampleRaw.split(','), + samplesFormat: { name: 'json', json_path: [] }, }); }); @@ -61,6 +175,7 @@ describe('SampleLogsInput', () => { it('should truncate the logs sample', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: tooLargeLogsSample.split(',').slice(0, 10), + samplesFormat: { name: 'json', json_path: [] }, }); }); it('should add a notification toast', () => { @@ -71,6 +186,19 @@ describe('SampleLogsInput', () => { }); }); + describe('when the file is a json array under a key', () => { + beforeEach(async () => { + await changeFile(input, new File([complexEventsJSON], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: splitNDJSON, + samplesFormat: { name: 'json', json_path: ['events'] }, + }); + }); + }); + describe('when the file is invalid', () => { describe.each([ [ @@ -91,6 +219,7 @@ describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: undefined, + samplesFormat: undefined, }); }); }); @@ -101,19 +230,19 @@ describe('SampleLogsInput', () => { const type = 'application/x-ndjson'; describe('when the file is valid ndjson', () => { - const logsSampleRaw = `{"message":"test message 1"}\n{"message":"test message 2"}`; beforeEach(async () => { - await changeFile(input, new File([logsSampleRaw], 'test.json', { type })); + await changeFile(input, new File([simpleNDJSON], 'test.json', { type })); }); it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ - logsSampleParsed: logsSampleRaw.split('\n'), + logsSampleParsed: splitNDJSON, + samplesFormat: { name: 'ndjson', multiline: false }, }); }); describe('when the file has too many rows', () => { - const tooLargeLogsSample = Array(6).fill(logsSampleRaw).join('\n'); // 12 entries + const tooLargeLogsSample = Array(6).fill(simpleNDJSON).join('\n'); // 12 entries beforeEach(async () => { await changeFile(input, new File([tooLargeLogsSample], 'test.json', { type })); }); @@ -121,6 +250,7 @@ describe('SampleLogsInput', () => { it('should truncate the logs sample', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: tooLargeLogsSample.split('\n').slice(0, 10), + samplesFormat: { name: 'ndjson', multiline: false }, }); }); it('should add a notification toast', () => { @@ -131,6 +261,32 @@ describe('SampleLogsInput', () => { }); }); + describe('when the file is an ndjson with a single record', () => { + beforeEach(async () => { + await changeFile(input, new File([multilineNDJSON.split('\n')[0]], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: [splitNDJSON[0]], + samplesFormat: { name: 'ndjson', multiline: false }, + }); + }); + }); + + describe('when the file is multiline ndjson', () => { + beforeEach(async () => { + await changeFile(input, new File([multilineNDJSON], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: splitNDJSON, + samplesFormat: { name:
'ndjson', multiline: true }, + }); + }); + }); + describe('when the file is invalid', () => { describe.each([ [ @@ -151,6 +307,7 @@ describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: undefined, + samplesFormat: undefined, }); }); diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx index cb4f735cc707c..fd9c2e3f8c362 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx @@ -12,46 +12,105 @@ import { isPlainObject } from 'lodash/fp'; import type { IntegrationSettings } from '../../types'; import * as i18n from './translations'; import { useActions } from '../../state'; +import type { SamplesFormat } from '../../../../../../common'; const MaxLogsSampleRows = 10; +/** + * Parse the logs sample file content as newline-delimited JSON (NDJSON). + * + * This supports multiline JSON objects when passed the multiline flag. + * Note that in that case the { character must appear at the beginning of the + * line if and only if it denotes the start of a new JSON object. Thus some + * inputs that will be parsed as NDJSON without the multiline flag will _not_ be + * parsed as NDJSON with the multiline flag. + */ +export const parseNDJSON = (fileContent: string, multiline: boolean = false): unknown[] => { + const separator = multiline ? /\n(?=\{)/ : '\n'; + + return fileContent + .split(separator) // For multiline, split at newline followed by '{'. + .filter((entry) => entry.trim() !== '') // Remove empty entries. + .map((entry) => JSON.parse(entry)); // Parse each entry as JSON. +}; + +/** + * Parse the logs sample file content as JSON and find an array of entries in it. + * + * If the JSON object can be parsed, but is not an array, we try to find a candidate + * among the dictionary keys (its value must be an array, and it must be the only key whose value is an array). + * + * @returns Both the parsed entries and the path to the entries in the JSON object in case of + * success. Otherwise, sets errorNoArrayFound if appropriate. If the parsing failed, raises an error.
+ */ +export const parseJSONArray = ( + fileContent: string +): { entries: unknown[]; pathToEntries: string[]; errorNoArrayFound: boolean } => { + const jsonContent = JSON.parse(fileContent); + if (Array.isArray(jsonContent)) { + return { entries: jsonContent, pathToEntries: [], errorNoArrayFound: false }; + } + if (typeof jsonContent === 'object' && jsonContent !== null) { + const arrayKeys = Object.keys(jsonContent).filter((key) => Array.isArray(jsonContent[key])); + if (arrayKeys.length === 1) { + const key = arrayKeys[0]; + return { + entries: jsonContent[key], + pathToEntries: [key], + errorNoArrayFound: false, + }; + } + } + return { errorNoArrayFound: true, entries: [], pathToEntries: [] }; +}; + /** * Parse the logs sample file content (json or ndjson) and return the parsed logs sample */ const parseLogsContent = ( fileContent: string | undefined -): { error?: string; isTruncated?: boolean; logsSampleParsed?: string[] } => { +): { + error?: string; + isTruncated?: boolean; + logsSampleParsed?: string[]; + samplesFormat?: SamplesFormat; +} => { if (fileContent == null) { return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_READ }; } - let parsedContent; + let parsedContent: unknown[]; + let samplesFormat: SamplesFormat; + try { - parsedContent = fileContent - .split('\n') - .filter((line) => line.trim() !== '') - .map((line) => JSON.parse(line)); + parsedContent = parseNDJSON(fileContent); // Special case for files that can be parsed as both JSON and NDJSON: - // for a one-line array [] -> extract its contents - // for a one-line object {} -> do nothing - if ( - Array.isArray(parsedContent) && - parsedContent.length === 1 && - Array.isArray(parsedContent[0]) - ) { + // for a one-line array [] -> extract its contents (it's a JSON) + // for a one-line object {} -> do nothing (keep as NDJSON) + if (parsedContent.length === 1 && Array.isArray(parsedContent[0])) { parsedContent = parsedContent[0]; + samplesFormat = { name: 'json', json_path: [] }; + } else { + samplesFormat = { name: 'ndjson', multiline: false }; } } catch (parseNDJSONError) { try { - parsedContent = JSON.parse(fileContent); + const { entries, pathToEntries, errorNoArrayFound } = parseJSONArray(fileContent); + if (errorNoArrayFound) { + return { error: i18n.LOGS_SAMPLE_ERROR.NOT_ARRAY }; + } + parsedContent = entries; + samplesFormat = { name: 'json', json_path: pathToEntries }; } catch (parseJSONError) { - return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_PARSE }; + try { + parsedContent = parseNDJSON(fileContent, true); + samplesFormat = { name: 'ndjson', multiline: true }; + } catch (parseMultilineNDJSONError) { + return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_PARSE }; + } } } - if (!Array.isArray(parsedContent)) { - return { error: i18n.LOGS_SAMPLE_ERROR.NOT_ARRAY }; - } if (parsedContent.length === 0) { return { error: i18n.LOGS_SAMPLE_ERROR.EMPTY }; } @@ -67,7 +126,7 @@ const parseLogsContent = ( } const logsSampleParsed = parsedContent.map((log) => JSON.stringify(log)); - return { isTruncated, logsSampleParsed }; + return { isTruncated, logsSampleParsed, samplesFormat }; }; interface SampleLogsInputProps { @@ -84,18 +143,27 @@ export const SampleLogsInput = React.memo(({ integrationSe const logsSampleFile = files?.[0]; if (logsSampleFile == null) { setSampleFileError(undefined); - setIntegrationSettings({ ...integrationSettings, logsSampleParsed: undefined }); + setIntegrationSettings({ + ...integrationSettings, + logsSampleParsed: undefined, + samplesFormat: undefined, + }); return; } const reader = new FileReader(); 
reader.onload = function (e) { const fileContent = e.target?.result as string | undefined; // We can safely cast to string since we call `readAsText` to load the file. - const { error, isTruncated, logsSampleParsed } = parseLogsContent(fileContent); + const { error, isTruncated, logsSampleParsed, samplesFormat } = + parseLogsContent(fileContent); setIsParsing(false); setSampleFileError(error); if (error) { - setIntegrationSettings({ ...integrationSettings, logsSampleParsed: undefined }); + setIntegrationSettings({ + ...integrationSettings, + logsSampleParsed: undefined, + samplesFormat: undefined, + }); return; } @@ -106,6 +174,7 @@ export const SampleLogsInput = React.memo(({ integrationSe setIntegrationSettings({ ...integrationSettings, logsSampleParsed, + samplesFormat, }); }; setIsParsing(true); diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx index 094d4bd37ad31..d4920ba927d20 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx @@ -34,6 +34,7 @@ const parameters: BuildIntegrationRequestBody = { rawSamples: integrationSettings.logsSampleParsed!, docs: results.docs!, pipeline: results.pipeline, + samplesFormat: integrationSettings.samplesFormat!, }, ], }, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts index 7e12cdad8f611..c1451a9d81a9d 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts @@ -46,6 +46,12 @@ export const useDeployIntegration = ({ (async () => { try { + if (integrationSettings.samplesFormat == null) { + throw new Error( + 'Logic error: samplesFormat is required and cannot be null or undefined when creating integration.' + ); + } + const parameters: BuildIntegrationRequestBody = { integration: { title: integrationSettings.title ?? '', @@ -61,6 +67,7 @@ export const useDeployIntegration = ({ rawSamples: integrationSettings.logsSampleParsed ?? [], docs: result.docs ?? 
[], pipeline: result.pipeline, + samplesFormat: integrationSettings.samplesFormat, }, ], }, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts index ec0ea443d37c7..c924415ec53e1 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts @@ -8,7 +8,7 @@ import type { OpenAiProviderType } from '@kbn/stack-connectors-plugin/public/common'; import type { ActionConnector } from '@kbn/triggers-actions-ui-plugin/public'; import type { UserConfiguredActionConnector } from '@kbn/triggers-actions-ui-plugin/public/types'; -import type { InputType } from '../../../../common'; +import type { InputType, SamplesFormat } from '../../../../common'; interface GenAiConfig { apiUrl?: string; @@ -34,4 +34,5 @@ export interface IntegrationSettings { dataStreamName?: string; inputTypes?: InputType[]; logsSampleParsed?: string[]; + samplesFormat?: SamplesFormat; } diff --git a/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts b/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts index 2c8e7283d4728..1d02f3c8970d8 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts @@ -82,7 +82,7 @@ const graphState: StateGraphArgs['channels'] = { value: (x: object, y?: object) => y ?? x, default: () => ({}), }, - logFormat: { + samplesFormat: { value: (x: string, y?: string) => y ?? x, default: () => 'json', }, diff --git a/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts b/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts index d925f443873e4..0dc7e772a94cf 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts @@ -173,7 +173,7 @@ export function createPipeline(state: EcsMappingState): IngestPipeline { ecs_version: state.ecsVersion, package_name: state.packageName, data_stream_name: state.dataStreamName, - log_format: state.logFormat, + log_format: state.samplesFormat, fields_to_remove: fieldsToRemove, }; const templatesPath = joinPath(__dirname, '../../templates'); diff --git a/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts b/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts index 3fb1fa21dc753..02b3f12f53d68 100644 --- a/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts +++ b/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts @@ -19,6 +19,8 @@ export function createDataStream( const pipelineDir = joinPath(specificDataStreamDir, 'elasticsearch', 'ingest_pipeline'); const title = dataStream.title; const description = dataStream.description; + const samplesFormat = dataStream.samplesFormat; + const useMultilineNDJSON = samplesFormat.name === 'ndjson' && samplesFormat.multiline === true; ensureDirSync(specificDataStreamDir); createDataStreamFolders(specificDataStreamDir, pipelineDir); @@ -31,6 +33,7 @@ export function createDataStream( data_stream_description: description, package_name: packageName, data_stream_name: dataStreamName, + multiline_ndjson: useMultilineNDJSON, }; const dataStreamManifest = 
nunjucks.render( `${inputType.replaceAll('-', '_')}_manifest.yml.njk`, diff --git a/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts b/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts index 3b73e4afb1c94..b57dd670df03f 100644 --- a/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts +++ b/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts @@ -40,6 +40,7 @@ describe('registerIntegrationBuilderRoutes', () => { processors: [{ script: { source: {} } }], }, docs: [], + samplesFormat: { name: 'ndjson', multiline: false }, }, ], }, diff --git a/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs b/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs index 437accfc32650..5732a1e67e41f 100644 --- a/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs +++ b/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs @@ -8,6 +8,13 @@ prospector.scanner.exclude_files: - {{pattern}} {{/each}} {{/if}} +{{#if multiline_ndjson}} +multiline.pattern: '^{' +multiline.negate: true +multiline.match: after +multiline.max_lines: 5000 +multiline.timeout: 10 +{{/if}} {{#if custom}} {{custom}} {{/if}} \ No newline at end of file diff --git a/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk b/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk index f4e0781a76c27..28870a2e2d338 100644 --- a/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk +++ b/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk @@ -22,6 +22,16 @@ show_user: true default: - '\.gz$' + {% if multiline_ndjson %} + - name: multiline_ndjson + type: bool + title: Parse multiline JSON events + description: >- + Enables parsing of newline-delimited JSON-formatted events that take more than one line. Each event must start with the curly brace at the first column. + required: false + show_user: false + default: true + {% endif %} - name: custom type: yaml title: Additional Filestream Configuration Options diff --git a/x-pack/plugins/integration_assistant/server/types.ts b/x-pack/plugins/integration_assistant/server/types.ts index f98686145690e..3bbe25a8fbd0f 100644 --- a/x-pack/plugins/integration_assistant/server/types.ts +++ b/x-pack/plugins/integration_assistant/server/types.ts @@ -71,7 +71,7 @@ export interface EcsMappingState { missingKeys: string[]; invalidEcsFields: string[]; results: object; - logFormat: string; + samplesFormat: string; ecsVersion: string; }