diff --git a/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts b/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts index 78228d5a4cbca..3161f06f8a6ae 100644 --- a/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts +++ b/x-pack/plugins/integration_assistant/__jest__/fixtures/build_integration.ts @@ -42,6 +42,7 @@ export const testIntegration: Integration = { }, ], }, + samplesFormat: { name: 'ndjson', multiline: false }, }, ], }; diff --git a/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts b/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts index d9195f3eca1a9..112f9f2348dec 100644 --- a/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts +++ b/x-pack/plugins/integration_assistant/__jest__/fixtures/ecs_mapping.ts @@ -446,7 +446,7 @@ export const ecsTestState = { missingKeys: [], invalidEcsFields: [], results: { test: 'testresults' }, - logFormat: 'testlogformat', + samplesFormat: 'testsamplesFormat', ecsVersion: 'testversion', currentMapping: { test1: 'test1' }, lastExecutedChain: 'testchain', diff --git a/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts b/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts index 92208abd04832..e0205a231babd 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts +++ b/x-pack/plugins/integration_assistant/common/api/model/api_test.mock.ts @@ -26,6 +26,7 @@ export const getDataStreamMock = (): DataStream => ({ ], rawSamples, pipeline: getPipelineMock(), + samplesFormat: { name: 'ndjson', multiline: false }, }); export const getIntegrationMock = (): Integration => ({ diff --git a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml index 6ded459c876a1..7839a2dd3eaf7 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml +++ b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.schema.yaml @@ -36,6 +36,30 @@ components: items: type: object + SamplesFormatName: + type: string + description: The name of the log samples format. + enum: + - ndjson + - json + + SamplesFormat: + type: object + description: Format of the provided log samples. + required: + - name + properties: + name: + $ref: "#/components/schemas/SamplesFormatName" + multiline: + type: boolean + description: For some formats, specifies whether the samples can be multiline. + json_path: + type: array + description: For a JSON format, describes how to get to the sample array from the root of the JSON. + items: + type: string + Pipeline: type: object description: The pipeline object. @@ -92,6 +116,7 @@ components: - rawSamples - pipeline - docs + - samplesFormat properties: name: type: string @@ -116,6 +141,9 @@ components: docs: $ref: "#/components/schemas/Docs" description: The documents of the dataStream. + samplesFormat: + $ref: "#/components/schemas/SamplesFormat" + description: The format of log samples in this dataStream. 
Integration: type: object diff --git a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts index 1c5bcf970a1b4..07d5323dc0969 100644 --- a/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts +++ b/x-pack/plugins/integration_assistant/common/api/model/common_attributes.ts @@ -45,6 +45,30 @@ export const Connector = z.string(); export type Docs = z.infer<typeof Docs>; export const Docs = z.array(z.object({}).passthrough()); +/** + * The name of the log samples format. + */ +export type SamplesFormatName = z.infer<typeof SamplesFormatName>; +export const SamplesFormatName = z.enum(['ndjson', 'json']); +export type SamplesFormatNameEnum = typeof SamplesFormatName.enum; +export const SamplesFormatNameEnum = SamplesFormatName.enum; + +/** + * Format of the provided log samples. + */ +export type SamplesFormat = z.infer<typeof SamplesFormat>; +export const SamplesFormat = z.object({ + name: SamplesFormatName, + /** + * For some formats, specifies whether the samples can be multiline. + */ + multiline: z.boolean().optional(), + /** + * For a JSON format, describes how to get to the sample array from the root of the JSON. + */ + json_path: z.array(z.string()).optional(), +}); + /** * The pipeline object. */ @@ -128,6 +152,10 @@ export const DataStream = z.object({ * The documents of the dataStream. */ docs: Docs, + /** + * The format of log samples in this dataStream. + */ + samplesFormat: SamplesFormat, }); /** @@ -163,11 +191,11 @@ export const Integration = z.object({ export type LangSmithOptions = z.infer<typeof LangSmithOptions>; export const LangSmithOptions = z.object({ /** - * The project name to use with tracing. + * The project name. */ projectName: z.string(), /** - * The api key for the project + * The apiKey to use for tracing.
*/ apiKey: z.string(), }); diff --git a/x-pack/plugins/integration_assistant/common/index.ts b/x-pack/plugins/integration_assistant/common/index.ts index c49e2825d8206..6a473d976fa88 100644 --- a/x-pack/plugins/integration_assistant/common/index.ts +++ b/x-pack/plugins/integration_assistant/common/index.ts @@ -22,6 +22,7 @@ export type { Integration, Pipeline, Docs, + SamplesFormat, } from './api/model/common_attributes'; export type { ESProcessorItem } from './api/model/processor_attributes'; diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts index 3b356c39dea7f..aa310f034290c 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/mocks/state.ts @@ -420,6 +420,7 @@ export const mockState: State = { dataStreamDescription: 'Mocked Data Stream Description', inputTypes: ['filestream'], logsSampleParsed: rawSamples, + samplesFormat: { name: 'ndjson', multiline: false }, }, isGenerating: false, result, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx index a137933afed3f..4c15aa8a4785c 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.test.tsx @@ -8,7 +8,7 @@ import React from 'react'; import { act, fireEvent, render, waitFor, type RenderResult } from '@testing-library/react'; import { TestProvider } from '../../../../../mocks/test_provider'; -import { SampleLogsInput } from './sample_logs_input'; +import { parseNDJSON, parseJSONArray, SampleLogsInput } from './sample_logs_input'; import { ActionsProvider } from '../../state'; import { mockActions } from '../../mocks/state'; import { mockServices } from '../../../../../services/mocks/services'; @@ -27,6 +27,119 @@ const changeFile = async (input: HTMLElement, file: File) => { }); }; +const simpleNDJSON = `{"message":"test message 1"}\n{"message":"test message 2"}`; +const multilineNDJSON = `{"message":"test message 1"}\n\n{\n "message":\n "test message 2"\n}\n\n`; +const splitNDJSON = simpleNDJSON.split('\n'); +const complexEventsJSON = `{"events":[\n{"message":"test message 1"},\n{"message":"test message 2"}\n]}`; +const nonIdentifierLikeKeyInJSON = `{"1event":[\n{"message":"test message 1"},\n{"message":"test message 2"}\n]}`; + +describe('parseNDJSON', () => { + const content = [{ message: 'test message 1' }, { message: 'test message 2' }]; + const validNDJSONWithSpaces = `{"message":"test message 1"} + {"message":"test message 2"}`; + const singlelineArray = '[{"message":"test message 1"}, {"message":"test message 2"}]'; + const multilineArray = '[{"message":"test message 1"},\n{"message":"test message 2"}]'; + + it('should parse valid NDJSON', () => { + expect(parseNDJSON(simpleNDJSON, false)).toEqual(content); + 
expect(parseNDJSON(simpleNDJSON, true)).toEqual(content); + }); + + it('should parse valid NDJSON with extra spaces in single-line mode', () => { + expect(parseNDJSON(validNDJSONWithSpaces, false)).toEqual(content); + }); + + it('should not parse valid NDJSON with extra spaces in multiline mode', () => { + expect(() => parseNDJSON(validNDJSONWithSpaces, true)).toThrow(); + }); + + it('should not parse multiline NDJSON in single-line mode', () => { + expect(() => parseNDJSON(multilineNDJSON, false)).toThrow(); + }); + + it('should parse multiline NDJSON in multiline mode', () => { + expect(parseNDJSON(multilineNDJSON, true)).toEqual(content); + }); + + it('should parse single-line JSON Array', () => { + expect(parseNDJSON(singlelineArray, false)).toEqual([content]); + expect(parseNDJSON(singlelineArray, true)).toEqual([content]); + }); + + it('should not parse a multi-line JSON Array', () => { + expect(() => parseNDJSON(multilineArray, false)).toThrow(); + expect(() => parseNDJSON(multilineArray, true)).toThrow(); + }); + + it('should parse single-line JSON with one entry', () => { + const fileContent = '{"message":"test message 1"}'; + expect(parseNDJSON(fileContent)).toEqual([{ message: 'test message 1' }]); + }); + + it('should handle empty content', () => { + expect(parseNDJSON(' ', false)).toEqual([]); + expect(parseNDJSON(' ', true)).toEqual([]); + }); + + it('should handle empty lines in file content', () => { + const fileContent = '\n\n{"message":"test message 1"}\n\n{"message":"test message 2"}\n\n'; + expect(parseNDJSON(fileContent, false)).toEqual(content); + expect(parseNDJSON(fileContent, true)).toEqual(content); + }); +}); + +describe('parseJSONArray', () => { + const content = [{ message: 'test message 1' }, { message: 'test message 2' }]; + const singlelineArray = '[{"message":"test message 1"},{"message":"test message 2"}]'; + const multilineArray = '[{"message":"test message 1"},\n{"message":"test message 2"}]'; + const multilineWithSpacesArray = + ' [ \n\n{"message": "test message 1"},\n{"message" :\n\n"test message 2"}\n]\n'; + const malformedJSON = '[{"message":"test message 1"}'; + + it('should parse valid JSON array', () => { + const expected = { + entries: content, + pathToEntries: [], + errorNoArrayFound: false, + }; + expect(parseJSONArray(singlelineArray)).toEqual(expected); + expect(parseJSONArray(multilineArray)).toEqual(expected); + expect(parseJSONArray(multilineWithSpacesArray)).toEqual(expected); + }); + + it('should parse valid JSON object with array entries', () => { + const expected = { + entries: content, + pathToEntries: ['events'], + errorNoArrayFound: false, + }; + expect(parseJSONArray(complexEventsJSON)).toEqual(expected); + }); + + it('should pass even if the JSON object with array entries has not an identifier-like key', () => { + const expected = { + entries: content, + pathToEntries: ['1event'], + errorNoArrayFound: false, + }; + expect(parseJSONArray(nonIdentifierLikeKeyInJSON)).toEqual(expected); + }); + + it('should return error for JSON that does not contain an array', () => { + const fileContent = '{"records" : {"message": "test message 1"}}'; + const expected = { + entries: [], + pathToEntries: [], + errorNoArrayFound: true, + }; + expect(parseJSONArray(fileContent)).toEqual(expected); + }); + + it('should throw an error for invalid JSON object', () => { + expect(() => parseJSONArray(malformedJSON)).toThrow(); + }); +}); + describe('SampleLogsInput', () => { let result: RenderResult; let input: HTMLElement; @@ -49,6 +162,7 @@ 
describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: logsSampleRaw.split(','), + samplesFormat: { name: 'json', json_path: [] }, }); }); @@ -61,6 +175,7 @@ describe('SampleLogsInput', () => { it('should truncate the logs sample', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: tooLargeLogsSample.split(',').slice(0, 10), + samplesFormat: { name: 'json', json_path: [] }, }); }); it('should add a notification toast', () => { @@ -71,6 +186,19 @@ describe('SampleLogsInput', () => { }); }); + describe('when the file is a json array under a key', () => { + beforeEach(async () => { + await changeFile(input, new File([complexEventsJSON], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: splitNDJSON, + samplesFormat: { name: 'json', json_path: ['events'] }, + }); + }); + }); + describe('when the file is invalid', () => { describe.each([ [ @@ -91,6 +219,7 @@ describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: undefined, + samplesFormat: undefined, }); }); }); @@ -101,19 +230,19 @@ describe('SampleLogsInput', () => { const type = 'application/x-ndjson'; describe('when the file is valid ndjson', () => { - const logsSampleRaw = `{"message":"test message 1"}\n{"message":"test message 2"}`; beforeEach(async () => { - await changeFile(input, new File([logsSampleRaw], 'test.json', { type })); + await changeFile(input, new File([simpleNDJSON], 'test.json', { type })); }); it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ - logsSampleParsed: logsSampleRaw.split('\n'), + logsSampleParsed: splitNDJSON, + samplesFormat: { name: 'ndjson', multiline: false }, }); }); describe('when the file has too many rows', () => { - const tooLargeLogsSample = Array(6).fill(logsSampleRaw).join('\n'); // 12 entries + const tooLargeLogsSample = Array(6).fill(simpleNDJSON).join('\n'); // 12 entries beforeEach(async () => { await changeFile(input, new File([tooLargeLogsSample], 'test.json', { type })); }); @@ -121,6 +250,7 @@ describe('SampleLogsInput', () => { it('should truncate the logs sample', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: tooLargeLogsSample.split('\n').slice(0, 10), + samplesFormat: { name: 'ndjson', multiline: false }, }); }); it('should add a notification toast', () => { @@ -131,6 +261,32 @@ describe('SampleLogsInput', () => { }); }); + describe('when the file is an ndjson with a single record', () => { + beforeEach(async () => { + await changeFile(input, new File([multilineNDJSON.split('\n')[0]], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: [splitNDJSON[0]], + samplesFormat: { name: 'ndjson', multiline: false }, + }); + }); + }); + + describe('when the file is multiline ndjson', () => { + beforeEach(async () => { + await changeFile(input, new File([multilineNDJSON], 'test.json', { type })); + }); + + it('should set the integrationSetting correctly', () => { + expect(mockActions.setIntegrationSettings).toBeCalledWith({ + logsSampleParsed: splitNDJSON, + samplesFormat: { name:
'ndjson', multiline: true }, + }); + }); + }); + describe('when the file is invalid', () => { describe.each([ [ @@ -151,6 +307,7 @@ describe('SampleLogsInput', () => { it('should set the integrationSetting correctly', () => { expect(mockActions.setIntegrationSettings).toBeCalledWith({ logsSampleParsed: undefined, + samplesFormat: undefined, }); }); }); diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx index cb4f735cc707c..fd9c2e3f8c362 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/data_stream_step/sample_logs_input.tsx @@ -12,46 +12,105 @@ import { isPlainObject } from 'lodash/fp'; import type { IntegrationSettings } from '../../types'; import * as i18n from './translations'; import { useActions } from '../../state'; +import type { SamplesFormat } from '../../../../../../common'; const MaxLogsSampleRows = 10; +/** + * Parse the logs sample file content as newline-delimited JSON (NDJSON). + * + * This supports multiline JSON objects if the multiline flag is passed. + * Note that in that case the { character must appear at the beginning of a + * line if and only if it denotes the start of a new JSON object. Thus some + * inputs that will be parsed as NDJSON without the multiline flag will _not_ be + * parsed as NDJSON with the multiline flag. + */ +export const parseNDJSON = (fileContent: string, multiline: boolean = false): unknown[] => { + const separator = multiline ? /\n(?=\{)/ : '\n'; + + return fileContent + .split(separator) // For multiline, split at newline followed by '{'. + .filter((entry) => entry.trim() !== '') // Remove empty entries. + .map((entry) => JSON.parse(entry)); // Parse each entry as JSON. +}; + +/** + * Parse the logs sample file content as JSON and find an array of entries in it. + * + * If the content parses as a JSON object rather than an array, we look for a candidate + * among its keys: there must be exactly one key whose value is an array, and that array is used as the entries. + * + * @returns The parsed entries and the path to them within the JSON object on success; + * otherwise errorNoArrayFound is set. Throws an error if the content cannot be parsed as JSON.
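 + * + * For illustration only (sample values chosen to mirror the accompanying unit tests): + * parseJSONArray('{"events": [{"message": "a"}, {"message": "b"}]}') + * // returns { entries: [{ message: 'a' }, { message: 'b' }], pathToEntries: ['events'], errorNoArrayFound: false }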
+ */ +export const parseJSONArray = ( + fileContent: string +): { entries: unknown[]; pathToEntries: string[]; errorNoArrayFound: boolean } => { + const jsonContent = JSON.parse(fileContent); + if (Array.isArray(jsonContent)) { + return { entries: jsonContent, pathToEntries: [], errorNoArrayFound: false }; + } + if (typeof jsonContent === 'object' && jsonContent !== null) { + const arrayKeys = Object.keys(jsonContent).filter((key) => Array.isArray(jsonContent[key])); + if (arrayKeys.length === 1) { + const key = arrayKeys[0]; + return { + entries: jsonContent[key], + pathToEntries: [key], + errorNoArrayFound: false, + }; + } + } + return { errorNoArrayFound: true, entries: [], pathToEntries: [] }; +}; + /** * Parse the logs sample file content (json or ndjson) and return the parsed logs sample */ const parseLogsContent = ( fileContent: string | undefined -): { error?: string; isTruncated?: boolean; logsSampleParsed?: string[] } => { +): { + error?: string; + isTruncated?: boolean; + logsSampleParsed?: string[]; + samplesFormat?: SamplesFormat; +} => { if (fileContent == null) { return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_READ }; } - let parsedContent; + let parsedContent: unknown[]; + let samplesFormat: SamplesFormat; + try { - parsedContent = fileContent - .split('\n') - .filter((line) => line.trim() !== '') - .map((line) => JSON.parse(line)); + parsedContent = parseNDJSON(fileContent); // Special case for files that can be parsed as both JSON and NDJSON: - // for a one-line array [] -> extract its contents - // for a one-line object {} -> do nothing - if ( - Array.isArray(parsedContent) && - parsedContent.length === 1 && - Array.isArray(parsedContent[0]) - ) { + // for a one-line array [] -> extract its contents (it's a JSON) + // for a one-line object {} -> do nothing (keep as NDJSON) + if (parsedContent.length === 1 && Array.isArray(parsedContent[0])) { parsedContent = parsedContent[0]; + samplesFormat = { name: 'json', json_path: [] }; + } else { + samplesFormat = { name: 'ndjson', multiline: false }; } } catch (parseNDJSONError) { try { - parsedContent = JSON.parse(fileContent); + const { entries, pathToEntries, errorNoArrayFound } = parseJSONArray(fileContent); + if (errorNoArrayFound) { + return { error: i18n.LOGS_SAMPLE_ERROR.NOT_ARRAY }; + } + parsedContent = entries; + samplesFormat = { name: 'json', json_path: pathToEntries }; } catch (parseJSONError) { - return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_PARSE }; + try { + parsedContent = parseNDJSON(fileContent, true); + samplesFormat = { name: 'ndjson', multiline: true }; + } catch (parseMultilineNDJSONError) { + return { error: i18n.LOGS_SAMPLE_ERROR.CAN_NOT_PARSE }; + } } } - if (!Array.isArray(parsedContent)) { - return { error: i18n.LOGS_SAMPLE_ERROR.NOT_ARRAY }; - } if (parsedContent.length === 0) { return { error: i18n.LOGS_SAMPLE_ERROR.EMPTY }; } @@ -67,7 +126,7 @@ const parseLogsContent = ( } const logsSampleParsed = parsedContent.map((log) => JSON.stringify(log)); - return { isTruncated, logsSampleParsed }; + return { isTruncated, logsSampleParsed, samplesFormat }; }; interface SampleLogsInputProps { @@ -84,18 +143,27 @@ export const SampleLogsInput = React.memo(({ integrationSe const logsSampleFile = files?.[0]; if (logsSampleFile == null) { setSampleFileError(undefined); - setIntegrationSettings({ ...integrationSettings, logsSampleParsed: undefined }); + setIntegrationSettings({ + ...integrationSettings, + logsSampleParsed: undefined, + samplesFormat: undefined, + }); return; } const reader = new FileReader(); 
reader.onload = function (e) { const fileContent = e.target?.result as string | undefined; // We can safely cast to string since we call `readAsText` to load the file. - const { error, isTruncated, logsSampleParsed } = parseLogsContent(fileContent); + const { error, isTruncated, logsSampleParsed, samplesFormat } = + parseLogsContent(fileContent); setIsParsing(false); setSampleFileError(error); if (error) { - setIntegrationSettings({ ...integrationSettings, logsSampleParsed: undefined }); + setIntegrationSettings({ + ...integrationSettings, + logsSampleParsed: undefined, + samplesFormat: undefined, + }); return; } @@ -106,6 +174,7 @@ export const SampleLogsInput = React.memo(({ integrationSe setIntegrationSettings({ ...integrationSettings, logsSampleParsed, + samplesFormat, }); }; setIsParsing(true); diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx index 094d4bd37ad31..d4920ba927d20 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/deploy_step.test.tsx @@ -34,6 +34,7 @@ const parameters: BuildIntegrationRequestBody = { rawSamples: integrationSettings.logsSampleParsed!, docs: results.docs!, pipeline: results.pipeline, + samplesFormat: integrationSettings.samplesFormat!, }, ], }, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts index 7e12cdad8f611..c1451a9d81a9d 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/steps/deploy_step/use_deploy_integration.ts @@ -46,6 +46,12 @@ export const useDeployIntegration = ({ (async () => { try { + if (integrationSettings.samplesFormat == null) { + throw new Error( + 'Logic error: samplesFormat is required and cannot be null or undefined when creating integration.' + ); + } + const parameters: BuildIntegrationRequestBody = { integration: { title: integrationSettings.title ?? '', @@ -61,6 +67,7 @@ export const useDeployIntegration = ({ rawSamples: integrationSettings.logsSampleParsed ?? [], docs: result.docs ?? 
[], pipeline: result.pipeline, + samplesFormat: integrationSettings.samplesFormat, }, ], }, diff --git a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts index ec0ea443d37c7..c924415ec53e1 100644 --- a/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts +++ b/x-pack/plugins/integration_assistant/public/components/create_integration/create_integration_assistant/types.ts @@ -8,7 +8,7 @@ import type { OpenAiProviderType } from '@kbn/stack-connectors-plugin/public/common'; import type { ActionConnector } from '@kbn/triggers-actions-ui-plugin/public'; import type { UserConfiguredActionConnector } from '@kbn/triggers-actions-ui-plugin/public/types'; -import type { InputType } from '../../../../common'; +import type { InputType, SamplesFormat } from '../../../../common'; interface GenAiConfig { apiUrl?: string; @@ -34,4 +34,5 @@ export interface IntegrationSettings { dataStreamName?: string; inputTypes?: InputType[]; logsSampleParsed?: string[]; + samplesFormat?: SamplesFormat; } diff --git a/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts b/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts index 2c8e7283d4728..1d02f3c8970d8 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/ecs/graph.ts @@ -82,7 +82,7 @@ const graphState: StateGraphArgs['channels'] = { value: (x: object, y?: object) => y ?? x, default: () => ({}), }, - logFormat: { + samplesFormat: { value: (x: string, y?: string) => y ?? x, default: () => 'json', }, diff --git a/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts b/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts index d925f443873e4..0dc7e772a94cf 100644 --- a/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts +++ b/x-pack/plugins/integration_assistant/server/graphs/ecs/pipeline.ts @@ -173,7 +173,7 @@ export function createPipeline(state: EcsMappingState): IngestPipeline { ecs_version: state.ecsVersion, package_name: state.packageName, data_stream_name: state.dataStreamName, - log_format: state.logFormat, + log_format: state.samplesFormat, fields_to_remove: fieldsToRemove, }; const templatesPath = joinPath(__dirname, '../../templates'); diff --git a/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts b/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts index 3fb1fa21dc753..02b3f12f53d68 100644 --- a/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts +++ b/x-pack/plugins/integration_assistant/server/integration_builder/data_stream.ts @@ -19,6 +19,8 @@ export function createDataStream( const pipelineDir = joinPath(specificDataStreamDir, 'elasticsearch', 'ingest_pipeline'); const title = dataStream.title; const description = dataStream.description; + const samplesFormat = dataStream.samplesFormat; + const useMultilineNDJSON = samplesFormat.name === 'ndjson' && samplesFormat.multiline === true; ensureDirSync(specificDataStreamDir); createDataStreamFolders(specificDataStreamDir, pipelineDir); @@ -31,6 +33,7 @@ export function createDataStream( data_stream_description: description, package_name: packageName, data_stream_name: dataStreamName, + multiline_ndjson: useMultilineNDJSON, }; const dataStreamManifest = 
nunjucks.render( `${inputType.replaceAll('-', '_')}_manifest.yml.njk`, diff --git a/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts b/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts index 3b73e4afb1c94..b57dd670df03f 100644 --- a/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts +++ b/x-pack/plugins/integration_assistant/server/routes/build_integration_routes.test.ts @@ -40,6 +40,7 @@ describe('registerIntegrationBuilderRoutes', () => { processors: [{ script: { source: {} } }], }, docs: [], + samplesFormat: { name: 'ndjson', multiline: false }, }, ], }, diff --git a/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs b/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs index 437accfc32650..5732a1e67e41f 100644 --- a/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs +++ b/x-pack/plugins/integration_assistant/server/templates/agent/filestream.yml.hbs @@ -8,6 +8,13 @@ prospector.scanner.exclude_files: - {{pattern}} {{/each}} {{/if}} +{{#if multiline_ndjson}} +multiline.pattern: '^{' +multiline.negate: true +multiline.match: after +multiline.max_lines: 5000 +multiline.timeout: 10 +{{/if}} {{#if custom}} {{custom}} {{/if}} \ No newline at end of file diff --git a/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk b/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk index f4e0781a76c27..28870a2e2d338 100644 --- a/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk +++ b/x-pack/plugins/integration_assistant/server/templates/manifest/filestream_manifest.yml.njk @@ -22,6 +22,16 @@ show_user: true default: - '\.gz$' + {% if multiline_ndjson %} + - name: multiline_ndjson + type: bool + title: Parse multiline JSON events + description: >- + Enables parsing of newline-delimited JSON-formatted events that take more than one line. Each event must start with a curly brace in the first column. + required: false + show_user: false + default: true + {% endif %} - name: custom type: yaml title: Additional Filestream Configuration Options diff --git a/x-pack/plugins/integration_assistant/server/types.ts b/x-pack/plugins/integration_assistant/server/types.ts index f98686145690e..3bbe25a8fbd0f 100644 --- a/x-pack/plugins/integration_assistant/server/types.ts +++ b/x-pack/plugins/integration_assistant/server/types.ts @@ -71,7 +71,7 @@ export interface EcsMappingState { missingKeys: string[]; invalidEcsFields: string[]; results: object; - logFormat: string; + samplesFormat: string; ecsVersion: string; }