Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] AIOps: Functional/API integration tests for text field support for log rate analysis #168177

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
* 2.0.
*/

import type { ItemsetResult } from '../../types';
import type { ItemSet } from '../../types';

export const filteredFrequentItemSets: ItemsetResult[] = [
export const filteredFrequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { SignificantTermGroup } from '@kbn/ml-agg-utils';

/**
 * Test fixture: the expected significant term groups produced by log rate
 * analysis over the artificial dataset with text field support, mixing
 * keyword-field members and log-pattern (message categorization) members.
 *
 * NOTE(review): the 'occured' misspelling appears to be part of the generated
 * test data itself — confirm against the data generator before changing it.
 */

// The two p-value levels that occur throughout this fixture.
const P_VALUE_A = 0.00974308761016614;
const P_VALUE_B = 0.012783309213417932;

// Builds a keyword-field group member; its `key` is `<fieldName>:<fieldValue>`.
const keywordTerm = (
  fieldName: string,
  fieldValue: string,
  docCount: number,
  pValue: number
) => ({
  docCount,
  duplicate: 2,
  fieldName,
  fieldValue,
  key: `${fieldName}:${fieldValue}`,
  pValue,
  type: 'keyword' as const,
});

// Builds a log-pattern group member; for patterns the `key` is the raw message.
const logPatternTerm = (docCount: number, pValue: number) => ({
  docCount,
  duplicate: 2,
  fieldName: 'message',
  fieldValue: 'an unexpected error occured',
  key: 'an unexpected error occured',
  pValue,
  type: 'log_pattern' as const,
});

export const finalSignificantTermGroupsTextfield: SignificantTermGroup[] = [
  {
    docCount: 636,
    group: [
      keywordTerm('url', 'home.php', 792, P_VALUE_A),
      keywordTerm('user', 'Peter', 636, P_VALUE_A),
    ],
    id: '2091742187',
    pValue: P_VALUE_A,
  },
  {
    docCount: 634,
    group: [
      keywordTerm('response_code', '500', 1266, P_VALUE_B),
      keywordTerm('url', 'home.php', 792, P_VALUE_A),
      logPatternTerm(634, P_VALUE_A),
    ],
    id: '1528268618',
    pValue: P_VALUE_A,
  },
  {
    docCount: 632,
    group: [
      keywordTerm('response_code', '500', 1266, P_VALUE_B),
      keywordTerm('url', 'login.php', 790, P_VALUE_B),
      logPatternTerm(632, P_VALUE_B),
    ],
    id: '2619569380',
    pValue: P_VALUE_B,
  },
  {
    docCount: 632,
    group: [
      keywordTerm('url', 'login.php', 790, P_VALUE_B),
      keywordTerm('user', 'Peter', 632, P_VALUE_B),
    ],
    id: '1937394803',
    pValue: P_VALUE_B,
  },
];
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
* 2.0.
*/

import type { ItemsetResult } from '../../types';
import type { ItemSet } from '../../types';

export const frequentItemSets: ItemsetResult[] = [
export const frequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { SignificantTerm } from '@kbn/ml-agg-utils';

/**
 * Test fixture: the significant log pattern expected from log rate analysis
 * text field support over the artificial dataset.
 *
 * NOTE(review): the 'occured' misspelling appears to be part of the generated
 * test data itself — confirm against the data generator before changing it.
 */
export const significantLogPatterns: SignificantTerm[] = [
  {
    bg_count: 0,
    doc_count: 1266,
    fieldName: 'message',
    fieldValue: 'an unexpected error occured',
    // For log patterns the key is the raw message rather than `<field>:<value>`.
    key: 'an unexpected error occured',
    normalizedScore: 0,
    pValue: 0.000001,
    score: -13.815510557964274,
    total_bg_count: 1975,
    total_doc_count: 4669,
    type: 'log_pattern',
  },
];
14 changes: 10 additions & 4 deletions x-pack/plugins/aiops/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,16 @@
* 2.0.
*/

/**
* The p-value threshold to be used for statistically significant items.
*/
export const LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD = 0.02;
export const LOG_RATE_ANALYSIS_SETTINGS = {
  /**
   * The p-value threshold to be used for statistically significant items.
   */
  P_VALUE_THRESHOLD: 0.02,
  /**
   * The minimum support value to be used for the frequent item sets aggregation.
   */
  FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001,
} as const;

/**
* For the technical preview of Log Rate Analysis we use a hard coded seed.
Expand Down
8 changes: 7 additions & 1 deletion x-pack/plugins/aiops/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export interface SignificantTermDuplicateGroup {

export type FieldValuePairCounts = Record<string, Record<string, number>>;

export interface ItemsetResult {
export interface ItemSet {
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
size: number;
maxPValue: number;
Expand All @@ -23,6 +23,12 @@ export interface ItemsetResult {
total_doc_count: number;
}

/**
 * Result shape returned by `fetchFrequentItemSets`.
 */
export interface FetchFrequentItemSetsResponse {
  // Unique field names appearing across the returned item sets.
  fields: string[];
  // Item sets returned by the `frequent_item_sets` aggregation.
  itemSets: ItemSet[];
  // Total document count the item sets were derived from.
  totalDocCount: number;
}

interface SimpleHierarchicalTreeNodeSet extends FieldValuePair {
key: string;
type: SignificantTermType;
Expand Down
45 changes: 26 additions & 19 deletions x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ export const defineLogRateAnalysisRoute = (
);

try {
const { fields, df } = await fetchFrequentItemSets(
const { fields, itemSets } = await fetchFrequentItemSets(
client,
request.body.index,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
Expand All @@ -520,23 +520,26 @@ export const defineLogRateAnalysisRoute = (
abortSignal
);

if (significantCategories.length > 0) {
const { fields: significantCategoriesFields, df: significantCategoriesDf } =
await fetchTerms2CategoriesCounts(
client,
request.body,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
significantTerms,
significantCategories,
request.body.deviationMin,
request.body.deviationMax,
logger,
pushError,
abortSignal
);
if (significantCategories.length > 0 && significantTerms.length > 0) {
const {
fields: significantCategoriesFields,
itemSets: significantCategoriesItemSets,
} = await fetchTerms2CategoriesCounts(
client,
request.body,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
significantTerms,
itemSets,
significantCategories,
request.body.deviationMin,
request.body.deviationMax,
logger,
pushError,
abortSignal
);

fields.push(...significantCategoriesFields);
df.push(...significantCategoriesDf);
itemSets.push(...significantCategoriesItemSets);
}

if (shouldStop) {
Expand All @@ -545,9 +548,9 @@ export const defineLogRateAnalysisRoute = (
return;
}

if (fields.length > 0 && df.length > 0) {
if (fields.length > 0 && itemSets.length > 0) {
const significantTermGroups = getSignificantTermGroups(
df,
itemSets,
[...significantTerms, ...significantCategories],
fields
);
Expand Down Expand Up @@ -757,7 +760,11 @@ export const defineLogRateAnalysisRoute = (
}

// histograms for text field patterns
if (overallTimeSeries !== undefined && significantCategories.length > 0) {
if (
overallTimeSeries !== undefined &&
significantCategories.length > 0 &&
!request.body.overrides?.regroupOnly
) {
const significantCategoriesHistogramQueries = significantCategories.map((d) => {
const histogramQuery = getHistogramQuery(request.body);
const categoryQuery = getCategoryQuery(d.fieldName, [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ export const getCategoryRequest = (
fieldName: string,
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
{ wrap }: RandomSamplerWrapper
): estypes.SearchRequest => {
const { index, timeFieldName } = params;
const query = getQueryWithParams({
params,
termFilters: undefined,
filter,
});
const { params: request } = createCategoryRequest(
index,
Expand All @@ -63,6 +66,7 @@ export const fetchCategories = async (
fieldNames: string[],
from: number | undefined,
to: number | undefined,
filter: estypes.QueryDslQueryContainer,
logger: Logger,
// The default value of 1 means no sampling will be used
sampleProbability: number = 1,
Expand All @@ -78,7 +82,7 @@ export const fetchCategories = async (

const settledPromises = await Promise.allSettled(
fieldNames.map((fieldName) => {
const request = getCategoryRequest(params, fieldName, from, to, randomSamplerWrapper);
const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper);
return esClient.search(request, {
signal: abortSignal,
maxRetries: 0,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,12 @@ import type { Logger } from '@kbn/logging';
import { type SignificantTerm } from '@kbn/ml-agg-utils';
import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

import { RANDOM_SAMPLER_SEED } from '../../../common/constants';
import type { SignificantTermDuplicateGroup, ItemsetResult } from '../../../common/types';
import { RANDOM_SAMPLER_SEED, LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants';
import type {
SignificantTermDuplicateGroup,
ItemSet,
FetchFrequentItemSetsResponse,
} from '../../../common/types';

interface FrequentItemSetsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
Expand Down Expand Up @@ -74,7 +78,7 @@ export async function fetchFrequentItemSets(
sampleProbability: number = 1,
emitError: (m: string) => void,
abortSignal?: AbortSignal
) {
): Promise<FetchFrequentItemSetsResponse> {
// Sort significant terms by ascending p-value, necessary to apply the field limit correctly.
const sortedSignificantTerms = significantTerms.slice().sort((a, b) => {
return (a.pValue ?? 0) - (b.pValue ?? 0);
Expand Down Expand Up @@ -103,7 +107,7 @@ export async function fetchFrequentItemSets(
frequent_item_sets: {
minimum_set_size: 2,
size: 200,
minimum_support: 0.001,
minimum_support: LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT,
fields: getFrequentItemSetsAggFields(sortedSignificantTerms),
},
},
Expand Down Expand Up @@ -138,7 +142,7 @@ export async function fetchFrequentItemSets(
emitError(`Failed to fetch frequent_item_sets.`);
return {
fields: [],
df: [],
itemSets: [],
totalDocCount: 0,
};
}
Expand All @@ -158,10 +162,10 @@ export async function fetchFrequentItemSets(
const fiss = frequentItemSets.fi.buckets;
fiss.length = maximum;

const results: ItemsetResult[] = [];
const results: ItemSet[] = [];

fiss.forEach((fis) => {
const result: ItemsetResult = {
const result: ItemSet = {
set: {},
size: 0,
maxPValue: 0,
Expand Down Expand Up @@ -203,7 +207,7 @@ export async function fetchFrequentItemSets(

return {
fields: uniqueFields,
df: results,
itemSets: results,
totalDocCount: totalDocCountFi,
};
}
Loading
Loading