[ML] AIOps: Functional/API integration tests for text field support for log rate analysis (#168177)

This updates the artificial dataset generator for log rate analysis to
allow creating variants that include text fields.
The artificial dataset is now used for 4 variants of functional and API
integration tests: testing spike and dip, each with and without a text
field.

The new tests surfaced some issues that were fixed as part of this PR:

- Getting the counts of log patterns in combination with individual
significant terms ended up with too granular groups. This PR adds
additional queries to get counts for log patterns in combination with
item sets already derived from significant terms.
- The `support` value is returned by the frequent item sets agg and is
used as a threshold to decide whether to include an item set for
grouping. This was missing for significant log patterns and is fixed by
this PR; see the sketch after this list.
- Adds a check to skip fetching frequent item sets for log patterns if
there are no significant terms.
- The way we fetched log patterns used a time filter spanning from the
baseline start all the way to the deviation end, which caused problems
when analyzing dips. This PR updates those queries to only fetch the
actual baseline and deviation time ranges.
- The integration tests caught an issue where we'd still fetch the
histogram for log patterns even when only grouping information was
requested.
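
To illustrate the support threshold fix, a minimal sketch in TypeScript (not the actual PR code: the `ItemSet` shape follows the `common/types.ts` change below, while the `support` field and the helper name are assumptions):

// Sketch only: apply the `support` value returned by the
// frequent_item_sets agg as an inclusion threshold for grouping,
// for log patterns just like for significant keyword terms.
interface ItemSet {
  set: Record<string, string>;
  size: number;
  maxPValue: number;
  doc_count: number;
  support: number; // assumed: as returned by the frequent_item_sets agg
  total_doc_count: number;
}

const MINIMUM_SUPPORT = 0.001;

function filterItemSetsBySupport(itemSets: ItemSet[]): ItemSet[] {
  return itemSets.filter((d) => d.support >= MINIMUM_SUPPORT);
}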
walterra authored Oct 10, 2023
1 parent 0c2a496 commit 9259f48
Showing 25 changed files with 603 additions and 142 deletions.
@@ -5,9 +5,9 @@
* 2.0.
*/

-import type { ItemsetResult } from '../../types';
+import type { ItemSet } from '../../types';

-export const filteredFrequentItemSets: ItemsetResult[] = [
+export const filteredFrequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,
@@ -0,0 +1,129 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { SignificantTermGroup } from '@kbn/ml-agg-utils';

export const finalSignificantTermGroupsTextfield: SignificantTermGroup[] = [
{
docCount: 636,
group: [
{
docCount: 792,
duplicate: 2,
fieldName: 'url',
fieldValue: 'home.php',
key: 'url:home.php',
pValue: 0.00974308761016614,
type: 'keyword',
},
{
docCount: 636,
duplicate: 2,
fieldName: 'user',
fieldValue: 'Peter',
key: 'user:Peter',
pValue: 0.00974308761016614,
type: 'keyword',
},
],
id: '2091742187',
pValue: 0.00974308761016614,
},
{
docCount: 634,
group: [
{
docCount: 1266,
duplicate: 2,
fieldName: 'response_code',
fieldValue: '500',
key: 'response_code:500',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 792,
duplicate: 2,
fieldName: 'url',
fieldValue: 'home.php',
key: 'url:home.php',
pValue: 0.00974308761016614,
type: 'keyword',
},
{
docCount: 634,
duplicate: 2,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
pValue: 0.00974308761016614,
type: 'log_pattern',
},
],
id: '1528268618',
pValue: 0.00974308761016614,
},
{
docCount: 632,
group: [
{
docCount: 1266,
duplicate: 2,
fieldName: 'response_code',
fieldValue: '500',
key: 'response_code:500',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 790,
duplicate: 2,
fieldName: 'url',
fieldValue: 'login.php',
key: 'url:login.php',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 632,
duplicate: 2,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
pValue: 0.012783309213417932,
type: 'log_pattern',
},
],
id: '2619569380',
pValue: 0.012783309213417932,
},
{
docCount: 632,
group: [
{
docCount: 790,
duplicate: 2,
fieldName: 'url',
fieldValue: 'login.php',
key: 'url:login.php',
pValue: 0.012783309213417932,
type: 'keyword',
},
{
docCount: 632,
duplicate: 2,
fieldName: 'user',
fieldValue: 'Peter',
key: 'user:Peter',
pValue: 0.012783309213417932,
type: 'keyword',
},
],
id: '1937394803',
pValue: 0.012783309213417932,
},
];
@@ -5,9 +5,9 @@
* 2.0.
*/

-import type { ItemsetResult } from '../../types';
+import type { ItemSet } from '../../types';

-export const frequentItemSets: ItemsetResult[] = [
+export const frequentItemSets: ItemSet[] = [
{
set: { response_code: '500', url: 'home.php' },
size: 2,
@@ -0,0 +1,24 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { SignificantTerm } from '@kbn/ml-agg-utils';

export const significantLogPatterns: SignificantTerm[] = [
{
bg_count: 0,
doc_count: 1266,
fieldName: 'message',
fieldValue: 'an unexpected error occured',
key: 'an unexpected error occured',
normalizedScore: 0,
pValue: 0.000001,
score: -13.815510557964274,
total_bg_count: 1975,
total_doc_count: 4669,
type: 'log_pattern',
},
];
14 changes: 10 additions & 4 deletions x-pack/plugins/aiops/common/constants.ts
@@ -5,10 +5,16 @@
* 2.0.
*/

-/**
- * The p-value threshold to be used for statistically significant items.
- */
-export const LOG_RATE_ANALYSIS_P_VALUE_THRESHOLD = 0.02;
+export const LOG_RATE_ANALYSIS_SETTINGS = {
+  /**
+   * The p-value threshold to be used for statistically significant items.
+   */
+  P_VALUE_THRESHOLD: 0.02,
+  /**
+   * The minimum support value to be used for the frequent item sets aggregation.
+   */
+  FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT: 0.001,
+} as const;

/**
* For the technical preview of Log Rate Analysis we use a hard coded seed.
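As a reading aid, a hypothetical consumer of the new settings object (the helper and its `Candidate` type are illustrative, not part of this diff):

import { LOG_RATE_ANALYSIS_SETTINGS } from '../common/constants';

interface Candidate {
  pValue: number | null;
}

// Keep only items whose p-value is below the significance threshold.
function filterSignificant<T extends Candidate>(candidates: T[]): T[] {
  return candidates.filter(
    (d) => d.pValue !== null && d.pValue < LOG_RATE_ANALYSIS_SETTINGS.P_VALUE_THRESHOLD
  );
}
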
8 changes: 7 additions & 1 deletion x-pack/plugins/aiops/common/types.ts
@@ -14,7 +14,7 @@ export interface SignificantTermDuplicateGroup {

export type FieldValuePairCounts = Record<string, Record<string, number>>;

-export interface ItemsetResult {
+export interface ItemSet {
set: Record<FieldValuePair['fieldName'], FieldValuePair['fieldValue']>;
size: number;
maxPValue: number;
@@ -23,6 +23,12 @@
total_doc_count: number;
}

+export interface FetchFrequentItemSetsResponse {
+  fields: string[];
+  itemSets: ItemSet[];
+  totalDocCount: number;
+}

interface SimpleHierarchicalTreeNodeSet extends FieldValuePair {
key: string;
type: SignificantTermType;
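A small sketch of consuming the new `FetchFrequentItemSetsResponse` shape (the function name and import path are illustrative; the guard mirrors the route handler check below):

import type { FetchFrequentItemSetsResponse } from '../common/types';

// Grouping is only attempted when both fields and item sets are present.
function canGroup({ fields, itemSets }: FetchFrequentItemSetsResponse): boolean {
  return fields.length > 0 && itemSets.length > 0;
}
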
45 changes: 26 additions & 19 deletions x-pack/plugins/aiops/server/routes/log_rate_analysis.ts
@@ -506,7 +506,7 @@ export const defineLogRateAnalysisRoute = (
);

try {
-const { fields, df } = await fetchFrequentItemSets(
+const { fields, itemSets } = await fetchFrequentItemSets(
client,
request.body.index,
JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
@@ -520,23 +520,26 @@
abortSignal
);

-if (significantCategories.length > 0) {
-const { fields: significantCategoriesFields, df: significantCategoriesDf } =
-await fetchTerms2CategoriesCounts(
-client,
-request.body,
-JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
-significantTerms,
-significantCategories,
-request.body.deviationMin,
-request.body.deviationMax,
-logger,
-pushError,
-abortSignal
-);
+if (significantCategories.length > 0 && significantTerms.length > 0) {
+const {
+fields: significantCategoriesFields,
+itemSets: significantCategoriesItemSets,
+} = await fetchTerms2CategoriesCounts(
+client,
+request.body,
+JSON.parse(request.body.searchQuery) as estypes.QueryDslQueryContainer,
+significantTerms,
+itemSets,
+significantCategories,
+request.body.deviationMin,
+request.body.deviationMax,
+logger,
+pushError,
+abortSignal
+);

fields.push(...significantCategoriesFields);
-df.push(...significantCategoriesDf);
+itemSets.push(...significantCategoriesItemSets);
}

if (shouldStop) {
Expand All @@ -545,9 +548,9 @@ export const defineLogRateAnalysisRoute = (
return;
}

-if (fields.length > 0 && df.length > 0) {
+if (fields.length > 0 && itemSets.length > 0) {
const significantTermGroups = getSignificantTermGroups(
-df,
+itemSets,
[...significantTerms, ...significantCategories],
fields
);
@@ -757,7 +760,11 @@
}

// histograms for text field patterns
-if (overallTimeSeries !== undefined && significantCategories.length > 0) {
+if (
+  overallTimeSeries !== undefined &&
+  significantCategories.length > 0 &&
+  !request.body.overrides?.regroupOnly
+) {
const significantCategoriesHistogramQueries = significantCategories.map((d) => {
const histogramQuery = getHistogramQuery(request.body);
const categoryQuery = getCategoryQuery(d.fieldName, [
@@ -33,11 +33,14 @@ export const getCategoryRequest = (
fieldName: string,
from: number | undefined,
to: number | undefined,
+filter: estypes.QueryDslQueryContainer,
{ wrap }: RandomSamplerWrapper
): estypes.SearchRequest => {
const { index, timeFieldName } = params;
const query = getQueryWithParams({
params,
+termFilters: undefined,
+filter,
});
const { params: request } = createCategoryRequest(
index,
@@ -63,6 +66,7 @@ export const fetchCategories = async (
fieldNames: string[],
from: number | undefined,
to: number | undefined,
+filter: estypes.QueryDslQueryContainer,
logger: Logger,
// The default value of 1 means no sampling will be used
sampleProbability: number = 1,
@@ -78,7 +82,7 @@

const settledPromises = await Promise.allSettled(
fieldNames.map((fieldName) => {
-const request = getCategoryRequest(params, fieldName, from, to, randomSamplerWrapper);
+const request = getCategoryRequest(params, fieldName, from, to, filter, randomSamplerWrapper);
return esClient.search(request, {
signal: abortSignal,
maxRetries: 0,
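The `filter` argument added above is what restricts the category queries to the actual baseline and deviation ranges. A hedged sketch of how such a filter could be built (function name, parameters, and exact shape are assumptions, not necessarily the PR's implementation):

import type * as estypes from '@elastic/elasticsearch/lib/api/typesWithBodyKey';

// Two range clauses in a bool/should restrict a query to the baseline
// and deviation windows instead of one span covering everything between.
export function getBaselineOrDeviationFilter(
  timeFieldName: string,
  baselineMin: number,
  baselineMax: number,
  deviationMin: number,
  deviationMax: number
): estypes.QueryDslQueryContainer {
  return {
    bool: {
      should: [
        { range: { [timeFieldName]: { gte: baselineMin, lte: baselineMax } } },
        { range: { [timeFieldName]: { gte: deviationMin, lte: deviationMax } } },
      ],
      minimum_should_match: 1,
    },
  };
}
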
@@ -15,8 +15,12 @@ import type { Logger } from '@kbn/logging';
import { type SignificantTerm } from '@kbn/ml-agg-utils';
import { createRandomSamplerWrapper } from '@kbn/ml-random-sampler-utils';

-import { RANDOM_SAMPLER_SEED } from '../../../common/constants';
-import type { SignificantTermDuplicateGroup, ItemsetResult } from '../../../common/types';
+import { RANDOM_SAMPLER_SEED, LOG_RATE_ANALYSIS_SETTINGS } from '../../../common/constants';
+import type {
+  SignificantTermDuplicateGroup,
+  ItemSet,
+  FetchFrequentItemSetsResponse,
+} from '../../../common/types';

interface FrequentItemSetsAggregation extends estypes.AggregationsSamplerAggregation {
fi: {
@@ -74,7 +78,7 @@
sampleProbability: number = 1,
emitError: (m: string) => void,
abortSignal?: AbortSignal
-) {
+): Promise<FetchFrequentItemSetsResponse> {
// Sort significant terms by ascending p-value, necessary to apply the field limit correctly.
const sortedSignificantTerms = significantTerms.slice().sort((a, b) => {
return (a.pValue ?? 0) - (b.pValue ?? 0);
@@ -103,7 +107,7 @@
frequent_item_sets: {
minimum_set_size: 2,
size: 200,
-minimum_support: 0.001,
+minimum_support: LOG_RATE_ANALYSIS_SETTINGS.FREQUENT_ITEMS_SETS_MINIMUM_SUPPORT,
fields: getFrequentItemSetsAggFields(sortedSignificantTerms),
},
},
@@ -138,7 +142,7 @@
emitError(`Failed to fetch frequent_item_sets.`);
return {
fields: [],
-df: [],
+itemSets: [],
totalDocCount: 0,
};
}
@@ -158,10 +162,10 @@
const fiss = frequentItemSets.fi.buckets;
fiss.length = maximum;

-const results: ItemsetResult[] = [];
+const results: ItemSet[] = [];

fiss.forEach((fis) => {
-const result: ItemsetResult = {
+const result: ItemSet = {
set: {},
size: 0,
maxPValue: 0,
@@ -203,7 +207,7 @@

return {
fields: uniqueFields,
-df: results,
+itemSets: results,
totalDocCount: totalDocCountFi,
};
}