From 42666382145dbdef6dc895c203f40547e6713c16 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 29 Oct 2020 12:20:37 +0100 Subject: [PATCH 1/6] [ML] Adds helpers to transform flattened fields back to object structure. --- .../components/data_grid/common.test.ts | 2 +- .../components/data_grid/common.ts | 45 +++++++++++++++++++ .../components/data_grid/data_grid.tsx | 17 ++++--- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.test.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.test.ts index 4bb670ad02dfc..aaf6f90b00f4d 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.test.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.test.ts @@ -8,7 +8,7 @@ import { EuiDataGridSorting } from '@elastic/eui'; import { multiColumnSortFactory } from './common'; -describe('Transform: Define Pivot Common', () => { +describe('Data Frame Analytics: Data Grid Common', () => { test('multiColumnSortFactory()', () => { const data = [ { s: 'a', n: 1 }, diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.ts index 642d0ae564b85..1798d38cf6a0f 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.ts @@ -25,6 +25,7 @@ import { } from '../../../../../../../src/plugins/data/public'; import { extractErrorMessage } from '../../../../common/util/errors'; +import { FeatureImportance, TopClasses } from '../../../../common/types/feature_importance'; import { BASIC_NUMERICAL_TYPES, @@ -158,6 +159,50 @@ export const getDataGridSchemaFromKibanaFieldType = ( return schema; }; +/** + * Helper to transform feature importance flattened fields with arrays back to object structure + * + * @param row - EUI data grid data row + * @param mlResultsField - Data 
frame analytics results field + * @returns nested object structure of feature importance values + */ +export const getFeatureImportance = (row: any, mlResultsField: string): FeatureImportance[] => { + const featureNames: string[] = row[`${mlResultsField}.feature_importance.feature_name`]; + const classNames: string[] = row[`${mlResultsField}.feature_importance.classes.class_name`]; + const classImportance: number[] = row[`${mlResultsField}.feature_importance.classes.importance`]; + + return featureNames.map((fName, index) => { + const offset = featureNames.length * index; + const featureClassNames = classNames.slice(offset, offset + featureNames.length); + const featureClassImportance = classImportance.slice(offset, offset + featureNames.length); + return { + feature_name: fName, + classes: featureClassNames.map((fClassName, fIndex) => { + return { class_name: fClassName, importance: featureClassImportance[fIndex] }; + }), + }; + }); +}; + +/** + * Helper to transform top classes flattened fields with arrays back to object structure + * + * @param row - EUI data grid data row + * @param mlResultsField - Data frame analytics results field + * @returns nested object structure of feature importance values + */ +export const getTopClasses = (row: any, mlResultsField: string): TopClasses => { + const classNames: string[] = row[`${mlResultsField}.top_classes.class_name`]; + const classProbabilities: number[] = row[`${mlResultsField}.top_classes.class_probability`]; + const classScores: number[] = row[`${mlResultsField}.top_classes.class_score`]; + + return classNames.map((className, index) => ({ + class_name: className, + class_probability: classProbabilities[index], + class_score: classScores[index], + })); +}; + export const useRenderCellValue = ( indexPattern: IndexPattern | undefined, pagination: IndexPagination, diff --git a/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx 
b/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx index fad2439f5d5ee..cd909f186c469 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx +++ b/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx @@ -27,10 +27,15 @@ import { DEFAULT_SAMPLER_SHARD_SIZE } from '../../../../common/constants/field_h import { ANALYSIS_CONFIG_TYPE, INDEX_STATUS } from '../../data_frame_analytics/common'; -import { euiDataGridStyle, euiDataGridToolbarSettings } from './common'; +import { + euiDataGridStyle, + euiDataGridToolbarSettings, + getFeatureImportance, + getTopClasses, +} from './common'; import { UseIndexDataReturnType } from './types'; import { DecisionPathPopover } from './feature_importance/decision_path_popover'; -import { TopClasses } from '../../../../common/types/feature_importance'; +import { FeatureImportance, TopClasses } from '../../../../common/types/feature_importance'; import { DEFAULT_RESULTS_FIELD } from '../../../../common/constants/data_frame_analytics'; import { DataFrameAnalysisConfigType } from '../../../../common/types/data_frame_analytics'; @@ -118,16 +123,16 @@ export const DataGrid: FC = memo( if (!row) return
; // if resultsField for some reason is not available then use ml const mlResultsField = resultsField ?? DEFAULT_RESULTS_FIELD; - const parsedFIArray = row[mlResultsField].feature_importance; + const parsedFIArray: FeatureImportance[] = getFeatureImportance(row, mlResultsField); let predictedValue: string | number | undefined; let topClasses: TopClasses = []; if ( predictionFieldName !== undefined && row && - row[mlResultsField][predictionFieldName] !== undefined + row[`${mlResultsField}.${predictionFieldName}`] !== undefined ) { - predictedValue = row[mlResultsField][predictionFieldName]; - topClasses = row[mlResultsField].top_classes; + predictedValue = row[`${mlResultsField}.${predictionFieldName}`]; + topClasses = getTopClasses(row, mlResultsField); } return ( From 31239b67970a54274126a0f577828edb860fb519 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 29 Oct 2020 15:00:48 +0100 Subject: [PATCH 2/6] [ML] Fix cell value. --- .../public/application/components/data_grid/common.ts | 10 ++++++++++ .../application/data_frame_analytics/common/fields.ts | 8 ++++++++ 2 files changed, 18 insertions(+) diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.ts index 1798d38cf6a0f..5e086698f8732 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.ts @@ -24,6 +24,7 @@ import { KBN_FIELD_TYPES, } from '../../../../../../../src/plugins/data/public'; +import { DEFAULT_RESULTS_FIELD } from '../../../../common/constants/data_frame_analytics'; import { extractErrorMessage } from '../../../../common/util/errors'; import { FeatureImportance, TopClasses } from '../../../../common/types/feature_importance'; @@ -252,6 +253,15 @@ export const useRenderCellValue = ( return item[cId]; } + // For classification and regression results, we need to treat some fields with a custom 
transform. + if (cId === `${resultsField}.feature_importance`) { + return getFeatureImportance(fullItem, resultsField ?? DEFAULT_RESULTS_FIELD); + } + + if (cId === `${resultsField}.top_classes`) { + return getTopClasses(fullItem, resultsField ?? DEFAULT_RESULTS_FIELD); + } + // Try if the field name is available as a nested field. return getNestedProperty(tableItems[adjustedRowIndex], cId, null); } diff --git a/x-pack/plugins/ml/public/application/data_frame_analytics/common/fields.ts b/x-pack/plugins/ml/public/application/data_frame_analytics/common/fields.ts index c606cbd1cc11a..785f3ac9cd4dc 100644 --- a/x-pack/plugins/ml/public/application/data_frame_analytics/common/fields.ts +++ b/x-pack/plugins/ml/public/application/data_frame_analytics/common/fields.ts @@ -213,6 +213,10 @@ export const getDefaultFieldsFromJobCaps = ( name: `${resultsField}.${FEATURE_IMPORTANCE}`, type: KBN_FIELD_TYPES.UNKNOWN, }); + // remove flattened feature importance fields + fields = fields.filter( + (field: any) => !field.name.includes(`${resultsField}.${FEATURE_IMPORTANCE}.`) + ); } if ((numTopClasses ?? 0) > 0) { @@ -221,6 +225,10 @@ export const getDefaultFieldsFromJobCaps = ( name: `${resultsField}.${TOP_CLASSES}`, type: KBN_FIELD_TYPES.UNKNOWN, }); + // remove flattened top classes fields + fields = fields.filter( + (field: any) => !field.name.includes(`${resultsField}.${TOP_CLASSES}.`) + ); } // Only need to add these fields if we didn't use dest index pattern to get the fields From edc8e2f256828feb37d9d6a3e7b22e1b283ee734 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 29 Oct 2020 15:46:50 +0100 Subject: [PATCH 3/6] [ML] Fix helper functions. 
--- .../ml/common/types/feature_importance.ts | 12 +++- .../components/data_grid/common.ts | 65 +++++++++++++------ 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/x-pack/plugins/ml/common/types/feature_importance.ts b/x-pack/plugins/ml/common/types/feature_importance.ts index 4f5619cf3ab7b..046396d633434 100644 --- a/x-pack/plugins/ml/common/types/feature_importance.ts +++ b/x-pack/plugins/ml/common/types/feature_importance.ts @@ -8,11 +8,17 @@ export interface ClassFeatureImportance { class_name: string | boolean; importance: number; } -export interface FeatureImportance { + +interface FeatureImportanceClassification { feature_name: string; - importance?: number; - classes?: ClassFeatureImportance[]; + classes: ClassFeatureImportance[]; +} + +interface FeatureImportanceRegression { + feature_name: string; + importance: number; } +export type FeatureImportance = FeatureImportanceClassification | FeatureImportanceRegression; export interface TopClass { class_name: string; diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.ts index 5e086698f8732..c6dbe3f266608 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.ts @@ -167,22 +167,42 @@ export const getDataGridSchemaFromKibanaFieldType = ( * @param mlResultsField - Data frame analytics results field * @returns nested object structure of feature importance values */ -export const getFeatureImportance = (row: any, mlResultsField: string): FeatureImportance[] => { - const featureNames: string[] = row[`${mlResultsField}.feature_importance.feature_name`]; - const classNames: string[] = row[`${mlResultsField}.feature_importance.classes.class_name`]; - const classImportance: number[] = row[`${mlResultsField}.feature_importance.classes.importance`]; - - return featureNames.map((fName, index) => { - const offset = 
featureNames.length * index; - const featureClassNames = classNames.slice(offset, offset + featureNames.length); - const featureClassImportance = classImportance.slice(offset, offset + featureNames.length); - return { - feature_name: fName, - classes: featureClassNames.map((fClassName, fIndex) => { - return { class_name: fClassName, importance: featureClassImportance[fIndex] }; - }), - }; - }); +export const getFeatureImportance = ( + row: Record, + mlResultsField: string +): FeatureImportance[] => { + const featureNames: string[] | undefined = + row[`${mlResultsField}.feature_importance.feature_name`]; + const classNames: string[] | undefined = + row[`${mlResultsField}.feature_importance.classes.class_name`]; + const classImportance: number[] | undefined = + row[`${mlResultsField}.feature_importance.classes.importance`]; + + if (featureNames === undefined) { + return []; + } + + // return object structure for classification job + if (classNames !== undefined && classImportance !== undefined) { + return featureNames.map((fName, index) => { + const offset = featureNames.length * index; + const featureClassNames = classNames.slice(offset, offset + featureNames.length); + const featureClassImportance = classImportance.slice(offset, offset + featureNames.length); + return { + feature_name: fName, + classes: featureClassNames.map((fClassName, fIndex) => { + return { class_name: fClassName, importance: featureClassImportance[fIndex] }; + }), + }; + }); + } + + // return object structure for regression job + const importance: number[] = row[`${mlResultsField}.feature_importance.importance`]; + return featureNames.map((fName, index) => ({ + feature_name: fName, + importance: importance[index], + })); }; /** @@ -192,10 +212,15 @@ export const getFeatureImportance = (row: any, mlResultsField: string): FeatureI * @param mlResultsField - Data frame analytics results field * @returns nested object structure of feature importance values */ -export const getTopClasses = (row: 
any, mlResultsField: string): TopClasses => { - const classNames: string[] = row[`${mlResultsField}.top_classes.class_name`]; - const classProbabilities: number[] = row[`${mlResultsField}.top_classes.class_probability`]; - const classScores: number[] = row[`${mlResultsField}.top_classes.class_score`]; +export const getTopClasses = (row: Record, mlResultsField: string): TopClasses => { + const classNames: string[] | undefined = row[`${mlResultsField}.top_classes.class_name`]; + const classProbabilities: number[] | undefined = + row[`${mlResultsField}.top_classes.class_probability`]; + const classScores: number[] | undefined = row[`${mlResultsField}.top_classes.class_score`]; + + if (classNames === undefined || classProbabilities === undefined || classScores === undefined) { + return []; + } return classNames.map((className, index) => ({ class_name: className, From 96ccaef1be6614f3dce368ea228c63282fdf6327 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 29 Oct 2020 17:50:41 +0100 Subject: [PATCH 4/6] [ML] Fix classification feature importance object. 
--- .../application/components/data_grid/common.ts | 12 ++++++++---- .../exploration_results_table.tsx | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.ts index c6dbe3f266608..cb5008d628b51 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.ts @@ -184,13 +184,17 @@ export const getFeatureImportance = ( // return object structure for classification job if (classNames !== undefined && classImportance !== undefined) { + const overallClassNames = classNames?.slice(0, classNames.length / featureNames.length); + return featureNames.map((fName, index) => { - const offset = featureNames.length * index; - const featureClassNames = classNames.slice(offset, offset + featureNames.length); - const featureClassImportance = classImportance.slice(offset, offset + featureNames.length); + const offset = overallClassNames.length * index; + const featureClassImportance = classImportance.slice( + offset, + offset + overallClassNames.length + ); return { feature_name: fName, - classes: featureClassNames.map((fClassName, fIndex) => { + classes: overallClassNames.map((fClassName, fIndex) => { return { class_name: fClassName, importance: featureClassImportance[fIndex] }; }), }; diff --git a/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_exploration/components/exploration_results_table/exploration_results_table.tsx b/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_exploration/components/exploration_results_table/exploration_results_table.tsx index a6e95269b3633..10e2ad5b5eb53 100644 --- a/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_exploration/components/exploration_results_table/exploration_results_table.tsx +++ 
b/x-pack/plugins/ml/public/application/data_frame_analytics/pages/analytics_exploration/components/exploration_results_table/exploration_results_table.tsx @@ -29,7 +29,7 @@ interface Props { } export const ExplorationResultsTable: FC = React.memo( - ({ indexPattern, jobConfig, jobStatus, needsDestIndexPattern, searchQuery }) => { + ({ indexPattern, jobConfig, needsDestIndexPattern, searchQuery }) => { const { services: { mlServices: { mlApiServices }, From 358bb67e72a4fb0f49034bdb540185f81ac8415f Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Thu, 29 Oct 2020 23:28:31 +0100 Subject: [PATCH 5/6] [ML] Fix types. --- .../plugins/ml/common/types/feature_importance.ts | 13 +++++-------- .../data_frame_analytics/common/get_index_data.ts | 2 +- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/x-pack/plugins/ml/common/types/feature_importance.ts b/x-pack/plugins/ml/common/types/feature_importance.ts index 046396d633434..1ae4c7832390c 100644 --- a/x-pack/plugins/ml/common/types/feature_importance.ts +++ b/x-pack/plugins/ml/common/types/feature_importance.ts @@ -9,17 +9,14 @@ export interface ClassFeatureImportance { importance: number; } -interface FeatureImportanceClassification { +// TODO We should separate the interface because classes/importance +// isn't both optional but either/or. 
+export interface FeatureImportance { feature_name: string; - classes: ClassFeatureImportance[]; + classes?: ClassFeatureImportance[]; + importance?: number; } -interface FeatureImportanceRegression { - feature_name: string; - importance: number; -} -export type FeatureImportance = FeatureImportanceClassification | FeatureImportanceRegression; - export interface TopClass { class_name: string; class_probability: number; diff --git a/x-pack/plugins/ml/public/application/data_frame_analytics/common/get_index_data.ts b/x-pack/plugins/ml/public/application/data_frame_analytics/common/get_index_data.ts index 8e50aab0914db..85f222109d408 100644 --- a/x-pack/plugins/ml/public/application/data_frame_analytics/common/get_index_data.ts +++ b/x-pack/plugins/ml/public/application/data_frame_analytics/common/get_index_data.ts @@ -53,7 +53,7 @@ export const getIndexData = async ( index: jobConfig.dest.index, body: { fields: ['*'], - _source: [], + _source: false, query: searchQuery, from: pageIndex * pageSize, size: pageSize, From 653c84654b41293e79dd5954da11b02f74b0f9f3 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Fri, 30 Oct 2020 00:21:33 +0100 Subject: [PATCH 6/6] [ML] Fix decision path chart for boolean types. 
--- .../application/components/data_grid/common.ts | 15 +++++++++++++-- .../components/data_grid/data_grid.tsx | 12 +++++++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/x-pack/plugins/ml/public/application/components/data_grid/common.ts b/x-pack/plugins/ml/public/application/components/data_grid/common.ts index cb5008d628b51..48a0a0c9ab126 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/common.ts +++ b/x-pack/plugins/ml/public/application/components/data_grid/common.ts @@ -160,6 +160,13 @@ export const getDataGridSchemaFromKibanaFieldType = ( return schema; }; +const getClassName = (className: string, isClassTypeBoolean: boolean) => { + if (isClassTypeBoolean) { + return className === 'true'; + } + + return className; +}; /** * Helper to transform feature importance flattened fields with arrays back to object structure * @@ -169,7 +176,8 @@ export const getDataGridSchemaFromKibanaFieldType = ( */ export const getFeatureImportance = ( row: Record, - mlResultsField: string + mlResultsField: string, + isClassTypeBoolean = false ): FeatureImportance[] => { const featureNames: string[] | undefined = row[`${mlResultsField}.feature_importance.feature_name`]; @@ -195,7 +203,10 @@ export const getFeatureImportance = ( return { feature_name: fName, classes: overallClassNames.map((fClassName, fIndex) => { - return { class_name: fClassName, importance: featureClassImportance[fIndex] }; + return { + class_name: getClassName(fClassName, isClassTypeBoolean), + importance: featureClassImportance[fIndex], + }; }), }; }); diff --git a/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx b/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx index cd909f186c469..50e9cabc99c35 100644 --- a/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx +++ b/x-pack/plugins/ml/public/application/components/data_grid/data_grid.tsx @@ -123,7 +123,6 @@ export const DataGrid: FC = memo( if (!row) 
return
; // if resultsField for some reason is not available then use ml const mlResultsField = resultsField ?? DEFAULT_RESULTS_FIELD; - const parsedFIArray: FeatureImportance[] = getFeatureImportance(row, mlResultsField); let predictedValue: string | number | undefined; let topClasses: TopClasses = []; if ( @@ -135,6 +134,17 @@ export const DataGrid: FC = memo( topClasses = getTopClasses(row, mlResultsField); } + const isClassTypeBoolean = topClasses.reduce( + (p, c) => typeof c.class_name === 'boolean' || p, + false + ); + + const parsedFIArray: FeatureImportance[] = getFeatureImportance( + row, + mlResultsField, + isClassTypeBoolean + ); + return (