From a117e4374caa905335ba27b0fcd77916c86b5df6 Mon Sep 17 00:00:00 2001 From: Walter Rafelsberger Date: Mon, 13 Jul 2020 08:05:12 +0200 Subject: [PATCH] [ML] Histogram API endpoint for transforms. --- .../models/data_visualizer/data_visualizer.ts | 329 +++++++++--------- .../ml/server/models/data_visualizer/index.ts | 2 +- x-pack/plugins/ml/server/shared.ts | 1 + .../server/routes/api/field_histograms.ts | 50 +++ .../transform/server/routes/api/schema.ts | 17 + .../transform/server/shared_imports.ts | 7 + 6 files changed, 248 insertions(+), 158 deletions(-) create mode 100644 x-pack/plugins/transform/server/routes/api/field_histograms.ts create mode 100644 x-pack/plugins/transform/server/shared_imports.ts diff --git a/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts b/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts index 9512eda0303a0..d1a4a0b585fbb 100644 --- a/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts +++ b/x-pack/plugins/ml/server/models/data_visualizer/data_visualizer.ts @@ -4,7 +4,7 @@ * you may not use this file except in compliance with the Elastic License. */ -import { LegacyCallAPIOptions, LegacyAPICaller } from 'kibana/server'; +import { LegacyAPICaller } from 'kibana/server'; import _ from 'lodash'; import { KBN_FIELD_TYPES } from '../../../../../../src/plugins/data/server'; import { ML_JOB_FIELD_TYPES } from '../../../common/constants/field_types'; @@ -179,12 +179,176 @@ type BatchStats = | DocumentCountStats | FieldExamples; +const getAggIntervals = async ( + callAsCurrentUser: LegacyAPICaller, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number +): Promise => { + const numericColumns = fields.filter((field) => { + return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; + }); + + if (numericColumns.length === 0) { + return {}; + } + + const minMaxAggs = numericColumns.reduce((aggs, c) => { + const id = stringHash(c.fieldName); + aggs[id] = { + stats: { + field: c.fieldName, + }, + }; + return aggs; + }, {} as Record); + + const respStats = await callAsCurrentUser('search', { + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), + size: 0, + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = + aggsPath.length > 0 ? _.get(respStats.aggregations, aggsPath) : respStats.aggregations; + + return Object.keys(aggregations).reduce((p, aggName) => { + const stats = [aggregations[aggName].min, aggregations[aggName].max]; + if (!stats.includes(null)) { + const delta = aggregations[aggName].max - aggregations[aggName].min; + + let aggInterval = 1; + + if (delta > MAX_CHART_COLUMNS || delta <= 1) { + aggInterval = delta / (MAX_CHART_COLUMNS - 1); + } + + p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; + } + + return p; + }, {} as NumericColumnStatsMap); +}; + +// export for re-use by transforms plugin +export const getHistogramsForFields = async ( + callAsCurrentUser: LegacyAPICaller, + indexPatternTitle: string, + query: any, + fields: HistogramField[], + samplerShardSize: number +) => { + const aggIntervals = await getAggIntervals( + callAsCurrentUser, + indexPatternTitle, + query, + fields, + samplerShardSize + ); + + const chartDataAggs = fields.reduce((aggs, field) => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(fieldName); + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] !== undefined) { + aggs[`${id}_histogram`] = { + histogram: { + field: fieldName, + interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1, + }, + }; + } + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + if (fieldType === KBN_FIELD_TYPES.STRING) { + aggs[`${id}_cardinality`] = { + cardinality: { + field: fieldName, + }, + }; + } + aggs[`${id}_terms`] = { + terms: { + field: fieldName, + size: MAX_CHART_COLUMNS, + }, + }; + } + return aggs; + }, {} as Record); + + if (Object.keys(chartDataAggs).length === 0) { + return []; + } + + const respChartsData = await callAsCurrentUser('search', { + index: indexPatternTitle, + size: 0, + body: { + query, + aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), + size: 0, + }, + }); + + const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); + const aggregations = + aggsPath.length > 0 + ? _.get(respChartsData.aggregations, aggsPath) + : respChartsData.aggregations; + + const chartsData: ChartData[] = fields.map( + (field): ChartData => { + const fieldName = field.fieldName; + const fieldType = field.type; + const id = stringHash(field.fieldName); + + if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { + if (aggIntervals[id] === undefined) { + return { + type: 'numeric', + data: [], + interval: 0, + stats: [0, 0], + id: fieldName, + }; + } + + return { + data: aggregations[`${id}_histogram`].buckets, + interval: aggIntervals[id].interval, + stats: [aggIntervals[id].min, aggIntervals[id].max], + type: 'numeric', + id: fieldName, + }; + } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { + return { + type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', + cardinality: + fieldType === KBN_FIELD_TYPES.STRING ? aggregations[`${id}_cardinality`].value : 2, + data: aggregations[`${id}_terms`].buckets, + id: fieldName, + }; + } + + return { + type: 'unsupported', + id: fieldName, + }; + } + ); + + return chartsData; +}; + export class DataVisualizer { - callAsCurrentUser: ( - endpoint: string, - clientParams: Record, - options?: LegacyCallAPIOptions - ) => Promise; + callAsCurrentUser: LegacyAPICaller; constructor(callAsCurrentUser: LegacyAPICaller) { this.callAsCurrentUser = callAsCurrentUser; @@ -273,62 +437,6 @@ export class DataVisualizer { return stats; } - async getAggIntervals( - indexPatternTitle: string, - query: any, - fields: HistogramField[], - samplerShardSize: number - ): Promise { - const numericColumns = fields.filter((field) => { - return field.type === KBN_FIELD_TYPES.NUMBER || field.type === KBN_FIELD_TYPES.DATE; - }); - - if (numericColumns.length === 0) { - return {}; - } - - const minMaxAggs = numericColumns.reduce((aggs, c) => { - const id = stringHash(c.fieldName); - aggs[id] = { - stats: { - field: c.fieldName, - }, - }; - return aggs; - }, {} as Record); - - const respStats = await this.callAsCurrentUser('search', { - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(minMaxAggs, samplerShardSize), - size: 0, - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = - aggsPath.length > 0 ? _.get(respStats.aggregations, aggsPath) : respStats.aggregations; - - return Object.keys(aggregations).reduce((p, aggName) => { - const stats = [aggregations[aggName].min, aggregations[aggName].max]; - if (!stats.includes(null)) { - const delta = aggregations[aggName].max - aggregations[aggName].min; - - let aggInterval = 1; - - if (delta > MAX_CHART_COLUMNS || delta <= 1) { - aggInterval = delta / (MAX_CHART_COLUMNS - 1); - } - - p[aggName] = { interval: aggInterval, min: stats[0], max: stats[1] }; - } - - return p; - }, {} as NumericColumnStatsMap); - } - // Obtains binned histograms for supplied list of fields. The statistics for each field in the // returned array depend on the type of the field (keyword, number, date etc). // Sampling will be used if supplied samplerShardSize > 0. @@ -338,106 +446,13 @@ export class DataVisualizer { fields: HistogramField[], samplerShardSize: number ): Promise { - const aggIntervals = await this.getAggIntervals( + return await getHistogramsForFields( + this.callAsCurrentUser, indexPatternTitle, query, fields, samplerShardSize ); - - const chartDataAggs = fields.reduce((aggs, field) => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(fieldName); - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] !== undefined) { - aggs[`${id}_histogram`] = { - histogram: { - field: fieldName, - interval: aggIntervals[id].interval !== 0 ? aggIntervals[id].interval : 1, - }, - }; - } - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - if (fieldType === KBN_FIELD_TYPES.STRING) { - aggs[`${id}_cardinality`] = { - cardinality: { - field: fieldName, - }, - }; - } - aggs[`${id}_terms`] = { - terms: { - field: fieldName, - size: MAX_CHART_COLUMNS, - }, - }; - } - return aggs; - }, {} as Record); - - if (Object.keys(chartDataAggs).length === 0) { - return []; - } - - const respChartsData = await this.callAsCurrentUser('search', { - index: indexPatternTitle, - size: 0, - body: { - query, - aggs: buildSamplerAggregation(chartDataAggs, samplerShardSize), - size: 0, - }, - }); - - const aggsPath = getSamplerAggregationsResponsePath(samplerShardSize); - const aggregations = - aggsPath.length > 0 - ? _.get(respChartsData.aggregations, aggsPath) - : respChartsData.aggregations; - - const chartsData: ChartData[] = fields.map( - (field): ChartData => { - const fieldName = field.fieldName; - const fieldType = field.type; - const id = stringHash(field.fieldName); - - if (fieldType === KBN_FIELD_TYPES.NUMBER || fieldType === KBN_FIELD_TYPES.DATE) { - if (aggIntervals[id] === undefined) { - return { - type: 'numeric', - data: [], - interval: 0, - stats: [0, 0], - id: fieldName, - }; - } - - return { - data: aggregations[`${id}_histogram`].buckets, - interval: aggIntervals[id].interval, - stats: [aggIntervals[id].min, aggIntervals[id].max], - type: 'numeric', - id: fieldName, - }; - } else if (fieldType === KBN_FIELD_TYPES.STRING || fieldType === KBN_FIELD_TYPES.BOOLEAN) { - return { - type: fieldType === KBN_FIELD_TYPES.STRING ? 'ordinal' : 'boolean', - cardinality: - fieldType === KBN_FIELD_TYPES.STRING ? aggregations[`${id}_cardinality`].value : 2, - data: aggregations[`${id}_terms`].buckets, - id: fieldName, - }; - } - - return { - type: 'unsupported', - id: fieldName, - }; - } - ); - - return chartsData; } // Obtains statistics for supplied list of fields. The statistics for each field in the diff --git a/x-pack/plugins/ml/server/models/data_visualizer/index.ts b/x-pack/plugins/ml/server/models/data_visualizer/index.ts index ed44e9b12e1d1..ca1df0fe8300c 100644 --- a/x-pack/plugins/ml/server/models/data_visualizer/index.ts +++ b/x-pack/plugins/ml/server/models/data_visualizer/index.ts @@ -4,4 +4,4 @@ * you may not use this file except in compliance with the Elastic License. */ -export { DataVisualizer } from './data_visualizer'; +export { getHistogramsForFields, DataVisualizer } from './data_visualizer'; diff --git a/x-pack/plugins/ml/server/shared.ts b/x-pack/plugins/ml/server/shared.ts index 3fca8ea1ba047..100433b23f7d1 100644 --- a/x-pack/plugins/ml/server/shared.ts +++ b/x-pack/plugins/ml/server/shared.ts @@ -8,3 +8,4 @@ export * from '../common/types/anomalies'; export * from '../common/types/anomaly_detection_jobs'; export * from './lib/capabilities/errors'; export { ModuleSetupPayload } from './shared_services/providers/modules'; +export { getHistogramsForFields } from './models/data_visualizer/'; diff --git a/x-pack/plugins/transform/server/routes/api/field_histograms.ts b/x-pack/plugins/transform/server/routes/api/field_histograms.ts new file mode 100644 index 0000000000000..732e5e072fe84 --- /dev/null +++ b/x-pack/plugins/transform/server/routes/api/field_histograms.ts @@ -0,0 +1,50 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +import { wrapEsError } from '../../../../../legacy/server/lib/create_router/error_wrappers'; + +import { getHistogramsForFields } from '../../shared_imports'; +import { RouteDependencies } from '../../types'; + +import { addBasePath } from '../index'; + +import { wrapError } from './error_utils'; +import { fieldHistogramsSchema, indexPatternTitleSchema, IndexPatternTitleSchema } from './schema'; + +export function registerFieldHistogramsRoutes({ router, license }: RouteDependencies) { + router.get( + { + path: addBasePath('transforms/field_histograms/{indexPatternTitle}'), + validate: { + params: indexPatternTitleSchema, + body: fieldHistogramsSchema, + }, + }, + license.guardApiRoute(async (ctx, req, res) => { + const { indexPatternTitle } = req.params as IndexPatternTitleSchema; + const { query, fields, samplerShardSize } = req.body; + + try { + const resp = await getHistogramsForFields( + ctx.transform!.dataClient.callAsCurrentUser, + indexPatternTitle, + query, + fields, + samplerShardSize + ); + + return res.ok({ body: resp }); + } catch (e) { + return res.customError(wrapError(wrapEsError(e))); + } + }) + ); +} diff --git a/x-pack/plugins/transform/server/routes/api/schema.ts b/x-pack/plugins/transform/server/routes/api/schema.ts index 7da3f1ccfe55e..152ccd9128621 100644 --- a/x-pack/plugins/transform/server/routes/api/schema.ts +++ b/x-pack/plugins/transform/server/routes/api/schema.ts @@ -5,6 +5,23 @@ */ import { schema } from '@kbn/config-schema'; +export const fieldHistogramsSchema = schema.object({ + /** Query to match documents in the index. */ + query: schema.any(), + fields: schema.arrayOf(schema.any()), + /** Number of documents to be collected in the sample processed on each shard, or -1 for no sampling. */ + samplerShardSize: schema.number(), +}); + +export const indexPatternTitleSchema = schema.object({ + /** Title of the index pattern for which to return stats. */ + indexPatternTitle: schema.string(), +}); + +export interface IndexPatternTitleSchema { + indexPatternTitle: string; +} + export const schemaTransformId = { params: schema.object({ transformId: schema.string(), diff --git a/x-pack/plugins/transform/server/shared_imports.ts b/x-pack/plugins/transform/server/shared_imports.ts new file mode 100644 index 0000000000000..d1f86ac375721 --- /dev/null +++ b/x-pack/plugins/transform/server/shared_imports.ts @@ -0,0 +1,7 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License; + * you may not use this file except in compliance with the Elastic License. + */ + +export { getHistogramsForFields } from '../../ml/server';