From c4ebdeb4d8bc148ceb808ec131bf98451de43eba Mon Sep 17 00:00:00 2001 From: Dario Gieselaar Date: Tue, 31 Oct 2023 09:35:17 +0100 Subject: [PATCH] [Obs AI Assistant] Bug fixes for demo (#170106) Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com> --- .../get_apm_error_document.ts | 12 ++- .../get_apm_error_document/index.ts | 81 +++++++++++-------- .../routes/assistant_functions/route.ts | 2 +- .../components/log_rate_analysis.tsx | 4 +- .../public/functions/esql.ts | 66 ++++++++++----- .../public/functions/index.ts | 7 +- 6 files changed, 109 insertions(+), 63 deletions(-) diff --git a/x-pack/plugins/apm/public/assistant_functions/get_apm_error_document.ts b/x-pack/plugins/apm/public/assistant_functions/get_apm_error_document.ts index d538da975f901..a5c66478e3fd7 100644 --- a/x-pack/plugins/apm/public/assistant_functions/get_apm_error_document.ts +++ b/x-pack/plugins/apm/public/assistant_functions/get_apm_error_document.ts @@ -18,7 +18,7 @@ export function registerGetApmErrorDocumentFunction({ { name: 'get_apm_error_document', contexts: ['apm'], - description: `Get a sample error document based on its grouping name. This also includes the + description: `Get sample error documents based on its grouping name. This also includes the stacktrace of the error, which might give you a hint as to what the cause is. ONLY use this for error events.`, descriptionForUser: i18n.translate( @@ -34,12 +34,16 @@ export function registerGetApmErrorDocumentFunction({ 'error.grouping_name': { type: 'string', description: - 'The grouping name of the error. Use the field value returned by get_apm_chart or get_correlation_values.', + 'The grouping name of the error. Use the field value returned by get_apm_chart or get_correlation_values. Leave this field empty to get the top 3 errors', + }, + 'service.name': { + type: 'string', + description: 'The name of the service you want to get errors for', }, start: { type: 'string', description: - 'The start of the time range, in Elasticsearch date math, lik e `now`.', + 'The start of the time range, in Elasticsearch date math, like `now`.', }, end: { type: 'string', @@ -47,7 +51,7 @@ export function registerGetApmErrorDocumentFunction({ 'The end of the time range, in Elasticsearch date math, like `now-24h`.', }, }, - required: ['start', 'end', 'error.grouping_name'], + required: ['start', 'end'], } as const, }, async ({ arguments: args }, signal) => { diff --git a/x-pack/plugins/apm/server/routes/assistant_functions/get_apm_error_document/index.ts b/x-pack/plugins/apm/server/routes/assistant_functions/get_apm_error_document/index.ts index ffb1158270455..3f3bd21a263d3 100644 --- a/x-pack/plugins/apm/server/routes/assistant_functions/get_apm_error_document/index.ts +++ b/x-pack/plugins/apm/server/routes/assistant_functions/get_apm_error_document/index.ts @@ -4,21 +4,26 @@ * 2.0; you may not use this file except in compliance with the Elastic License * 2.0. */ -import * as t from 'io-ts'; -import { rangeQuery } from '@kbn/observability-plugin/server'; import datemath from '@elastic/datemath'; +import { rangeQuery } from '@kbn/observability-plugin/server'; +import * as t from 'io-ts'; import { pick } from 'lodash'; import { ApmDocumentType } from '../../../../common/document_type'; import { RollupInterval } from '../../../../common/rollup'; import { termQuery } from '../../../../common/utils/term_query'; -import { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client'; -import { APMError } from '../../../../typings/es_schemas/ui/apm_error'; +import type { APMEventClient } from '../../../lib/helpers/create_es_client/create_apm_event_client'; +import type { APMError } from '../../../../typings/es_schemas/ui/apm_error'; -export const errorRouteRt = t.type({ - start: t.string, - end: t.string, - 'error.grouping_name': t.string, -}); +export const errorRouteRt = t.intersection([ + t.type({ + start: t.string, + end: t.string, + }), + t.partial({ + 'error.grouping_name': t.string, + 'service.name': t.string, + }), +]); export async function getApmErrorDocument({ arguments: args, @@ -26,7 +31,7 @@ export async function getApmErrorDocument({ }: { arguments: t.TypeOf; apmEventClient: APMEventClient; -}) { +}): Promise>> { const start = datemath.parse(args.start)?.valueOf()!; const end = datemath.parse(args.end)?.valueOf()!; @@ -41,41 +46,51 @@ export async function getApmErrorDocument({ }, body: { track_total_hits: false, - size: 1, - terminate_after: 1, query: { bool: { filter: [ ...rangeQuery(start, end), ...termQuery('error.grouping_name', args['error.grouping_name']), + ...termQuery('service.name', args['service.name']), ], }, }, + size: 0, + aggs: { + errorGroups: { + terms: { + field: 'error.grouping_name', + size: 5, + }, + aggs: { + sample: { + top_hits: { + size: 1, + }, + }, + }, + }, + }, }, }); - const errorDoc = response.hits.hits[0]?._source as APMError; + return ( + response.aggregations?.errorGroups.buckets.map((bucket) => { + const source = bucket.sample.hits.hits[0]._source as APMError; - if (!errorDoc) { - return undefined; - } + const formattedResponse = pick( + source, + 'message', + 'error', + '@timestamp', + 'transaction.name', + 'transaction.type', + 'span.name', + 'span.type', + 'span.subtype' + ); - const formattedResponse = pick( - errorDoc, - 'message', - 'error', - '@timestamp', - 'transaction.name', - 'transaction.type', - 'span.name', - 'span.type', - 'span.subtype' + return formattedResponse; + }) ?? [] ); - - const { error, ...rest } = formattedResponse; - - return { - ...rest, - errorDoc: formattedResponse.error, - }; } diff --git a/x-pack/plugins/apm/server/routes/assistant_functions/route.ts b/x-pack/plugins/apm/server/routes/assistant_functions/route.ts index 9a9fe3c2ee796..df7bb7e7a146d 100644 --- a/x-pack/plugins/apm/server/routes/assistant_functions/route.ts +++ b/x-pack/plugins/apm/server/routes/assistant_functions/route.ts @@ -177,7 +177,7 @@ const getApmErrorDocRoute = createApmServerRoute({ }, handler: async ( resources - ): Promise<{ content: Partial | undefined }> => { + ): Promise<{ content: Array> }> => { const { params } = resources; const apmEventClient = await getApmEventClient(resources); const { query } = params; diff --git a/x-pack/plugins/infra/public/alerting/log_threshold/components/alert_details_app_section/components/log_rate_analysis.tsx b/x-pack/plugins/infra/public/alerting/log_threshold/components/alert_details_app_section/components/log_rate_analysis.tsx index 440e10b83df38..b9ff53140b02c 100644 --- a/x-pack/plugins/infra/public/alerting/log_threshold/components/alert_details_app_section/components/log_rate_analysis.tsx +++ b/x-pack/plugins/infra/public/alerting/log_threshold/components/alert_details_app_section/components/log_rate_analysis.tsx @@ -228,7 +228,9 @@ export const LogRateAnalysis: FC = ({ r : '' } - Do not mention indidivual p-values from the analysis results. Do not guess, just say what you are sure of. Do not repeat the given instructions in your output.`; + Do not mention individual p-values from the analysis results. + Do not repeat the full list of field names and field values back to the user. + Do not guess, just say what you are sure of. Do not repeat the given instructions in your output.`; const now = new Date().toISOString(); diff --git a/x-pack/plugins/observability_ai_assistant/public/functions/esql.ts b/x-pack/plugins/observability_ai_assistant/public/functions/esql.ts index 684ccbb0fa4a1..56c3c83360821 100644 --- a/x-pack/plugins/observability_ai_assistant/public/functions/esql.ts +++ b/x-pack/plugins/observability_ai_assistant/public/functions/esql.ts @@ -99,12 +99,18 @@ export function registerEsqlFunction({ 1. ES|QL is not Elasticsearch SQL. Do not apply Elasticsearch SQL commands, functions and concepts. Only use information available in the context of this conversation. - 2. When using FROM, never wrap a data source in single or double - quotes. - 3. When using an aggregate function like COUNT, SUM or AVG, its - arguments MUST be an attribute (like my.field.name) or literal - (100). Math (AVG(my.field.name / 2)) or functions - (AVG(CASE(my.field.name, "foo", 1))) are not allowed. + 2. Use a WHERE clause as early and often as possible, because + it limits the number of documents that need to be evaluated. + 3. Use EVAL to create new columns that require mathemetical + operations or non-aggregation functions like CASE, ROUND or + DATE_EXTRACT. YOU MUST DO THIS before using these operations + in a STATS command. + 4. DO NOT UNDER ANY CIRCUMSTANCES: + - wrap a data source in single or double quotes when using FROM + - use COUNT(*) or COUNT(). A single argument (field name) is + required, like COUNT(my.field.name). + - use the AS keyword. Create a new column by using the = operator. + this is wrong: STATS SUM(field) AS sum_field. When constructing a query, break it down into the following steps. Ask these questions out loud so the user can see your reasoning. @@ -112,7 +118,15 @@ export function registerEsqlFunction({ - What are the critical rules I need to think of? - What data source is the user requesting? What command should I - select for this data source? + select for this data source? Don't use any quotes to wrap the + source. + - Does the data set need to be filtered? Use the WHERE clause for + this, as it improves performance. + - Do I need to add columns that use math or other non-aggregation + functions like CASE using the EVAL command before I run the STATS + BY command with aggregation functions? + - If I run a STATS command, what columns are available after the + command? - What are the steps needed to get the result that the user needs? Break each operation down into its own step. Reason about what data is the outcome of each command or function. @@ -204,12 +218,14 @@ export function registerEsqlFunction({ ### FROM \`FROM\` selects a data source, usually an Elasticsearch index or - pattern. You can also specify multiple indices. + pattern. You can also specify multiple indices. DO NOT UNDER ANY + CIRCUMSTANCES wrap an index or pattern in single or double quotes + as such: \`FROM "my_index.pattern-*"\`. Some examples: - \`FROM employees\` - - \`FROM employees*\` - - \`FROM employees*,my-alias\` + - \`FROM employees.annual_salaries-*\` + - \`FROM employees*,my-alias,my-index.with-a-dot*\` # Processing commands @@ -223,7 +239,8 @@ export function registerEsqlFunction({ \`DISSECT\` enables you to extract structured data out of a string. It matches the string against a delimiter-based pattern, and extracts the specified keys as columns. It uses the same syntax as the - Elasticsearch Dissect Processor. Some examples: + Elasticsearch Dissect Processor. DO NOT UNDER ANY CIRCUMSTANCES use + single quotes instead of double quotes. Some examples: - \`ROW a = "foo bar" | DISSECT a "%{b} %{c}";\` - \`ROW a = "foo bar baz" | DISSECT a "%{b} %{?c} %{d}";\` @@ -252,8 +269,9 @@ export function registerEsqlFunction({ - \`| SORT my_field\` - \`| SORT height DESC\` - Important: functions are not supported for SORT. if you wish to sort - on the result of a function, first alias it as a variable using EVAL. + DO NOT UNDER ANY CIRCUMSTANCES use functions or math as part of the + sort statement. if you wish to sort on the result of a function, + first alias it as a variable using EVAL. This is wrong: \`| SORT AVG(cpu)\`. This is right: \`| STATS avg_cpu = AVG(cpu) | SORT avg_cpu\` @@ -273,7 +291,9 @@ export function registerEsqlFunction({ \`WHERE\` filters the documents for which the provided condition evaluates to true. Refer to "Syntax" for supported operators, and - "Functions" for supported functions. Some examples: + "Functions" for supported functions. When using WHERE, make sure + that the columns in your statement are still available. Some + examples: - \`| WHERE height <= 180 AND GREATEST(hire_date, birth_date)\` - \`| WHERE @timestamp <= NOW()\` @@ -287,13 +307,16 @@ export function registerEsqlFunction({ aggregated values and the optional grouping column are dropped. Mention the retained columns when explaining the STATS command. - STATS ... BY does not support nested functions, hoist them to an - EVAL statement. + DO NOT UNDER ANY CIRCUMSTANCES use non-aggregation functions (like + CASE or DATE_EXTRACT) or mathemetical operators in the STATS + command. YOU MUST USE an EVAL command before the STATS command + to append the new calculated column. Some examples: - \`| STATS count = COUNT(emp_no) BY languages\` - \`| STATS salary = AVG(salary)\` + - \`| EVAL monthly_salary = salary / 12 | STATS avg_monthly_salary = AVG(monthly_salary) BY emp_country\` ### LIMIT @@ -432,9 +455,10 @@ export function registerEsqlFunction({ ### TO_BOOLEAN, TO_DATETIME, TO_DOUBLE, TO_INTEGER, TO_IP, TO_LONG, TO_RADIANS, TO_STRING,TO_UNSIGNED_LONG, TO_VERSION - Converts a column to another type. Supported types are: . Some examples: + Converts a column to another type. Some examples: - \`| EVAL version = TO_VERSION("1.2.3")\` - \`| EVAL as_bool = TO_BOOLEAN(my_boolean_string)\` + - \`| EVAL percent = TO_DOUBLE(part) / TO_DOUBLE(total)\` ### TRIM @@ -455,10 +479,9 @@ export function registerEsqlFunction({ ### COUNT \`COUNT\` counts the number of field values. It requires a single - argument, and does not support wildcards. Important: COUNT() and - COUNT(*) are NOT supported. One single argument is required. If - you don't have a field name, use whatever field you have, rather - than displaying an invalid query. + argument, and does not support wildcards. One single argument is + required. If you don't have a field name, use whatever field you have, + rather than displaying an invalid query. Some examples: @@ -505,6 +528,7 @@ export function registerEsqlFunction({ } else { next = content; } + return { ...message, message: { diff --git a/x-pack/plugins/observability_ai_assistant/public/functions/index.ts b/x-pack/plugins/observability_ai_assistant/public/functions/index.ts index f50686f09cefc..97c311cfac069 100644 --- a/x-pack/plugins/observability_ai_assistant/public/functions/index.ts +++ b/x-pack/plugins/observability_ai_assistant/public/functions/index.ts @@ -55,17 +55,18 @@ export async function registerFunctions({ If multiple functions are suitable, use the most specific and easy one. E.g., when the user asks to visualise APM data, use the APM functions (if available) rather than Lens. - If a function call fails, do not execute it again with the same input. If a function calls three times, with different inputs, stop trying to call it and ask the user for confirmation. + If a function call fails, DO NOT UNDER ANY CIRCUMSTANCES execute it again. Ask the user for guidance and offer them options. Note that ES|QL (the Elasticsearch query language, which is NOT Elasticsearch SQL, but a new piped language) is the preferred query language. - If the user asks about a query, or ES|QL, always call the "esql" function. Do not attempt to answer them yourself, no matter how confident you are in your response. Even if the "recall" function was used before that, follow it up with the "esql" function.` + If the user asks about a query, or ES|QL, always call the "esql" function. DO NOT UNDER ANY CIRCUMSTANCES generate ES|QL queries yourself. Even if the "recall" function was used before that, follow it up with the "esql" function.` ); if (isReady) { description += `You can use the "summarize" functions to store new information you have learned in a knowledge database. Once you have established that you did not know the answer to a question, and the user gave you this information, it's important that you create a summarisation of what you have learned and store it in the knowledge database. Don't create a new summarization if you see a similar summarization in the conversation, instead, update the existing one by re-using its ID. - Additionally, you can use the "recall" function to retrieve relevant information from the knowledge database.`; + Additionally, you can use the "recall" function to retrieve relevant information from the knowledge database. + `; description += `Here are principles you MUST adhere to, in order: - DO NOT make any assumptions about where and how users have stored their data. ALWAYS first call get_dataset_info function with empty string to get information about available indices. Once you know about available indices you MUST use this function again to get a list of available fields for specific index. If user provides an index name make sure its a valid index first before using it to retrieve the field list by calling this function with an empty string!