Onboard Uptime rule types with FAAD (elastic#179493)
Towards: elastic#169867

This PR onboards the Uptime rule types (TLS, Duration Anomaly, and Monitor
Status) with FAAD.

We are deprecating the rule-registry plugin and onboarding these rule types
with the new alertsClient to manage alerts-as-data.
There is no new feature: all the rule types should work as they did before and
save alerts with all the existing fields.
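As a rough sketch of the pattern the executors move to (simplified, not this PR's literal code — the `AlertsClientLike`/`RecoveredAlertLike` shapes below are illustrative stand-ins for the framework's `RuleExecutorServices['alertsClient']`, and the ids/URLs are hypothetical):

```ts
// Illustrative only: a trimmed-down alerts client shape and how an executor
// reports active alerts and enriches recovered ones with context.
interface RecoveredAlertLike {
  alert: {
    getId: () => string;
    getUuid: () => string;
    getStart: () => string | null;
    getState: () => Record<string, unknown>;
  };
}

interface AlertsClientLike {
  report: (args: {
    id: string;
    actionGroup: string;
    state?: Record<string, unknown>;
    payload?: Record<string, unknown>;
  }) => { uuid: string; start: string | null };
  getRecoveredAlerts: () => RecoveredAlertLike[];
  setAlertData: (args: { id: string; context: Record<string, unknown> }) => void;
}

export function reportAndRecoverSketch(alertsClient: AlertsClientLike) {
  // Active path: replaces alertWithLifecycle/alertFactory. `payload` holds the
  // fields persisted to the alerts-as-data index, `state` the per-alert state.
  alertsClient.report({
    id: 'example-monitor-1', // hypothetical alert id
    actionGroup: 'xpack.uptime.alerts.actionGroups.durationAnomaly',
    payload: { 'monitor.id': 'example-monitor-1', 'kibana.alert.reason': 'example reason' },
    state: { isTriggered: true },
  });

  // Recovery path: replaces alertFactory.done().getRecoveredAlerts() + setContext.
  for (const recovered of alertsClient.getRecoveredAlerts()) {
    alertsClient.setAlertData({
      id: recovered.alert.getId(),
      context: {
        ...recovered.alert.getState(),
        alertDetailsUrl: 'https://example.invalid/alert', // hypothetical URL
      },
    });
  }
}
```

Roughly: `payload` is what ends up in the alert document, `state` is carried between rule runs, and `context` feeds the action variables of the recovered action.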

## To verify:

- Switch to Kibana 8.9.0 in your local repo (Uptime rules are not deprecated in
this version).
- Run your ES with: `yarn es snapshot -E path.data=../local-es-data`
- Run your Kibana
- Create Uptime rules with an active and a recovered action (You can run
Heartbeat locally if needed, [follow the
instructions](https://www.elastic.co/guide/en/beats/heartbeat/current/heartbeat-installation-configuration.html))
- Stop your ES and Kibana
- Switch to this branch and run your ES with `yarn es snapshot -E
path.data=../local-es-data` again.
- Run your Kibana
- Modify the Uptime rule type code to force it to create an alert.
Example:
Mock [availabilityResults in
status_check](https://github.com/elastic/kibana/blob/main/x-pack/plugins/observability_solution/uptime/server/legacy_uptime/lib/alerts/status_check.ts#L491)
with the data below:
```ts
availabilityResults = [
      {
        monitorId: '1',
        up: 1,
        down: 0,
        location: 'location',
        availabilityRatio: 0.5,
        monitorInfo: {
          timestamp: '',
          monitor: {
            id: '1',
            status: 'down',
            type: 'type',
            check_group: 'default',
          },
          docId: 'docid',
        },
      },
    ];
```

It should create an alert. The alert should be saved in the
`.alerts-observability.uptime.alerts` index and be visible on the Observability
Alerts page.

Then remove the mock; the alert should be recovered.
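If you want to check the alert documents directly rather than through the UI, querying the alerts index works too. A minimal sketch, assuming a local cluster started by `yarn es snapshot` with the default `elastic`/`changeme` credentials (adjust the node URL, auth, and index pattern to your setup):

```ts
// Sketch: list the alert documents written by the Uptime rules.
import { Client } from '@elastic/elasticsearch';

const client = new Client({
  node: 'http://localhost:9200', // assumption: default local dev endpoint
  auth: { username: 'elastic', password: 'changeme' }, // assumption: default dev credentials
});

async function checkUptimeAlerts() {
  const result = await client.search({
    index: '.alerts-observability.uptime.alerts-*',
    size: 10,
    query: { match_all: {} },
  });
  for (const hit of result.hits.hits) {
    // `kibana.alert.status` should be 'active' while the mock is in place,
    // and 'recovered' once the mock is removed.
    console.log(hit._id, JSON.stringify(hit._source));
  }
}

checkUptimeAlerts().catch((err) => {
  console.error(err);
  process.exit(1);
});
```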

ersin-erdal authored May 2, 2024
1 parent 60fbf3f commit d228f48
Showing 10 changed files with 690 additions and 523 deletions.
```diff
@@ -9,10 +9,13 @@ import { isRight } from 'fp-ts/lib/Either';
 import Mustache from 'mustache';
 import { AlertsLocatorParams, getAlertUrl } from '@kbn/observability-plugin/common';
 import { LocatorPublic } from '@kbn/share-plugin/common';
-import { legacyExperimentalFieldMap } from '@kbn/alerts-as-data-utils';
+import { legacyExperimentalFieldMap, ObservabilityUptimeAlert } from '@kbn/alerts-as-data-utils';
 import { IBasePath } from '@kbn/core/server';
-import { type IRuleTypeAlerts, RuleExecutorServices } from '@kbn/alerting-plugin/server';
+import type { IRuleTypeAlerts } from '@kbn/alerting-plugin/server';
+import { RuleExecutorServices } from '@kbn/alerting-plugin/server';
 import { addSpaceIdToPath } from '@kbn/spaces-plugin/common';
+import { AlertInstanceState } from '@kbn/alerting-plugin/server';
+import { AlertInstanceContext } from '@kbn/alerting-plugin/server';
 import { uptimeRuleFieldMap } from '../../../../common/rules/uptime_rule_field_map';
 import { SYNTHETICS_RULE_TYPES_ALERT_CONTEXT } from '../../../../common/constants/synthetics_alerts';
 import { UptimeCommonState, UptimeCommonStateType } from '../../../../common/runtime_types';
@@ -82,31 +85,29 @@ export const getAlertDetailsUrl = (
   alertUuid: string | null
 ) => addSpaceIdToPath(basePath.publicBaseUrl, spaceId, `/app/observability/alerts/${alertUuid}`);
 
-export const setRecoveredAlertsContext = async ({
-  alertFactory,
+export const setRecoveredAlertsContext = async <ActionGroupIds extends string>({
+  alertsClient,
+  alertsLocator,
   basePath,
   defaultStartedAt,
-  getAlertStartedDate,
   spaceId,
-  alertsLocator,
-  getAlertUuid,
 }: {
-  alertFactory: RuleExecutorServices['alertFactory'];
-  defaultStartedAt: string;
-  getAlertStartedDate: (alertInstanceId: string) => string | null;
+  alertsClient: RuleExecutorServices<
+    AlertInstanceState,
+    AlertInstanceContext,
+    ActionGroupIds,
+    ObservabilityUptimeAlert
+  >['alertsClient'];
+  alertsLocator?: LocatorPublic<AlertsLocatorParams>;
   basePath: IBasePath;
+  defaultStartedAt: string;
   spaceId: string;
-  alertsLocator?: LocatorPublic<AlertsLocatorParams>;
-  getAlertUuid?: (alertId: string) => string | null;
 }) => {
-  const { getRecoveredAlerts } = alertFactory.done();
-
-  for await (const alert of getRecoveredAlerts()) {
-    const recoveredAlertId = alert.getId();
-    const alertUuid = getAlertUuid?.(recoveredAlertId) || null;
-    const indexedStartedAt = getAlertStartedDate(recoveredAlertId) ?? defaultStartedAt;
-
-    const state = alert.getState();
+  for (const recoveredAlert of alertsClient?.getRecoveredAlerts() ?? []) {
+    const recoveredAlertId = recoveredAlert.alert.getId();
+    const alertUuid = recoveredAlert.alert.getUuid();
+    const indexedStartedAt = recoveredAlert.alert.getStart() ?? defaultStartedAt;
+    const state = recoveredAlert.alert.getState();
     const alertUrl = await getAlertUrl(
       alertUuid,
       spaceId,
@@ -115,17 +116,21 @@ export const setRecoveredAlertsContext = async ({
       basePath.publicBaseUrl
     );
 
-    alert.setContext({
-      ...state,
-      [ALERT_DETAILS_URL]: alertUrl,
+    alertsClient!.setAlertData({
+      id: recoveredAlertId,
+      context: {
+        ...state,
+        [ALERT_DETAILS_URL]: alertUrl,
+      },
     });
   }
 };
 
 export const uptimeRuleTypeFieldMap = { ...uptimeRuleFieldMap, ...legacyExperimentalFieldMap };
 
-export const UptimeRuleTypeAlertDefinition: IRuleTypeAlerts = {
+export const UptimeRuleTypeAlertDefinition: IRuleTypeAlerts<ObservabilityUptimeAlert> = {
   context: SYNTHETICS_RULE_TYPES_ALERT_CONTEXT,
   mappings: { fieldMap: uptimeRuleTypeFieldMap },
   useLegacyAlerts: true,
+  shouldWrite: true,
 };
```
```diff
@@ -11,11 +11,8 @@ import {
 } from '@kbn/rule-data-utils';
 import { durationAnomalyAlertFactory } from './duration_anomaly';
 import { DURATION_ANOMALY } from '../../../../common/constants/uptime_alerts';
-import {
-  getSeverityType,
-  type MlAnomaliesTableRecord,
-  type MlAnomalyRecordDoc,
-} from '@kbn/ml-anomaly-utils';
+import { getSeverityType } from '@kbn/ml-anomaly-utils';
+import type { MlAnomaliesTableRecord, MlAnomalyRecordDoc } from '@kbn/ml-anomaly-utils';
 import { createRuleTypeMocks, bootstrapDependencies } from './test_utils';
 import { Ping } from '../../../../common/runtime_types/ping';
 
@@ -104,6 +101,27 @@ const mockOptions = (
 ): any => {
   const { services, setContext } = createRuleTypeMocks(mockRecoveredAlerts);
 
+  services.alertsClient.report.mockImplementation((param: any) => {
+    return {
+      uuid: `uuid-${param.id}`,
+      start: new Date().toISOString(),
+      alertDoc: {},
+    };
+  });
+
+  services.alertsClient.getRecoveredAlerts.mockImplementation((param: any) => {
+    return mockRecoveredAlerts.map((alert) => ({
+      alert: {
+        getId: () => 'mock-id',
+        getUuid: () => 'mock-uuiid',
+        getState: () => alert,
+        getStart: () => new Date().toISOString(),
+        setContext,
+        context: {},
+      },
+    }));
+  });
+
   return {
     params,
     state,
@@ -158,12 +176,12 @@ describe('duration anomaly alert', () => {
     const alert = durationAnomalyAlertFactory(server, libs, plugins);
     const options = mockOptions();
     const {
-      services: { alertWithLifecycle },
+      services: { alertsClient },
     } = options;
     // @ts-ignore the executor can return `void`, but ours never does
     const state: Record<string, any> = await alert.executor(options);
     expect(mockGetAnomliesTableDataGetter).toHaveBeenCalledTimes(1);
-    expect(alertWithLifecycle).toHaveBeenCalledTimes(2);
+    expect(alertsClient.report).toHaveBeenCalledTimes(2);
     expect(mockGetAnomliesTableDataGetter).toBeCalledWith(
       ['uptime_monitor_high_latency_by_geo'],
       [],
@@ -177,14 +195,15 @@
       10,
       undefined
     );
-    const [{ value: alertInstanceMock }] = alertWithLifecycle.mock.results;
-    expect(alertInstanceMock.replaceState).toHaveBeenCalledTimes(2);
+
     const reasonMessages: string[] = [];
     mockAnomaliesResult.anomalies.forEach((anomaly, index) => {
       const slowestResponse = Math.round(anomaly.actualSort / 1000);
       const typicalResponse = Math.round(anomaly.typicalSort / 1000);
-      expect(alertWithLifecycle).toBeCalledWith({
-        fields: {
+      expect(alertsClient.report).toHaveBeenCalledWith({
+        id: `${DURATION_ANOMALY.id}${index}`,
+        actionGroup: DURATION_ANOMALY.id,
+        payload: {
          'monitor.id': options.params.monitorId,
          'url.full': mockPing.url?.full,
          'anomaly.start': mockDate,
@@ -201,27 +220,26 @@ Response times as high as ${slowestResponse} ms have been detected from location
            anomaly.entityValue
          }. Expected response time is ${typicalResponse} ms.`,
        },
-        id: `${DURATION_ANOMALY.id}${index}`,
+        state: {
+          firstCheckedAt: 'date',
+          firstTriggeredAt: undefined,
+          lastCheckedAt: 'date',
+          lastResolvedAt: undefined,
+          isTriggered: false,
+          anomalyStartTimestamp: 'date',
+          currentTriggerStarted: undefined,
+          expectedResponseTime: `${typicalResponse} ms`,
+          lastTriggeredAt: undefined,
+          monitor: monitorId,
+          monitorUrl: mockPing.url?.full,
+          observerLocation: anomaly.entityValue,
+          severity: getSeverityType(anomaly.severity),
+          severityScore: anomaly.severity,
+          slowestAnomalyResponse: `${slowestResponse} ms`,
+          bucketSpan: anomaly.source.bucket_span,
+        },
      });
 
-      expect(alertInstanceMock.replaceState).toBeCalledWith({
-        firstCheckedAt: 'date',
-        firstTriggeredAt: undefined,
-        lastCheckedAt: 'date',
-        lastResolvedAt: undefined,
-        isTriggered: false,
-        anomalyStartTimestamp: 'date',
-        currentTriggerStarted: undefined,
-        expectedResponseTime: `${typicalResponse} ms`,
-        lastTriggeredAt: undefined,
-        monitor: monitorId,
-        monitorUrl: mockPing.url?.full,
-        observerLocation: anomaly.entityValue,
-        severity: getSeverityType(anomaly.severity),
-        severityScore: anomaly.severity,
-        slowestAnomalyResponse: `${slowestResponse} ms`,
-        bucketSpan: anomaly.source.bucket_span,
-      });
      const reasonMsg = `Abnormal (${getSeverityType(
        anomaly.severity
      )} level) response time detected on uptime-monitor with url ${
@@ -233,45 +251,48 @@ Response times as high as ${slowestResponse} ms have been detected from location
 
      reasonMessages.push(reasonMsg);
    });
-    expect(alertInstanceMock.scheduleActions).toHaveBeenCalledTimes(2);
 
-    expect(alertInstanceMock.scheduleActions.mock.calls[0]).toMatchInlineSnapshot(`
+    expect(alertsClient.setAlertData.mock.calls[0]).toMatchInlineSnapshot(`
      Array [
-        "xpack.uptime.alerts.actionGroups.durationAnomaly",
        Object {
-          "alertDetailsUrl": "mockedAlertsLocator > getLocation",
-          "anomalyStartTimestamp": "date",
-          "bucketSpan": 900,
-          "expectedResponseTime": "10 ms",
-          "monitor": "uptime-monitor",
-          "monitorUrl": "https://elastic.co",
-          "observerLocation": "harrisburg",
-          "reason": "Abnormal (minor level) response time detected on uptime-monitor with url https://elastic.co at date. Anomaly severity score is 25.
+          "context": Object {
+            "alertDetailsUrl": "mockedAlertsLocator > getLocation",
+            "anomalyStartTimestamp": "date",
+            "bucketSpan": 900,
+            "expectedResponseTime": "10 ms",
+            "monitor": "uptime-monitor",
+            "monitorUrl": "https://elastic.co",
+            "observerLocation": "harrisburg",
+            "reason": "Abnormal (minor level) response time detected on uptime-monitor with url https://elastic.co at date. Anomaly severity score is 25.
Response times as high as 200 ms have been detected from location harrisburg. Expected response time is 10 ms.",
-          "severity": "minor",
-          "severityScore": 25,
-          "slowestAnomalyResponse": "200 ms",
-          "viewInAppUrl": "http://localhost:5601/hfe/app/uptime/monitor/eHBhY2sudXB0aW1lLmFsZXJ0cy5hY3Rpb25Hcm91cHMuZHVyYXRpb25Bbm9tYWx5MA==?dateRangeEnd=now&dateRangeStart=2022-03-17T13%3A13%3A33.755Z",
+            "severity": "minor",
+            "severityScore": 25,
+            "slowestAnomalyResponse": "200 ms",
+            "viewInAppUrl": "http://localhost:5601/hfe/app/uptime/monitor/eHBhY2sudXB0aW1lLmFsZXJ0cy5hY3Rpb25Hcm91cHMuZHVyYXRpb25Bbm9tYWx5MA==?dateRangeEnd=now&dateRangeStart=date",
+          },
+          "id": "xpack.uptime.alerts.actionGroups.durationAnomaly",
        },
      ]
    `);
-    expect(alertInstanceMock.scheduleActions.mock.calls[1]).toMatchInlineSnapshot(`
+    expect(alertsClient.setAlertData.mock.calls[1]).toMatchInlineSnapshot(`
      Array [
-        "xpack.uptime.alerts.actionGroups.durationAnomaly",
        Object {
-          "alertDetailsUrl": "mockedAlertsLocator > getLocation",
-          "anomalyStartTimestamp": "date",
-          "bucketSpan": 900,
-          "expectedResponseTime": "20 ms",
-          "monitor": "uptime-monitor",
-          "monitorUrl": "https://elastic.co",
-          "observerLocation": "fairbanks",
-          "reason": "Abnormal (warning level) response time detected on uptime-monitor with url https://elastic.co at date. Anomaly severity score is 10.
+          "context": Object {
+            "alertDetailsUrl": "mockedAlertsLocator > getLocation",
+            "anomalyStartTimestamp": "date",
+            "bucketSpan": 900,
+            "expectedResponseTime": "20 ms",
+            "monitor": "uptime-monitor",
+            "monitorUrl": "https://elastic.co",
+            "observerLocation": "fairbanks",
+            "reason": "Abnormal (warning level) response time detected on uptime-monitor with url https://elastic.co at date. Anomaly severity score is 10.
Response times as high as 300 ms have been detected from location fairbanks. Expected response time is 20 ms.",
-          "severity": "warning",
-          "severityScore": 10,
-          "slowestAnomalyResponse": "300 ms",
-          "viewInAppUrl": "http://localhost:5601/hfe/app/uptime/monitor/eHBhY2sudXB0aW1lLmFsZXJ0cy5hY3Rpb25Hcm91cHMuZHVyYXRpb25Bbm9tYWx5MQ==?dateRangeEnd=now&dateRangeStart=2022-03-17T13%3A13%3A33.755Z",
+            "severity": "warning",
+            "severityScore": 10,
+            "slowestAnomalyResponse": "300 ms",
+            "viewInAppUrl": "http://localhost:5601/hfe/app/uptime/monitor/eHBhY2sudXB0aW1lLmFsZXJ0cy5hY3Rpb25Hcm91cHMuZHVyYXRpb25Bbm9tYWx5MQ==?dateRangeEnd=now&dateRangeStart=date",
+          },
+          "id": "xpack.uptime.alerts.actionGroups.durationAnomaly",
        },
      ]
    `);
@@ -300,11 +321,17 @@ Response times as high as ${slowestResponse} ms have been detected from location
    );
    const alert = durationAnomalyAlertFactory(server, libs, plugins);
    const options = mockOptions();
+    const {
+      services: { alertsClient },
+    } = options;
    // @ts-ignore the executor can return `void`, but ours never does
    const state: Record<string, any> = await alert.executor(options);
-    expect(options.setContext).toHaveBeenCalledTimes(2);
-    mockRecoveredAlerts.forEach((alertState) => {
-      expect(options.setContext).toHaveBeenCalledWith(alertState);
+    expect(alertsClient.setAlertData).toHaveBeenCalledTimes(4);
+    mockRecoveredAlerts.forEach((alertState, index) => {
+      expect(alertsClient.setAlertData).toHaveBeenNthCalledWith(index + 3, {
+        context: alertState,
+        id: 'mock-id',
+      });
    });
  });
 });
```