Skip to content

Commit

Permalink
Event log shows a failure when ruleResultService.addLastRunError adds…
Browse files Browse the repository at this point in the history
… an error (#179551)

Resolves: #174035

As the new `lastRun` status holder for rule execution can set the
outcome of a successful execution as `failed`
We have to do the same for the old `executionStatus` as well.

This PR, sets rule execution status as `error` when there are error
messages reported by the `ruleResultService.addLastRunError`, even if no
errors are thrown!
So the event-log and task metrics shows the correct results.

## To verify

Please follow the steps in the issue to reproduce.
  • Loading branch information
ersin-erdal authored Mar 28, 2024
1 parent 6f03cc1 commit b0d1cb1
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 53 deletions.
64 changes: 50 additions & 14 deletions x-pack/plugins/alerting/server/lib/rule_execution_status.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import {
import { ErrorWithReason } from './error_with_reason';
import { translations } from '../constants/translations';
import { RuleRunMetrics, RuleRunMetricsStore } from './rule_run_metrics_store';
import { RuleResultService } from '../monitoring/rule_result_service';

const MockLogger = loggingSystemMock.create().get();
const executionMetrics = {
Expand Down Expand Up @@ -56,7 +57,10 @@ describe('RuleExecutionStatus', () => {
describe('executionStatusFromState()', () => {
test('empty task state', () => {
const emptyRuleRunState = new RuleRunMetricsStore().getMetrics();
const { status, metrics } = executionStatusFromState({ metrics: emptyRuleRunState });
const { status, metrics } = executionStatusFromState({
stateWithMetrics: { metrics: emptyRuleRunState },
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.status).toBe('ok');
expect(status.error).toBe(undefined);
Expand All @@ -67,8 +71,11 @@ describe('RuleExecutionStatus', () => {

test('task state with no instances', () => {
const { status, metrics } = executionStatusFromState({
alertInstances: {},
metrics: executionMetrics,
stateWithMetrics: {
alertInstances: {},
metrics: executionMetrics,
},
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.status).toBe('ok');
Expand All @@ -80,8 +87,11 @@ describe('RuleExecutionStatus', () => {

test('task state with one instance', () => {
const { status, metrics } = executionStatusFromState({
alertInstances: { a: {} },
metrics: executionMetrics,
stateWithMetrics: {
alertInstances: { a: {} },
metrics: executionMetrics,
},
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.status).toBe('active');
Expand All @@ -93,8 +103,11 @@ describe('RuleExecutionStatus', () => {

test('task state with max executable actions warning', () => {
const { status, metrics } = executionStatusFromState({
alertInstances: { a: {} },
metrics: { ...executionMetrics, triggeredActionsStatus: ActionsCompletion.PARTIAL },
stateWithMetrics: {
alertInstances: { a: {} },
metrics: { ...executionMetrics, triggeredActionsStatus: ActionsCompletion.PARTIAL },
},
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.warning).toEqual({
Expand All @@ -112,12 +125,15 @@ describe('RuleExecutionStatus', () => {

test('task state with max queued actions warning', () => {
const { status, metrics } = executionStatusFromState({
alertInstances: { a: {} },
metrics: {
...executionMetrics,
triggeredActionsStatus: ActionsCompletion.PARTIAL,
hasReachedQueuedActionsLimit: true,
stateWithMetrics: {
alertInstances: { a: {} },
metrics: {
...executionMetrics,
triggeredActionsStatus: ActionsCompletion.PARTIAL,
hasReachedQueuedActionsLimit: true,
},
},
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.warning).toEqual({
Expand All @@ -136,8 +152,11 @@ describe('RuleExecutionStatus', () => {

test('task state with max alerts warning', () => {
const { status, metrics } = executionStatusFromState({
alertInstances: { a: {} },
metrics: { ...executionMetrics, hasReachedAlertLimit: true },
stateWithMetrics: {
alertInstances: { a: {} },
metrics: { ...executionMetrics, hasReachedAlertLimit: true },
},
ruleResultService: new RuleResultService(),
});
checkDateIsNearNow(status.lastExecutionDate);
expect(status.warning).toEqual({
Expand All @@ -152,6 +171,23 @@ describe('RuleExecutionStatus', () => {
hasReachedAlertLimit: true,
});
});

test('task state with lastRun error', () => {
const ruleResultService = new RuleResultService();
const lastRunSetters = ruleResultService.getLastRunSetters();
lastRunSetters.addLastRunError('an error');

const { status } = executionStatusFromState({
stateWithMetrics: {
alertInstances: {},
metrics: executionMetrics,
},
ruleResultService,
});
expect(status.status).toBe('error');
expect(status.error).toEqual({ message: 'an error', reason: 'unknown' });
expect(status.warning).toBe(undefined);
});
});

describe('executionStatusFromError()', () => {
Expand Down
29 changes: 25 additions & 4 deletions x-pack/plugins/alerting/server/lib/rule_execution_status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,15 @@

import { Logger } from '@kbn/core/server';
import { ActionsCompletion } from '@kbn/alerting-state-types';
import { RuleResultService } from '../monitoring/rule_result_service';
import {
RuleExecutionStatus,
RuleExecutionStatusValues,
RuleExecutionStatusWarningReasons,
RawRuleExecutionStatus,
RawRule,
Rule,
RuleExecutionStatusErrorReasons,
} from '../types';
import { getReasonFromError } from './error_with_reason';
import { getEsErrorMessage } from './errors';
Expand All @@ -27,17 +29,24 @@ export interface IExecutionStatusAndMetrics {
metrics: RuleRunMetrics | null;
}

export function executionStatusFromState(
stateWithMetrics: RuleTaskStateAndMetrics,
lastExecutionDate?: Date
): IExecutionStatusAndMetrics {
export function executionStatusFromState({
stateWithMetrics,
ruleResultService,
lastExecutionDate,
}: {
stateWithMetrics: RuleTaskStateAndMetrics;
ruleResultService: RuleResultService;
lastExecutionDate?: Date;
}): IExecutionStatusAndMetrics {
const alertIds = Object.keys(stateWithMetrics.alertInstances ?? {});

let status: RuleExecutionStatuses =
alertIds.length === 0 ? RuleExecutionStatusValues[0] : RuleExecutionStatusValues[1];

// Check for warning states
let warning = null;
let error = null;

// We only have a single warning field so prioritizing the alert circuit breaker over the actions circuit breaker
if (stateWithMetrics.metrics.hasReachedAlertLimit) {
status = RuleExecutionStatusValues[5];
Expand All @@ -60,11 +69,23 @@ export function executionStatusFromState(
}
}

// Overwrite status to be error if last run reported any errors
const { errors: errorsFromLastRun } = ruleResultService.getLastRunResults();
if (errorsFromLastRun.length > 0) {
status = RuleExecutionStatusValues[2];
// These errors are reported by ruleResultService.addLastRunError, therefore they are landed in successful execution map
error = {
reason: RuleExecutionStatusErrorReasons.Unknown,
message: errorsFromLastRun.join(','),
};
}

return {
status: {
lastExecutionDate: lastExecutionDate ?? new Date(),
status,
...(warning ? { warning } : {}),
...(error ? { error } : {}),
},
metrics: stateWithMetrics.metrics,
};
Expand Down
103 changes: 82 additions & 21 deletions x-pack/plugins/alerting/server/task_runner/task_runner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ import { alertsClientMock } from '../alerts_client/alerts_client.mock';
import { MaintenanceWindow } from '../application/maintenance_window/types';
import { RULE_SAVED_OBJECT_TYPE } from '../saved_objects';
import { getErrorSource } from '@kbn/task-manager-plugin/server/task_running';
import { RuleResultService } from '../monitoring/rule_result_service';
import { ruleResultServiceMock } from '../monitoring/rule_result_service.mock';

jest.mock('uuid', () => ({
v4: () => '5f6aa57d-3e22-484e-bae8-cbed868f4d28',
Expand All @@ -99,13 +101,16 @@ jest.mock('../lib/wrap_scoped_cluster_client', () => ({

jest.mock('../lib/alerting_event_logger/alerting_event_logger');

jest.mock('../monitoring/rule_result_service');

let fakeTimer: sinon.SinonFakeTimers;
const logger: ReturnType<typeof loggingSystemMock.createLogger> = loggingSystemMock.createLogger();

const mockUsageCountersSetup = usageCountersServiceMock.createSetupContract();
const mockUsageCounter = mockUsageCountersSetup.createUsageCounter('test');
const alertingEventLogger = alertingEventLoggerMock.create();
const alertsClient = alertsClientMock.create();
const ruleResultService = ruleResultServiceMock.create();

describe('Task Runner', () => {
let mockedTaskInstance: ConcreteTaskInstance;
Expand Down Expand Up @@ -245,6 +250,13 @@ describe('Task Runner', () => {
ruleType.executor.mockResolvedValue({ state: {} });

actionsClient.bulkEnqueueExecution.mockResolvedValue({ errors: false, items: [] });

ruleResultService.getLastRunResults.mockImplementation(() => ({
errors: [],
warnings: [],
outcomeMessage: '',
}));
(RuleResultService as jest.Mock).mockImplementation(() => ruleResultService);
});

test('successfully executes the task', async () => {
Expand Down Expand Up @@ -2743,7 +2755,6 @@ describe('Task Runner', () => {
const taskRunner = new TaskRunner({
ruleType,
taskInstance: mockedTaskInstance,

context: taskRunnerFactoryInitializerParams,
inMemoryMetrics,
});
Expand Down Expand Up @@ -3285,6 +3296,39 @@ describe('Task Runner', () => {
expect(mockUsageCounter.incrementCounter).not.toHaveBeenCalled();
});

test('reports error to eventLogger when ruleResultService.addLastRunError adds an error', async () => {
rulesClient.getAlertFromRaw.mockReturnValue(mockedRuleTypeSavedObject as Rule);
encryptedSavedObjectsClient.getDecryptedAsInternalUser.mockResolvedValue(mockedRawRuleSO);

const taskRunner = new TaskRunner({
ruleType,
taskInstance: mockedTaskInstance,
context: taskRunnerFactoryInitializerParams,
inMemoryMetrics,
});

ruleResultService.getLastRunResults.mockImplementation(() => ({
errors: ['an error occurred'],
warnings: [],
outcomeMessage: '',
}));

const runnerResult = await taskRunner.run();

expect(inMemoryMetrics.increment).toHaveBeenCalledTimes(2);
expect(inMemoryMetrics.increment.mock.calls[0][0]).toBe(IN_MEMORY_METRICS.RULE_EXECUTIONS);
expect(inMemoryMetrics.increment.mock.calls[1][0]).toBe(IN_MEMORY_METRICS.RULE_FAILURES);

testAlertingEventLogCalls({
status: 'error',
softErrorFromLastRun: true,
errorMessage: 'an error occurred',
errorReason: 'unknown',
});

expect(getErrorSource(runnerResult.taskRunError as Error)).toBe(TaskErrorSource.FRAMEWORK);
});

function testAlertingEventLogCalls({
ruleContext = alertingEventLoggerInitializer,
activeAlerts = 0,
Expand All @@ -3302,6 +3346,7 @@ describe('Task Runner', () => {
logAction = 0,
hasReachedAlertLimit = false,
hasReachedQueuedActionsLimit = false,
softErrorFromLastRun = false,
}: {
status: string;
ruleContext?: RuleContextOpts;
Expand All @@ -3319,6 +3364,7 @@ describe('Task Runner', () => {
errorMessage?: string;
hasReachedAlertLimit?: boolean;
hasReachedQueuedActionsLimit?: boolean;
softErrorFromLastRun?: boolean;
}) {
expect(alertingEventLogger.initialize).toHaveBeenCalledWith(ruleContext);
if (status !== 'skip') {
Expand All @@ -3338,27 +3384,42 @@ describe('Task Runner', () => {
);
}
if (status === 'error') {
expect(alertingEventLogger.done).toHaveBeenCalledWith({
metrics: null,
status: {
lastExecutionDate: new Date('1970-01-01T00:00:00.000Z'),
status,
error: {
message: errorMessage,
reason: errorReason,
if (softErrorFromLastRun) {
expect(alertingEventLogger.done).toHaveBeenCalledWith(
expect.objectContaining({
status: {
lastExecutionDate: new Date('1970-01-01T00:00:00.000Z'),
status,
error: {
message: errorMessage,
reason: errorReason,
},
},
})
);
} else {
expect(alertingEventLogger.done).toHaveBeenCalledWith({
metrics: null,
status: {
lastExecutionDate: new Date('1970-01-01T00:00:00.000Z'),
status,
error: {
message: errorMessage,
reason: errorReason,
},
},
},
timings: {
claim_to_start_duration_ms: 0,
persist_alerts_duration_ms: 0,
prepare_rule_duration_ms: 0,
process_alerts_duration_ms: 0,
process_rule_duration_ms: 0,
rule_type_run_duration_ms: 0,
total_run_duration_ms: 0,
trigger_actions_duration_ms: 0,
},
});
timings: {
claim_to_start_duration_ms: 0,
persist_alerts_duration_ms: 0,
prepare_rule_duration_ms: 0,
process_alerts_duration_ms: 0,
process_rule_duration_ms: 0,
rule_type_run_duration_ms: 0,
total_run_duration_ms: 0,
trigger_actions_duration_ms: 0,
},
});
}
} else if (status === 'warning') {
expect(alertingEventLogger.done).toHaveBeenCalledWith({
metrics: {
Expand Down
Loading

0 comments on commit b0d1cb1

Please sign in to comment.