Skip to content

Commit

Permalink
[Monitoring] Thread pool rejections alert (#79433)
Browse files Browse the repository at this point in the history
* Thread pool rejections first draft

* Split search and write rejections to separate alerts

* Code review feedback

* Optimized page loading and bundle size

* Increased monitoring bundle limit

* Removed server app import into the frontend

* Fixed tests and bundle size

Co-authored-by: Kibana Machine <42973632+kibanamachine@users.noreply.github.com>
  • Loading branch information
igoristic and kibanamachine authored Oct 30, 2020
1 parent 70807c9 commit c1294f0
Show file tree
Hide file tree
Showing 70 changed files with 1,149 additions and 395 deletions.
2 changes: 1 addition & 1 deletion packages/kbn-optimizer/limits.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pageLoadAssetSize:
mapsLegacy: 116817
mapsLegacyLicensing: 20214
ml: 82187
monitoring: 268612
monitoring: 50000
navigation: 37269
newsfeed: 42228
observability: 89709
Expand Down
177 changes: 168 additions & 9 deletions x-pack/plugins/monitoring/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* you may not use this file except in compliance with the Elastic License.
*/

import { i18n } from '@kbn/i18n';
import { CommonAlertParamDetail } from './types/alerts';
import { AlertParamType } from './enums';

/**
* Helper string to add as a tag in every logging call
*/
Expand Down Expand Up @@ -215,15 +219,6 @@ export const REPORTING_SYSTEM_ID = 'reporting';
*/
export const TELEMETRY_COLLECTION_INTERVAL = 86400000;

/**
* We want to slowly rollout the migration from watcher-based cluster alerts to
* kibana alerts and we only want to enable the kibana alerts once all
* watcher-based cluster alerts have been migrated so this flag will serve
* as the only way to see the new UI and actually run Kibana alerts. It will
* be false until all alerts have been migrated, then it will be removed
*/
export const KIBANA_CLUSTER_ALERTS_ENABLED = false;

/**
* The prefix for all alert types used by monitoring
*/
Expand All @@ -238,6 +233,168 @@ export const ALERT_KIBANA_VERSION_MISMATCH = `${ALERT_PREFIX}alert_kibana_versio
// Alert type ids: each one is `ALERT_PREFIX` followed by a fixed, alert-specific suffix.
export const ALERT_LOGSTASH_VERSION_MISMATCH = `${ALERT_PREFIX}alert_logstash_version_mismatch`;
export const ALERT_MEMORY_USAGE = `${ALERT_PREFIX}alert_jvm_memory_usage`;
export const ALERT_MISSING_MONITORING_DATA = `${ALERT_PREFIX}alert_missing_monitoring_data`;
// Thread pool rejections are exposed as two separate alert types, one for search and one for write.
export const ALERT_THREAD_POOL_SEARCH_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_search_rejections`;
export const ALERT_THREAD_POOL_WRITE_REJECTIONS = `${ALERT_PREFIX}alert_thread_pool_write_rejections`;

/**
* Legacy alerts details/label for server and public use.
*
* Keyed by alert type id (the `ALERT_*` constants). Each entry carries only a
* translated `label`; unlike `ALERT_DETAILS`, no `paramDetails` are declared here.
* The labels are produced by `i18n.translate` when this module is evaluated.
*/
export const LEGACY_ALERT_DETAILS = {
[ALERT_CLUSTER_HEALTH]: {
label: i18n.translate('xpack.monitoring.alerts.clusterHealth.label', {
defaultMessage: 'Cluster health',
}),
},
[ALERT_ELASTICSEARCH_VERSION_MISMATCH]: {
label: i18n.translate('xpack.monitoring.alerts.elasticsearchVersionMismatch.label', {
defaultMessage: 'Elasticsearch version mismatch',
}),
},
[ALERT_KIBANA_VERSION_MISMATCH]: {
label: i18n.translate('xpack.monitoring.alerts.kibanaVersionMismatch.label', {
defaultMessage: 'Kibana version mismatch',
}),
},
[ALERT_LICENSE_EXPIRATION]: {
label: i18n.translate('xpack.monitoring.alerts.licenseExpiration.label', {
defaultMessage: 'License expiration',
}),
},
[ALERT_LOGSTASH_VERSION_MISMATCH]: {
label: i18n.translate('xpack.monitoring.alerts.logstashVersionMismatch.label', {
defaultMessage: 'Logstash version mismatch',
}),
},
[ALERT_NODES_CHANGED]: {
label: i18n.translate('xpack.monitoring.alerts.nodesChanged.label', {
defaultMessage: 'Nodes changed',
}),
},
};

/**
* Alerts details/label for server and public use.
*
* Keyed by alert type id (the `ALERT_*` constants). Each entry provides:
*  - `label`: the translated display name of the alert
*  - `paramDetails`: one entry per configurable parameter, holding a translated
*    `label` and the `AlertParamType` of the parameter
*
* All strings are produced by `i18n.translate` when this module is evaluated.
*
* NOTE(review): only the CPU usage and missing monitoring data entries assert
* their param details `as CommonAlertParamDetail`; the other entries rely on
* inference. Confirm whether the difference is intentional.
*/
export const ALERT_DETAILS = {
// CPU usage: percentage threshold evaluated over a duration.
[ALERT_CPU_USAGE]: {
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.label', {
defaultMessage: 'CPU Usage',
}),
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.threshold.label', {
defaultMessage: `Notify when CPU is over`,
}),
type: AlertParamType.Percentage,
} as CommonAlertParamDetail,
duration: {
label: i18n.translate('xpack.monitoring.alerts.cpuUsage.paramDetails.duration.label', {
defaultMessage: `Look at the average over`,
}),
type: AlertParamType.Duration,
} as CommonAlertParamDetail,
},
},
// Disk usage: percentage threshold evaluated over a duration.
[ALERT_DISK_USAGE]: {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.threshold.label', {
defaultMessage: `Notify when disk capacity is over`,
}),
type: AlertParamType.Percentage,
},
duration: {
label: i18n.translate('xpack.monitoring.alerts.diskUsage.paramDetails.duration.label', {
defaultMessage: `Look at the average over`,
}),
type: AlertParamType.Duration,
},
},
label: i18n.translate('xpack.monitoring.alerts.diskUsage.label', {
defaultMessage: 'Disk Usage',
}),
},
// JVM memory usage: percentage threshold evaluated over a duration.
[ALERT_MEMORY_USAGE]: {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.threshold.label', {
defaultMessage: `Notify when memory usage is over`,
}),
type: AlertParamType.Percentage,
},
duration: {
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.paramDetails.duration.label', {
defaultMessage: `Look at the average over`,
}),
type: AlertParamType.Duration,
},
},
label: i18n.translate('xpack.monitoring.alerts.memoryUsage.label', {
defaultMessage: 'Memory Usage (JVM)',
}),
},
// Missing monitoring data: two duration parameters (`duration` and `limit`), no threshold.
[ALERT_MISSING_MONITORING_DATA]: {
paramDetails: {
duration: {
label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.duration.label', {
defaultMessage: `Notify if monitoring data is missing for the last`,
}),
type: AlertParamType.Duration,
} as CommonAlertParamDetail,
limit: {
label: i18n.translate('xpack.monitoring.alerts.missingData.paramDetails.limit.label', {
defaultMessage: `looking back`,
}),
type: AlertParamType.Duration,
} as CommonAlertParamDetail,
},
label: i18n.translate('xpack.monitoring.alerts.missingData.label', {
defaultMessage: 'Missing monitoring data',
}),
},
// Thread pool search rejections: numeric count threshold within a duration.
// Shares its translation ids with the write variant below; only the `{type}` value differs.
// NOTE(review): the `{type}` value is a plain string literal, so it is inserted into the
// message as-is and is not itself passed through `i18n.translate`.
[ALERT_THREAD_POOL_SEARCH_REJECTIONS]: {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
defaultMessage: `Notify when {type} rejection count is over`,
values: { type: 'search' },
}),
type: AlertParamType.Number,
},
duration: {
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
defaultMessage: `In the last`,
}),
type: AlertParamType.Duration,
},
},
label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
defaultMessage: 'Thread pool {type} rejections',
values: { type: 'search' },
}),
},
// Thread pool write rejections: same shape and translation ids as the search variant,
// with `{type}` set to 'write'.
[ALERT_THREAD_POOL_WRITE_REJECTIONS]: {
paramDetails: {
threshold: {
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.threshold.label', {
defaultMessage: `Notify when {type} rejection count is over`,
values: { type: 'write' },
}),
type: AlertParamType.Number,
},
duration: {
label: i18n.translate('xpack.monitoring.alerts.rejection.paramDetails.duration.label', {
defaultMessage: `In the last`,
}),
type: AlertParamType.Duration,
},
},
label: i18n.translate('xpack.monitoring.alerts.threadPoolRejections.label', {
defaultMessage: 'Thread pool {type} rejections',
values: { type: 'write' },
}),
},
};

/**
* A listing of all alert types
Expand All @@ -253,6 +410,8 @@ export const ALERTS = [
ALERT_LOGSTASH_VERSION_MISMATCH,
ALERT_MEMORY_USAGE,
ALERT_MISSING_MONITORING_DATA,
ALERT_THREAD_POOL_SEARCH_REJECTIONS,
ALERT_THREAD_POOL_WRITE_REJECTIONS,
];

/**
Expand Down
1 change: 1 addition & 0 deletions x-pack/plugins/monitoring/common/enums.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export enum AlertMessageTokenType {
/**
* Kinds of alert parameter; used as the `type` of an alert param detail.
* String-valued, so each member's runtime value is the lowercase name shown.
*/
export enum AlertParamType {
Duration = 'duration',
Percentage = 'percentage',
// Plain numeric value (as opposed to a percentage).
Number = 'number',
}

export enum SetupModeFeature {
Expand Down
53 changes: 0 additions & 53 deletions x-pack/plugins/monitoring/common/types.ts

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,70 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
import { AlertMessageTokenType, AlertSeverity } from '../../common/enums';
import { AlertInstanceState as BaseAlertInstanceState } from '../../../alerts/server';

import { Alert } from '../../../alerts/common';
import { AlertParamType, AlertMessageTokenType, AlertSeverity } from '../enums';

/**
* Description of a monitoring alert: its type id, display label and parameter
* metadata, together with the underlying `Alert` object.
*/
export interface CommonBaseAlert {
type: string;
label: string;
paramDetails: CommonAlertParamDetails;
// The `Alert` imported from the alerts plugin's common types.
rawAlert: Alert;
// NOTE(review): presumably true for the alert types listed in LEGACY_ALERT_DETAILS; confirm.
isLegacy: boolean;
}

/**
* Status of a single alert: whether it exists and is enabled, its current
* states, and the alert description itself.
*/
export interface CommonAlertStatus {
exists: boolean;
enabled: boolean;
states: CommonAlertState[];
alert: CommonBaseAlert;
}

/**
* One state entry of an alert, as listed in `CommonAlertStatus.states`.
* `state` and `meta` are untyped (`any`); their shape is not defined here.
*/
export interface CommonAlertState {
firing: boolean;
state: any;
meta: any;
}

/**
* Base shape for alert filters; `nodeUuid` is optional at this level.
*/
export interface CommonAlertFilter {
nodeUuid?: string;
}

/**
* Alert filter in which `nodeUuid` is required rather than optional.
*/
export interface CommonAlertNodeUuidFilter extends CommonAlertFilter {
nodeUuid: string;
}

/**
* Alert filter with a required `stackProduct`; the optional `nodeUuid` is
* inherited from `CommonAlertFilter`.
*/
export interface CommonAlertStackProductFilter extends CommonAlertFilter {
stackProduct: string;
}

/**
* Display metadata for a single alert parameter: a label and, optionally,
* the kind of value the parameter holds.
*/
export interface CommonAlertParamDetail {
label: string;
type?: AlertParamType;
}

/**
* Map from parameter name to its display metadata. The value type includes
* `undefined`, so a lookup has to be checked before it is used.
*/
export interface CommonAlertParamDetails {
[name: string]: CommonAlertParamDetail | undefined;
}

/**
* Map from parameter name to its value, which is either a string or a number.
*/
export interface CommonAlertParams {
[name: string]: string | number;
}

/**
* Parameters of the thread pool rejections alerts: a numeric `threshold` and a
* `duration`.
*/
export interface ThreadPoolRejectionsAlertParams {
threshold: number;
// NOTE(review): presumably a duration expression such as '5m'; confirm the expected format.
duration: string;
}

/**
* An action reference consisting of an `id` and a free-form `config` object.
* NOTE(review): presumably describes an action to attach when alerts are enabled; confirm against callers.
*/
export interface AlertEnableAction {
id: string;
// String-keyed bag of untyped values; its schema is not defined here.
config: { [key: string]: any };
}

export interface AlertInstanceState {
alertStates: Array<AlertState | AlertCpuUsageState | AlertDiskUsageState>;
alertStates: Array<
AlertState | AlertCpuUsageState | AlertDiskUsageState | AlertThreadPoolRejectionsState
>;
[x: string]: unknown;
}

Expand Down Expand Up @@ -46,6 +100,13 @@ export interface AlertMemoryUsageState extends AlertNodeState {
memoryUsage: number;
}

/**
* Alert state for a thread pool rejections alert. Extends `AlertState` with the
* rejection count and the node it refers to.
*/
export interface AlertThreadPoolRejectionsState extends AlertState {
rejectionCount: number;
// NOTE(review): presumably the thread pool type (e.g. 'search' or 'write'); confirm.
type: string;
nodeId: string;
// Optional here, although `AlertThreadPoolRejectionsStats.nodeName` is required.
nodeName?: string;
}

export interface AlertUiState {
isFiring: boolean;
severity: AlertSeverity;
Expand Down Expand Up @@ -100,6 +161,14 @@ export interface AlertCpuUsageNodeStats extends AlertNodeStats {
containerQuota: number;
}

/**
* Thread pool rejection stats for one node: the rejection count plus the
* cluster and node it belongs to.
*/
export interface AlertThreadPoolRejectionsStats {
clusterUuid: string;
nodeId: string;
nodeName: string;
rejectionCount: number;
// NOTE(review): presumably the cross-cluster search identifier, when applicable; confirm.
ccs?: string;
}

export interface AlertDiskUsageNodeStats extends AlertNodeStats {
diskUsage: number;
}
Expand All @@ -121,7 +190,7 @@ export interface AlertData {
instanceKey: string;
clusterUuid: string;
ccs?: string;
shouldFire: boolean;
shouldFire?: boolean;
severity: AlertSeverity;
meta: any;
}
Expand Down
Loading

0 comments on commit c1294f0

Please sign in to comment.