-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[terraform] break up alarms into separate terraform files
Summary: Noticed that our aws_cloudwatch_alarms.tf file was getting a bit large. Separated alarms into different files based on service. The lambda alarm also has its own file. Alarms shared between services, like failed connection alarms and ECS task stop alarms, were kept in aws_cloudwatch_alarms.tf. This is simply moving around code and will not result in any changes in deployment. Depends on D13942. Test Plan: terraform plan had no changes. Reviewers: bartek, varun. Reviewed By: bartek. Subscribers: ashoat, tomek. Differential Revision: https://phab.comm.dev/D13943
- Loading branch information
Showing
5 changed files
with
254 additions
and
250 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# Blob service error categories. Each entry supplies the `name` fragment used in metric/alarm names | ||
# and the `pattern` string compared against `$.fields.errorType` by the for_each resources in this file. | ||
locals { | ||
blob_error_patterns = { | ||
S3 = { name = "S3", pattern = "S3 Error" }, | ||
DDB = { name = "DDB", pattern = "DDB Error" }, | ||
HTTP = { name = "HTTP", pattern = "HTTP Error" }, | ||
Other = { name = "Other", pattern = "Other Error" }, | ||
} | ||
} | ||
|
||
# SNS topic referenced by the alarm_actions of every Blob alarm in this file. | ||
resource "aws_sns_topic" "blob_error_topic" { | ||
name = "blob-error-topic" | ||
} | ||
|
||
# Email subscription to the Blob error topic. The endpoint address comes from | ||
# local.error_reports_subscribed_email, which is not defined in this file. | ||
resource "aws_sns_topic_subscription" "blob_email_subscription" { | ||
topic_arn = aws_sns_topic.blob_error_topic.arn | ||
protocol = "email" | ||
endpoint = local.error_reports_subscribed_email | ||
} | ||
|
||
# Creates one CloudWatch log metric filter per entry in local.blob_error_patterns. | ||
resource "aws_cloudwatch_log_metric_filter" "blob_error_filters" { | ||
for_each = local.blob_error_patterns | ||
|
||
name = "Blob${each.value.name}ErrorCount" | ||
# JSON filter: matches log events whose level is "ERROR" and whose fields.errorType matches this entry's pattern. | ||
pattern = "{ $.level = \"ERROR\" && $.fields.errorType = \"${each.value.pattern}\" }" | ||
log_group_name = "/ecs/blob-service-task-def" | ||
|
||
# Publishes the value "1" to the per-category metric; blob_error_alarms reads the same metric name and namespace. | ||
metric_transformation { | ||
name = "Blob${each.value.name}ErrorCount" | ||
namespace = "BlobServiceMetricFilters" | ||
value = "1" | ||
} | ||
} | ||
|
||
# Creates one alarm per entry in local.blob_error_patterns. Each alarm notifies blob_error_topic when the | ||
# Sum of the matching Blob<name>ErrorCount metric over one 300-second period is >= 1. | ||
resource "aws_cloudwatch_metric_alarm" "blob_error_alarms" { | ||
for_each = local.blob_error_patterns | ||
|
||
# The alarm name embeds "Staging" or "Production" depending on local.is_staging. | ||
alarm_name = "Blob${local.is_staging ? "Staging" : "Production"}${each.value.name}ErrorAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = "1" | ||
# Must match the metric_transformation name/namespace declared in blob_error_filters. | ||
metric_name = "Blob${each.value.name}ErrorCount" | ||
namespace = "BlobServiceMetricFilters" | ||
period = "300" | ||
statistic = "Sum" | ||
threshold = 1 | ||
alarm_description = "Alarm when Blob ${each.value.name} errors exceed threshold" | ||
actions_enabled = true | ||
alarm_actions = [aws_sns_topic.blob_error_topic.arn] | ||
} | ||
|
||
# Notifies blob_error_topic when the Average AWS/ECS MemoryUtilization of the Blob ECS service | ||
# is >= 90 over one 60-second period. | ||
resource "aws_cloudwatch_metric_alarm" "blob_memory_utilization" { | ||
alarm_name = "BlobMemoryUtilizationAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = 1 | ||
metric_name = "MemoryUtilization" | ||
namespace = "AWS/ECS" | ||
period = 60 | ||
statistic = "Average" | ||
threshold = 90 | ||
alarm_description = "Alarm when Blob service memory utilization exceeds 90%" | ||
# Scopes the metric to the blob_service ECS service inside the comm_services cluster. | ||
dimensions = { | ||
ClusterName = aws_ecs_cluster.comm_services.name | ||
ServiceName = aws_ecs_service.blob_service.name | ||
} | ||
alarm_actions = [aws_sns_topic.blob_error_topic.arn] | ||
} | ||
|
||
# Notifies blob_error_topic when the Average AWS/ECS CPUUtilization of the Blob ECS service | ||
# is >= 90 over one 60-second period. | ||
resource "aws_cloudwatch_metric_alarm" "blob_cpu_utilization" { | ||
alarm_name = "BlobCPUUtilizationAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = 1 | ||
metric_name = "CPUUtilization" | ||
namespace = "AWS/ECS" | ||
period = 60 | ||
statistic = "Average" | ||
threshold = 90 | ||
alarm_description = "Alarm when Blob service CPU utilization exceeds 90%" | ||
# Scopes the metric to the blob_service ECS service inside the comm_services cluster. | ||
dimensions = { | ||
ClusterName = aws_ecs_cluster.comm_services.name | ||
ServiceName = aws_ecs_service.blob_service.name | ||
} | ||
alarm_actions = [aws_sns_topic.blob_error_topic.arn] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Identity service error categories. Each entry supplies the `name` fragment used in metric/alarm names | ||
# and the `pattern` string compared against `$.fields.errorType` by the for_each resources in this file. | ||
locals { | ||
identity_error_patterns = { | ||
Search = { name = "Search", pattern = "Search Error" }, | ||
Sync = { name = "Sync", pattern = "Sync Error" }, | ||
# NOTE(review): the leading "*" looks like a filter-pattern wildcard so several "...DB Error" types share one metric; confirm. | ||
Database = { name = "DB", pattern = "*DB Error" }, | ||
GrpcServices = { name = "GrpcServices", pattern = "gRPC Services Error" }, | ||
Siwe = { name = "Siwe", pattern = "SIWE Error" }, | ||
Tunnelbroker = { name = "Tunnelbroker", pattern = "Tunnelbroker Error" }, | ||
Http = { name = "HTTP", pattern = "HTTP Error" }, | ||
} | ||
|
||
# Alarm threshold shared by every identity_error_alarms instance. | ||
identity_error_threshold = 1 | ||
} | ||
|
||
# SNS topic referenced by the alarm_actions of the Identity error alarms in this file. | ||
resource "aws_sns_topic" "identity_error_topic" { | ||
name = "identity-error-topic" | ||
} | ||
|
||
# Email subscription to the Identity error topic. The endpoint address comes from | ||
# local.error_reports_subscribed_email, which is not defined in this file. | ||
resource "aws_sns_topic_subscription" "identity_email_subscription" { | ||
topic_arn = aws_sns_topic.identity_error_topic.arn | ||
protocol = "email" | ||
endpoint = local.error_reports_subscribed_email | ||
} | ||
|
||
# Creates one CloudWatch log metric filter per entry in local.identity_error_patterns. | ||
resource "aws_cloudwatch_log_metric_filter" "identity_error_filters" { | ||
for_each = local.identity_error_patterns | ||
|
||
name = "Identity${each.value.name}ErrorCount" | ||
# JSON filter: matches log events whose level is "ERROR" and whose fields.errorType matches this entry's pattern. | ||
pattern = "{ $.level = \"ERROR\" && $.fields.errorType = \"${each.value.pattern}\" }" | ||
log_group_name = "/ecs/identity-service-task-def" | ||
|
||
# Publishes the value "1" to the per-category metric; identity_error_alarms reads the same metric name and namespace. | ||
metric_transformation { | ||
name = "Identity${each.value.name}ErrorCount" | ||
namespace = "IdentityServiceMetricFilters" | ||
value = "1" | ||
} | ||
} | ||
|
||
# Creates one alarm per entry in local.identity_error_patterns. Each alarm notifies identity_error_topic when the | ||
# Sum of the matching Identity<name>ErrorCount metric over one 300-second period is >= local.identity_error_threshold. | ||
resource "aws_cloudwatch_metric_alarm" "identity_error_alarms" { | ||
for_each = local.identity_error_patterns | ||
|
||
# The alarm name embeds "Staging" or "Production" depending on local.is_staging. | ||
alarm_name = "Identity${local.is_staging ? "Staging" : "Production"}${each.value.name}ErrorAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = "1" | ||
# Must match the metric_transformation name/namespace declared in identity_error_filters. | ||
metric_name = "Identity${each.value.name}ErrorCount" | ||
namespace = "IdentityServiceMetricFilters" | ||
period = "300" | ||
statistic = "Sum" | ||
threshold = local.identity_error_threshold | ||
alarm_description = "Alarm when Identity ${each.value.name} errors exceed threshold" | ||
actions_enabled = true | ||
alarm_actions = [aws_sns_topic.identity_error_topic.arn] | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# Threshold consumed by lambda_error_alarm. Declared as a number, since the alarm's threshold argument is numeric, | ||
# rather than as a string that Terraform would have to convert. | ||
locals { | ||
lambda_error_threshold = 2 | ||
} | ||
|
||
# SNS topic referenced by the alarm_actions of lambda_error_alarm. | ||
resource "aws_sns_topic" "lambda_alarm_topic" { | ||
name = "lambda-error-alarm-topic" | ||
} | ||
|
||
# Email subscription to the Lambda alarm topic. The endpoint address comes from | ||
# local.error_reports_subscribed_email, which is not defined in this file. | ||
# NOTE(review): the resource name is generic compared with the service-prefixed subscriptions in the sibling files; | ||
# renaming it would change its state address, so it is left as-is. | ||
resource "aws_sns_topic_subscription" "email_subscription" { | ||
topic_arn = aws_sns_topic.lambda_alarm_topic.arn | ||
protocol = "email" | ||
endpoint = local.error_reports_subscribed_email | ||
} | ||
|
||
# Alarm on failures of the search index Lambda function: notifies lambda_alarm_topic when the Sum of the | ||
# function's error metric over one 300-second period is >= local.lambda_error_threshold. | ||
resource "aws_cloudwatch_metric_alarm" "lambda_error_alarm" { | ||
alarm_name = "SearchIndexLambdaErrorAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = "1" | ||
# The AWS/Lambda namespace reports failed invocations under the metric name "Errors"; it has no "LambdaErrors" | ||
# metric, so an alarm on that name never receives data points. | ||
metric_name = "Errors" | ||
namespace = "AWS/Lambda" | ||
period = "300" | ||
statistic = "Sum" | ||
threshold = local.lambda_error_threshold | ||
alarm_description = "Alarm tracking search index lambda function failure" | ||
actions_enabled = true | ||
alarm_actions = [aws_sns_topic.lambda_alarm_topic.arn] | ||
# Scopes the metric to the single function exported by the shared module. | ||
dimensions = { | ||
FunctionName = module.shared.search_index_lambda.function_name | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
# Tunnelbroker error categories. Each entry supplies the `name` fragment used in metric/alarm names | ||
# and the `pattern` string compared against `$.fields.errorType` by the for_each resources in this file. | ||
locals { | ||
tunnelbroker_error_patterns = { | ||
AMQP = { name = "AMQP", pattern = "AMQP Error" }, | ||
DDB = { name = "DDB", pattern = "DDB Error" }, | ||
FCM = { name = "FCM", pattern = "FCM Error" }, | ||
APNs = { name = "APNs", pattern = "APNs Error" }, | ||
WebPush = { name = "WebPush", pattern = "Web Push Error" }, | ||
WNS = { name = "WNS", pattern = "WNS Error" }, | ||
Identity = { name = "Identity", pattern = "Identity Error" }, | ||
Websocket = { name = "Websocket", pattern = "Websocket Error" }, | ||
Server = { name = "Server", pattern = "Server Error" }, | ||
} | ||
} | ||
|
||
# SNS topic referenced by the alarm_actions of every Tunnelbroker alarm in this file. | ||
resource "aws_sns_topic" "tunnelbroker_error_topic" { | ||
name = "tunnelbroker-error-topic" | ||
} | ||
|
||
# Email subscription to the Tunnelbroker error topic. The endpoint address comes from | ||
# local.error_reports_subscribed_email, which is not defined in this file. | ||
resource "aws_sns_topic_subscription" "tunnelbroker_email_subscription" { | ||
topic_arn = aws_sns_topic.tunnelbroker_error_topic.arn | ||
protocol = "email" | ||
endpoint = local.error_reports_subscribed_email | ||
} | ||
|
||
# Creates one CloudWatch log metric filter per entry in local.tunnelbroker_error_patterns. | ||
resource "aws_cloudwatch_log_metric_filter" "tunnelbroker_error_filters" { | ||
for_each = local.tunnelbroker_error_patterns | ||
|
||
name = "Tunnelbroker${each.value.name}ErrorCount" | ||
# JSON filter: matches log events whose level is "ERROR" and whose fields.errorType matches this entry's pattern. | ||
pattern = "{ $.level = \"ERROR\" && $.fields.errorType = \"${each.value.pattern}\" }" | ||
log_group_name = "/ecs/tunnelbroker-task-def" | ||
|
||
# Publishes the value "1" to the per-category metric; tunnelbroker_error_alarms reads the same metric name and namespace. | ||
metric_transformation { | ||
name = "Tunnelbroker${each.value.name}ErrorCount" | ||
namespace = "TunnelbrokerServiceMetricFilters" | ||
value = "1" | ||
} | ||
} | ||
|
||
# Creates one alarm per entry in local.tunnelbroker_error_patterns. Each alarm notifies tunnelbroker_error_topic when the | ||
# Sum of the matching Tunnelbroker<name>ErrorCount metric over one 300-second period is >= 1. | ||
resource "aws_cloudwatch_metric_alarm" "tunnelbroker_error_alarms" { | ||
for_each = local.tunnelbroker_error_patterns | ||
|
||
# The alarm name embeds "Staging" or "Production" depending on local.is_staging. | ||
alarm_name = "Tunnelbroker${local.is_staging ? "Staging" : "Production"}${each.value.name}ErrorAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = "1" | ||
# Must match the metric_transformation name/namespace declared in tunnelbroker_error_filters. | ||
metric_name = "Tunnelbroker${each.value.name}ErrorCount" | ||
namespace = "TunnelbrokerServiceMetricFilters" | ||
period = "300" | ||
statistic = "Sum" | ||
threshold = 1 | ||
alarm_description = "Alarm when Tunnelbroker ${each.value.name} errors exceed threshold" | ||
actions_enabled = true | ||
alarm_actions = [aws_sns_topic.tunnelbroker_error_topic.arn] | ||
} | ||
|
||
# Notifies tunnelbroker_error_topic when the Average AWS/ECS MemoryUtilization of the Tunnelbroker ECS service | ||
# is >= 90 over one 60-second period. | ||
resource "aws_cloudwatch_metric_alarm" "tunnelbroker_memory_utilization" { | ||
alarm_name = "TunnelbrokerMemoryUtilizationAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = 1 | ||
metric_name = "MemoryUtilization" | ||
period = 60 | ||
statistic = "Average" | ||
threshold = 90 | ||
alarm_description = "Alarm when Tunnelbroker service memory utilization exceeds 90%" | ||
alarm_actions = [aws_sns_topic.tunnelbroker_error_topic.arn] | ||
namespace = "AWS/ECS" | ||
# Scopes the metric to the tunnelbroker ECS service inside the comm_services cluster. | ||
dimensions = { | ||
ClusterName = aws_ecs_cluster.comm_services.name | ||
ServiceName = aws_ecs_service.tunnelbroker.name | ||
} | ||
} | ||
|
||
|
||
# Notifies tunnelbroker_error_topic when the Average AWS/ECS CPUUtilization of the Tunnelbroker ECS service | ||
# is >= 90 over one 60-second period. | ||
resource "aws_cloudwatch_metric_alarm" "tunnelbroker_cpu_utilization" { | ||
alarm_name = "TunnelbrokerCPUUtilizationAlarm" | ||
comparison_operator = "GreaterThanOrEqualToThreshold" | ||
evaluation_periods = 1 | ||
metric_name = "CPUUtilization" | ||
period = 60 | ||
statistic = "Average" | ||
threshold = 90 | ||
alarm_description = "Alarm when Tunnelbroker service CPU utilization exceeds 90%" | ||
alarm_actions = [aws_sns_topic.tunnelbroker_error_topic.arn] | ||
namespace = "AWS/ECS" | ||
# Scopes the metric to the tunnelbroker ECS service inside the comm_services cluster. | ||
dimensions = { | ||
ClusterName = aws_ecs_cluster.comm_services.name | ||
ServiceName = aws_ecs_service.tunnelbroker.name | ||
} | ||
} |
Oops, something went wrong.