Skip to content

Commit

Permalink
Merge pull request #3353 from alphagov/ris-shield-cloudwatch
Browse files Browse the repository at this point in the history
AWS Shield CloudWatch metrics and alertmanager alerts
  • Loading branch information
risicle committed Aug 16, 2023
2 parents 4377a8e + dad68c9 commit 3d056d8
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 15 deletions.
30 changes: 30 additions & 0 deletions config/cloudwatch-exporter/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,33 @@ discovery:
- Rule
- Region
addCloudwatchTimestamp: true
# Exporter job for the AWS/DDoSProtection namespace that collects the
# Maximum statistic of the DDoSDetected metric, in the region taken from
# $AWS_REGION.
- type: "AWS/DDoSProtection"
regions:
- (( grab $AWS_REGION ))
# DDoSProtection metrics can't currently be filtered by tag, so
# filtering by region will have to do
metrics:
- name: DDoSDetected
statistics:
- Maximum
# NOTE(review): period/length are presumably in seconds - confirm against
# the exporter's configuration reference
period: 60
length: 600
# this job lists only ResourceArn; the VolumePacketsPerSecond job lists
# MitigationAction as well, which is why the two are separate entries
dimensionNameRequirements:
- ResourceArn
addCloudwatchTimestamp: true
# Exporter job for the AWS/DDoSProtection namespace that collects the
# Average and Maximum statistics of the VolumePacketsPerSecond metric, in
# the region taken from $AWS_REGION.
- type: "AWS/DDoSProtection"
regions:
- (( grab $AWS_REGION ))
# DDoSProtection metrics can't currently be filtered by tag, so
# filtering by region will have to do
metrics:
- name: VolumePacketsPerSecond
statistics:
- Average
- Maximum
# NOTE(review): period/length are presumably in seconds - confirm against
# the exporter's configuration reference
period: 60
length: 600
# both ResourceArn and MitigationAction are listed for this job; the
# DDoSBeingMitigated alert selects on dimension_MitigationAction="Drop"
dimensionNameRequirements:
- ResourceArn
- MitigationAction
addCloudwatchTimestamp: true
24 changes: 23 additions & 1 deletion manifests/prometheus/alerts.d/shield-attacks.yml
Original file line number Diff line number Diff line change
@@ -1,15 +1,37 @@
# Source: paas-metrics
# Source: yet-another-cloudwatch-exporter
---

# Ops-file entry that appends the ShieldAttacks rule group to the
# prometheus2 job's custom_rules (the trailing "/-" appends to the array,
# "?" creates it if missing).
- type: replace
  path: /instance_groups/name=prometheus2/jobs/name=prometheus2/properties/prometheus/custom_rules?/-
  value:
    # A mapping may carry only one `name` key; the superseded
    # ShieldOngoingAttacks name has been dropped in favour of ShieldAttacks.
    name: ShieldAttacks
    rules:
      # Fires whenever the paas_aws_shield_ongoing_attacks gauge is non-zero.
      - alert: ShieldAnyOngoingAttacks
        expr: paas_aws_shield_ongoing_attacks > 0
        labels:
          severity: warning
          service: elb
        annotations:
          summary: "AWS Shield reporting an ongoing attack"
          description: "An ongoing attack being reported by AWS Shield could be indicative of a large traffic spike. Is a tenant load testing?"

      # Fires when DDoSDetected is non-zero for a resource whose ARN matches
      # this environment's cf-rtr-* naming pattern.
      - alert: DDoSDetected
        # weird metric name comes from yet-another-cloudwatch-exporter's
        # capitalization-based auto-snake-case-conversion
        expr: aws_ddosprotection_ddo_sdetected_maximum{dimension_ResourceArn=~".*/((metrics_environment))-cf-rtr-.*"} > 0
        labels:
          severity: warning
          service: elb
        annotations:
          summary: "AWS has detected a DDoS attack"
          description: "AWS has detected a DDoS attack affecting {{$labels.dimension_ResourceArn}}"

      # Fires when packets are being dropped (MitigationAction="Drop") for a
      # resource whose ARN matches this environment's cf-rtr-* pattern.
      - alert: DDoSBeingMitigated
        expr: aws_ddosprotection_volume_packets_per_second_average{dimension_ResourceArn=~".*/((metrics_environment))-cf-rtr-.*", dimension_MitigationAction="Drop"} > 0
        labels:
          severity: critical
          service: elb
        annotations:
          summary: "AWS is mitigating a DDoS attack"
          description: "AWS is mitigating a DDoS attack affecting {{$labels.dimension_ResourceArn}} - you should probably check it isn't preventing genuine traffic getting through."
4 changes: 1 addition & 3 deletions manifests/prometheus/alerts.d/wafv2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
name: WafThrottlingIPsMaxRate
rules:
- alert: WafThrottlingIPsMaxRate
# the cloudwatch exporter should already restrict the metrics available
# to those of our deploy_env
expr: (aws_wafv2_blocked_requests_maximum{dimension_WebACL=~".+-rtr-lbs-web-acl$", dimension_Rule=~".+-rtr-lbs-max-request-rate-xff-blocked$"} > 0) or (aws_wafv2_blocked_requests_maximum{dimension_WebACL=~".+-rtr-lbs-web-acl$", dimension_Rule=~".+-rtr-lbs-max-request-rate-direct-blocked$"} > 0)
expr: (aws_wafv2_blocked_requests_maximum{dimension_WebACL="((metrics_environment))-rtr-lbs-web-acl", dimension_Rule="((metrics_environment))-rtr-lbs-max-request-rate-xff-blocked"} > 0) or (aws_wafv2_blocked_requests_maximum{dimension_WebACL="((metrics_environment))-rtr-lbs-web-acl", dimension_Rule="((metrics_environment))-rtr-lbs-max-request-rate-direct-blocked"} > 0)
labels:
severity: warning
service: elb
Expand Down
58 changes: 58 additions & 0 deletions manifests/prometheus/spec/alerts/shield-attacks.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
---
# promtool unit tests for the ShieldAnyOngoingAttacks, DDoSDetected and
# DDoSBeingMitigated alerts.
rule_files:
  # See alerts_validation_spec.rb for details of how stdin gets set:
  - fixtures/rules.yml

evaluation_interval: 1m

# Each input series carries two samples (t=0 and t=5m, per `interval`), so
# that a sample coincides with eval_time. A lone sample at t=0 would sit
# exactly on the edge of the default 5m lookback window and is treated as
# stale by engines whose lookback window is left-open.
tests:
  # Any non-zero ongoing-attack count should raise ShieldAnyOngoingAttacks.
  - interval: 5m
    input_series:
      - series: 'paas_aws_shield_ongoing_attacks'
        values: '1 1'

    alert_rule_test:
      - alertname: ShieldAnyOngoingAttacks
        eval_time: 5m
        exp_alerts:
          - exp_annotations:
              summary: "AWS Shield reporting an ongoing attack"
              description: "An ongoing attack being reported by AWS Shield could be indicative of a large traffic spike. Is a tenant load testing?"
            exp_labels:
              severity: warning
              service: elb

  # A non-zero DDoSDetected maximum on a "test-cf-rtr-*" load balancer ARN
  # should raise DDoSDetected.
  # NOTE(review): presumably the fixture renders ((metrics_environment)) as
  # "test" - confirm against alerts_validation_spec.rb.
  - interval: 5m
    input_series:
      - series: 'aws_ddosprotection_ddo_sdetected_maximum{dimension_ResourceArn="arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe"}'
        values: '123 123'

    alert_rule_test:
      - alertname: DDoSDetected
        eval_time: 5m
        exp_alerts:
          - exp_annotations:
              summary: "AWS has detected a DDoS attack"
              description: "AWS has detected a DDoS attack affecting arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe"
            exp_labels:
              severity: warning
              service: elb
              dimension_ResourceArn: "arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe"

  # A non-zero average packet rate with MitigationAction="Drop" on a
  # "test-cf-rtr-*" load balancer ARN should raise DDoSBeingMitigated.
  - interval: 5m
    input_series:
      - series: 'aws_ddosprotection_volume_packets_per_second_average{dimension_ResourceArn="arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe", dimension_MitigationAction="Drop"}'
        values: '123 123'

    alert_rule_test:
      - alertname: DDoSBeingMitigated
        eval_time: 5m
        exp_alerts:
          - exp_annotations:
              summary: "AWS is mitigating a DDoS attack"
              description: "AWS is mitigating a DDoS attack affecting arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe - you should probably check it isn't preventing genuine traffic getting through."
            exp_labels:
              severity: critical
              service: elb
              dimension_ResourceArn: "arn:aws:elasticloadbalancing:eu-west-1:999999999999:loadbalancer/app/test-cf-rtr-sys/fefefefefefefefe"
              dimension_MitigationAction: "Drop"
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ evaluation_interval: 1m
tests:
- interval: 5m
input_series:
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-xff-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-xff-blocked"}'
values: 123
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-direct-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-direct-blocked"}'
values: _

alert_rule_test:
Expand All @@ -23,14 +23,14 @@ tests:
exp_labels:
severity: warning
service: elb
dimension_WebACL: "foo-rtr-lbs-web-acl"
dimension_Rule: "foo-rtr-lbs-max-request-rate-xff-blocked"
dimension_WebACL: "test-rtr-lbs-web-acl"
dimension_Rule: "test-rtr-lbs-max-request-rate-xff-blocked"

- interval: 5m
input_series:
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-xff-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-xff-blocked"}'
values: _
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-direct-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-direct-blocked"}'
values: 123

alert_rule_test:
Expand All @@ -43,16 +43,16 @@ tests:
exp_labels:
severity: warning
service: elb
dimension_WebACL: "foo-rtr-lbs-web-acl"
dimension_Rule: "foo-rtr-lbs-max-request-rate-direct-blocked"
dimension_WebACL: "test-rtr-lbs-web-acl"
dimension_Rule: "test-rtr-lbs-max-request-rate-direct-blocked"

- interval: 5m
input_series:
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-xff-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-xff-blocked"}'
values: 0
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl",dimension_Rule="foo-rtr-lbs-max-request-rate-direct-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl",dimension_Rule="test-rtr-lbs-max-request-rate-direct-blocked"}'
values: _
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="foo-rtr-lbs-web-acl-bar",dimension_Rule="foo-rtr-lbs-max-request-rate-direct-blocked"}'
- series: 'aws_wafv2_blocked_requests_maximum{dimension_WebACL="test-rtr-lbs-web-acl-bar",dimension_Rule="test-rtr-lbs-max-request-rate-direct-blocked"}'
values: 111

alert_rule_test: []
40 changes: 40 additions & 0 deletions terraform/cloudfoundry/lbs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ resource "aws_lb" "cf_loggregator" {
prefix = "cf-loggregator"
enabled = true
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_target_group" "cf_loggregator_rlp" {
Expand Down Expand Up @@ -48,6 +52,10 @@ resource "aws_lb_listener" "cf_loggregator" {
status_code = "404"
}
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener_rule" "cf_loggregator_rlp_log_api" {
Expand All @@ -64,6 +72,10 @@ resource "aws_lb_listener_rule" "cf_loggregator_rlp_log_api" {
values = ["log-api.*"]
}
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener_rule" "cf_loggregator_rlp_log_stream" {
Expand All @@ -80,6 +92,10 @@ resource "aws_lb_listener_rule" "cf_loggregator_rlp_log_stream" {
values = ["log-stream.*"]
}
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener_rule" "cf_doppler" {
Expand All @@ -96,6 +112,10 @@ resource "aws_lb_listener_rule" "cf_doppler" {
values = ["doppler.*"]
}
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_target_group" "cf_doppler" {
Expand Down Expand Up @@ -131,6 +151,10 @@ resource "aws_lb" "cf_router_app_domain" {
prefix = "cf-rtr-apps"
enabled = true
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener" "cf_router_app_domain_http" {
Expand All @@ -149,6 +173,10 @@ resource "aws_lb_listener" "cf_router_app_domain_http" {
query = ""
}
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener" "cf_router_app_domain_https" {
Expand All @@ -162,6 +190,10 @@ resource "aws_lb_listener" "cf_router_app_domain_https" {
type = "forward"
target_group_arn = aws_lb_target_group.cf_router_app_domain_https.arn
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_target_group" "cf_router_app_domain_https" {
Expand Down Expand Up @@ -224,6 +256,10 @@ resource "aws_lb" "cf_router_system_domain" {
prefix = "cf-rtr-sys"
enabled = true
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener" "cf_router_system_domain_https" {
Expand All @@ -237,6 +273,10 @@ resource "aws_lb_listener" "cf_router_system_domain_https" {
type = "forward"
target_group_arn = aws_lb_target_group.cf_router_system_domain_https.arn
}

tags = {
deploy_env = var.env
}
}

resource "aws_lb_listener_certificate" "cf_router_metrics_domain_https" {
Expand Down

0 comments on commit 3d056d8

Please sign in to comment.