From 164f952cafecd53bb44bd7bc05134e32a7904d29 Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 12:35:49 -0400 Subject: [PATCH 01/12] Update SLO module --- modules/slo/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/slo/main.tf b/modules/slo/main.tf index cc5dd78..fdccc1a 100644 --- a/modules/slo/main.tf +++ b/modules/slo/main.tf @@ -107,6 +107,7 @@ resource "datadog_monitor" "metric_slo_alert" { query = < ${lookup(each.value.threshold, "target", "99.00")} EOF + monitor_thresholds { critical = lookup(each.value.threshold, "target", null) } From 4afa2e5bb4cf2636e16d58518cefa214714b76f3 Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 13:33:28 -0400 Subject: [PATCH 02/12] Update SLO module --- modules/slo/main.tf | 120 ------------------------------------- modules/slo/metric_slo.tf | 83 +++++++++++++++++++++++++ modules/slo/monitor_slo.tf | 42 +++++++++++++ modules/slo/outputs.tf | 14 ++--- 4 files changed, 132 insertions(+), 127 deletions(-) create mode 100644 modules/slo/metric_slo.tf create mode 100644 modules/slo/monitor_slo.tf diff --git a/modules/slo/main.tf b/modules/slo/main.tf index fdccc1a..3618ebf 100644 --- a/modules/slo/main.tf +++ b/modules/slo/main.tf @@ -1,125 +1,5 @@ locals { enabled = module.this.enabled - datadog_monitor_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "monitor" && lookup(slo, "enabled", true) && local.enabled } - datadog_metric_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "metric" && lookup(slo, "enabled", true) && local.enabled } - - temp_datadog_slo_metric_monitors = flatten([ - for name, slo in var.datadog_slos : [ - for i, threshold in slo.thresholds : { - slo = slo, - slo_name = format("%s_threshold%s", name, i) - threshold = threshold - } - if slo.type == "metric" && local.enabled && lookup(slo, "enabled", true) - ] - ]) - - datadog_slo_metric_monitors = { for monitor in local.temp_datadog_slo_metric_monitors : monitor.slo_name => monitor } - alert_tags = local.enabled && var.alert_tags != null ? format("%s%s", var.alert_tags_separator, join(var.alert_tags_separator, var.alert_tags)) : "" } - -resource "datadog_service_level_objective" "monitor_slo" { - for_each = local.datadog_monitor_slos - - # Required - name = each.value.name - type = each.value.type - - dynamic "thresholds" { - for_each = each.value.thresholds - content { - target = lookup(thresholds, "target", "99.00") - timeframe = lookup(thresholds, "timeframe", "7d") - - target_display = lookup(thresholds, "target_display", "98.00") - warning = lookup(thresholds, "warning", "99.95") - warning_display = lookup(thresholds, "warning_display", "98.00") - } - } - - groups = lookup(each.value, "groups", []) - monitor_ids = each.value.monitor_ids - - # Optional - description = lookup(each.value, "description", null) - force_delete = lookup(each.value, "force_delete", true) - validate = lookup(each.value, "validate", false) - - # Convert terraform tags map to Datadog tags map - # If a key is supplied with a value, it will render "key:value" as a tag - # tags: - # key: value - # If a key is supplied without a value (null), it will render "key" as a tag - # tags: - # key: null - tags = [ - for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) - ] -} - -resource "datadog_service_level_objective" "metric_slo" { - for_each = local.datadog_metric_slos - - # Required - name = each.value.name - type = each.value.type - - query { - denominator = each.value.query.denominator - numerator = each.value.query.numerator - } - - # Optional - description = lookup(each.value, "description", null) - force_delete = lookup(each.value, "force_delete", true) - validate = lookup(each.value, "validate", false) - - dynamic "thresholds" { - for_each = each.value.thresholds - content { - target = lookup(thresholds.value, "target", null) - timeframe = lookup(thresholds.value, "timeframe", null) - warning = lookup(thresholds.value, "warning", null) - } - } - - # Convert terraform tags map to Datadog tags map - # If a key is supplied with a value, it will render "key:value" as a tag - # tags: - # key: value - # If a key is supplied without a value (null), it will render "key" as a tag - # tags: - # key: null - tags = [ - for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) - ] -} - -resource "datadog_monitor" "metric_slo_alert" { - for_each = local.datadog_slo_metric_monitors - - name = format("(SLO Error Budget Alert) %s", each.value.slo.name) - type = "slo alert" - message = format("%s%s", each.value.slo.message, local.alert_tags) - - query = < ${lookup(each.value.threshold, "target", "99.00")} - EOF - - monitor_thresholds { - critical = lookup(each.value.threshold, "target", null) - } - - # Convert terraform tags map to Datadog tags map - # If a key is supplied with a value, it will render "key:value" as a tag - # tags: - # key: value - # If a key is supplied without a value (null), it will render "key" as a tag - # tags: - # key: null - tags = [ - for tagk, tagv in lookup(each.value.slo, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) - ] -} diff --git a/modules/slo/metric_slo.tf b/modules/slo/metric_slo.tf new file mode 100644 index 0000000..aa767e5 --- /dev/null +++ b/modules/slo/metric_slo.tf @@ -0,0 +1,83 @@ +locals { + datadog_metric_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "metric" && lookup(slo, "enabled", true) && local.enabled } + + temp_datadog_slo_metric_monitors = flatten([ + for name, slo in var.datadog_slos : [ + for i, threshold in slo.thresholds : { + slo = slo, + slo_name = format("%s_threshold%s", name, i) + threshold = threshold + } + if slo.type == "metric" && local.enabled && lookup(slo, "enabled", true) + ] + ]) + + datadog_slo_metric_monitors = { for monitor in local.temp_datadog_slo_metric_monitors : monitor.slo_name => monitor } +} + +resource "datadog_service_level_objective" "metric_slo" { + for_each = local.datadog_metric_slos + + # Required + name = each.value.name + type = each.value.type + + # Optional + description = lookup(each.value, "description", null) + force_delete = lookup(each.value, "force_delete", true) + validate = lookup(each.value, "validate", false) + + query { + denominator = each.value.query.denominator + numerator = each.value.query.numerator + } + + dynamic "thresholds" { + for_each = each.value.thresholds + content { + target = lookup(thresholds.value, "target", null) + timeframe = lookup(thresholds.value, "timeframe", null) + warning = lookup(thresholds.value, "warning", null) + } + } + + # Convert terraform tags map to Datadog tags map + # If a key is supplied with a value, it will render "key:value" as a tag + # tags: + # key: value + # If a key is supplied without a value (null), it will render "key" as a tag + # tags: + # key: null + tags = [ + for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) + ] +} + +resource "datadog_monitor" "metric_slo_alert" { + for_each = local.datadog_slo_metric_monitors + + name = format("(SLO Error Budget Alert) %s", each.value.slo.name) + type = "slo alert" + message = format("%s%s", each.value.slo.message, local.alert_tags) + + query = < ${lookup(each.value.threshold, "target", "99.00")} + EOF + + force_delete = lookup(each.value, "force_delete", true) + + monitor_thresholds { + critical = lookup(each.value.threshold, "target", null) + } + + # Convert terraform tags map to Datadog tags map + # If a key is supplied with a value, it will render "key:value" as a tag + # tags: + # key: value + # If a key is supplied without a value (null), it will render "key" as a tag + # tags: + # key: null + tags = [ + for tagk, tagv in lookup(each.value.slo, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) + ] +} diff --git a/modules/slo/monitor_slo.tf b/modules/slo/monitor_slo.tf new file mode 100644 index 0000000..1d637a0 --- /dev/null +++ b/modules/slo/monitor_slo.tf @@ -0,0 +1,42 @@ +locals { + datadog_monitor_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "monitor" && lookup(slo, "enabled", true) && local.enabled } +} + +resource "datadog_service_level_objective" "monitor_slo" { + for_each = local.datadog_monitor_slos + + # Required + name = each.value.name + type = each.value.type + + dynamic "thresholds" { + for_each = each.value.thresholds + content { + target = lookup(thresholds, "target", "99.00") + timeframe = lookup(thresholds, "timeframe", "7d") + + target_display = lookup(thresholds, "target_display", "98.00") + warning = lookup(thresholds, "warning", "99.95") + warning_display = lookup(thresholds, "warning_display", "98.00") + } + } + + groups = lookup(each.value, "groups", []) + monitor_ids = each.value.monitor_ids + + # Optional + description = lookup(each.value, "description", null) + force_delete = lookup(each.value, "force_delete", true) + validate = lookup(each.value, "validate", false) + + # Convert terraform tags map to Datadog tags map + # If a key is supplied with a value, it will render "key:value" as a tag + # tags: + # key: value + # If a key is supplied without a value (null), it will render "key" as a tag + # tags: + # key: null + tags = [ + for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk) + ] +} diff --git a/modules/slo/outputs.tf b/modules/slo/outputs.tf index b31a41e..1b8aa87 100644 --- a/modules/slo/outputs.tf +++ b/modules/slo/outputs.tf @@ -1,14 +1,14 @@ -output "datadog_metric_slos" { - value = datadog_service_level_objective.metric_slo[*] - description = "Map of created Metric Based SLOs" -} - output "datadog_monitor_slos" { value = datadog_service_level_objective.monitor_slo[*] - description = "Map of created Monitor Based SLOs" + description = "Map of created monitor-based SLOs" +} + +output "datadog_metric_slos" { + value = datadog_service_level_objective.metric_slo[*] + description = "Map of created metric-based SLOs" } output "datadog_slo_alerts" { value = datadog_monitor.metric_slo_alert - description = "Map of created SLO Based Alerts" + description = "Map of created metric-based SLO Alerts" } From e60251de64ef6fe5a4f8e671b2d35b1f5fcdb567 Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 13:41:07 -0400 Subject: [PATCH 03/12] Update SLO module --- examples/slo/outputs.tf | 18 +++++++++--------- modules/slo/metric_slo.tf | 6 +++--- modules/slo/outputs.tf | 8 ++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/examples/slo/outputs.tf b/examples/slo/outputs.tf index 7a966ce..0b8518a 100644 --- a/examples/slo/outputs.tf +++ b/examples/slo/outputs.tf @@ -1,14 +1,14 @@ -output "datadog_metric_slos" { - value = module.datadog_slo.datadog_metric_slos - description = "Map of created Metric Based SLOs" -} - output "datadog_monitor_slos" { value = module.datadog_slo.datadog_monitor_slos - description = "Map of created Monitor Based SLOs" + description = "Map of created monitor-based SLOs" +} + +output "datadog_metric_slos" { + value = module.datadog_slo.datadog_metric_slos + description = "Map of created metric-based SLOs" } -output "datadog_slo_alerts" { - value = module.datadog_slo.datadog_slo_alerts - description = "Map of created SLO Based Alerts" +output "datadog_metric_slo_alerts" { + value = module.datadog_slo.datadog_metric_slo_alerts + description = "Map of created metric-based SLO alerts" } diff --git a/modules/slo/metric_slo.tf b/modules/slo/metric_slo.tf index aa767e5..353cbf4 100644 --- a/modules/slo/metric_slo.tf +++ b/modules/slo/metric_slo.tf @@ -1,7 +1,7 @@ locals { datadog_metric_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "metric" && lookup(slo, "enabled", true) && local.enabled } - temp_datadog_slo_metric_monitors = flatten([ + temp_datadog_metric_slo_alerts = flatten([ for name, slo in var.datadog_slos : [ for i, threshold in slo.thresholds : { slo = slo, @@ -12,7 +12,7 @@ locals { ] ]) - datadog_slo_metric_monitors = { for monitor in local.temp_datadog_slo_metric_monitors : monitor.slo_name => monitor } + datadog_metric_slo_alerts = { for monitor in local.temp_datadog_metric_slo_alerts : monitor.slo_name => monitor } } resource "datadog_service_level_objective" "metric_slo" { @@ -54,7 +54,7 @@ resource "datadog_service_level_objective" "metric_slo" { } resource "datadog_monitor" "metric_slo_alert" { - for_each = local.datadog_slo_metric_monitors + for_each = local.datadog_metric_slo_alerts name = format("(SLO Error Budget Alert) %s", each.value.slo.name) type = "slo alert" diff --git a/modules/slo/outputs.tf b/modules/slo/outputs.tf index 1b8aa87..bfc8acb 100644 --- a/modules/slo/outputs.tf +++ b/modules/slo/outputs.tf @@ -1,14 +1,14 @@ output "datadog_monitor_slos" { - value = datadog_service_level_objective.monitor_slo[*] + value = datadog_service_level_objective.monitor_slo description = "Map of created monitor-based SLOs" } output "datadog_metric_slos" { - value = datadog_service_level_objective.metric_slo[*] + value = datadog_service_level_objective.metric_slo description = "Map of created metric-based SLOs" } -output "datadog_slo_alerts" { +output "datadog_metric_slo_alerts" { value = datadog_monitor.metric_slo_alert - description = "Map of created metric-based SLO Alerts" + description = "Map of created metric-based SLO alerts" } From bf13824b5be0b4732030c19874d94ff3af143263 Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 16:14:53 -0400 Subject: [PATCH 04/12] Update SLO module --- modules/slo/metric_slo.tf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/slo/metric_slo.tf b/modules/slo/metric_slo.tf index 353cbf4..89eca88 100644 --- a/modules/slo/metric_slo.tf +++ b/modules/slo/metric_slo.tf @@ -53,23 +53,27 @@ resource "datadog_service_level_objective" "metric_slo" { ] } +# https://registry.terraform.io/providers/DataDog/datadog/latest/docs/resources/monitor resource "datadog_monitor" "metric_slo_alert" { for_each = local.datadog_metric_slo_alerts - name = format("(SLO Error Budget Alert) %s", each.value.slo.name) type = "slo alert" - message = format("%s%s", each.value.slo.message, local.alert_tags) + name = format("(SLO Error Budget Alert) %s", each.value.slo.name) + message = format("%s %s", each.value.slo.message, local.alert_tags) query = < ${lookup(each.value.threshold, "target", "99.00")} EOF - force_delete = lookup(each.value, "force_delete", true) - monitor_thresholds { critical = lookup(each.value.threshold, "target", null) } + force_delete = lookup(each.value, "force_delete", false) + enable_logs_sample = lookup(each.value, "enable_logs_sample", false) + groupby_simple_monitor = lookup(each.value, "groupby_simple_monitor", false) + include_tags = lookup(each.value, "include_tags ", true) + # Convert terraform tags map to Datadog tags map # If a key is supplied with a value, it will render "key:value" as a tag # tags: From 017aa367b0e7943aa79e627a15da417d99d0841e Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 16:21:59 -0400 Subject: [PATCH 05/12] Update SLO module --- modules/slo/metric_slo.tf | 5 ----- 1 file changed, 5 deletions(-) diff --git a/modules/slo/metric_slo.tf b/modules/slo/metric_slo.tf index 89eca88..3db3021 100644 --- a/modules/slo/metric_slo.tf +++ b/modules/slo/metric_slo.tf @@ -69,11 +69,6 @@ resource "datadog_monitor" "metric_slo_alert" { critical = lookup(each.value.threshold, "target", null) } - force_delete = lookup(each.value, "force_delete", false) - enable_logs_sample = lookup(each.value, "enable_logs_sample", false) - groupby_simple_monitor = lookup(each.value, "groupby_simple_monitor", false) - include_tags = lookup(each.value, "include_tags ", true) - # Convert terraform tags map to Datadog tags map # If a key is supplied with a value, it will render "key:value" as a tag # tags: From fe8290618a9a7b61aa7ab4a68f65e818f5cf46ce Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 23:16:47 -0400 Subject: [PATCH 06/12] Update SLO module --- .../{synthetic.yaml => metric_slo.yaml} | 2 -- modules/slo/README.md | 1 + modules/slo/monitor_slo.tf | 24 +++++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) rename examples/slo/catalog/{synthetic.yaml => metric_slo.yaml} (94%) diff --git a/examples/slo/catalog/synthetic.yaml b/examples/slo/catalog/metric_slo.yaml similarity index 94% rename from examples/slo/catalog/synthetic.yaml rename to examples/slo/catalog/metric_slo.yaml index c5eac3b..44945d1 100644 --- a/examples/slo/catalog/synthetic.yaml +++ b/examples/slo/catalog/metric_slo.yaml @@ -17,8 +17,6 @@ synthetics-slo: - target: "99" timeframe: "30d" warning: "99.5" - groups: [] - monitor_ids: [] tags: managedby: terraform test: true diff --git a/modules/slo/README.md b/modules/slo/README.md index 6c8ae21..e8a9fe8 100644 --- a/modules/slo/README.md +++ b/modules/slo/README.md @@ -42,4 +42,5 @@ synthetics-slo: ## References - [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) + - [Monitor-based SLOs](https://docs.datadoghq.com/monitors/service_level_objectives/monitor/) - [Datadog Error Budget](https://docs.datadoghq.com/monitors/service_level_objectives/error_budget/) diff --git a/modules/slo/monitor_slo.tf b/modules/slo/monitor_slo.tf index 1d637a0..f6ccbdd 100644 --- a/modules/slo/monitor_slo.tf +++ b/modules/slo/monitor_slo.tf @@ -2,6 +2,18 @@ locals { datadog_monitor_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "monitor" && lookup(slo, "enabled", true) && local.enabled } } +module "datadog_monitors" { + source = "../monitors" + + for_each = local.datadog_monitor_slos + + datadog_monitors = lookup(each.value, "monitors", {}) + alert_tags = var.alert_tags + alert_tags_separator = var.alert_tags_separator + + context = module.this.context +} + resource "datadog_service_level_objective" "monitor_slo" { for_each = local.datadog_monitor_slos @@ -12,17 +24,19 @@ resource "datadog_service_level_objective" "monitor_slo" { dynamic "thresholds" { for_each = each.value.thresholds content { - target = lookup(thresholds, "target", "99.00") - timeframe = lookup(thresholds, "timeframe", "7d") - + target = lookup(thresholds, "target", "99.00") + timeframe = lookup(thresholds, "timeframe", "7d") target_display = lookup(thresholds, "target_display", "98.00") warning = lookup(thresholds, "warning", "99.95") warning_display = lookup(thresholds, "warning_display", "98.00") } } - groups = lookup(each.value, "groups", []) - monitor_ids = each.value.monitor_ids + groups = lookup(each.value, "groups", []) + + # Either `monitor_ids` or `monitors` should be provided + # If `monitors` map is provided, the monitors are created in the `datadog_monitors` module + monitor_ids = try(each.value.monitor_ids, null) != null ? each.value.monitor_ids : module.datadog_monitors[each.key].datadog_monitor_ids # Optional description = lookup(each.value, "description", null) From 66bc2fe5a44a27493d8c8a893d46314a57f3e0f4 Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 23:20:49 -0400 Subject: [PATCH 07/12] Update SLO module --- examples/slo/outputs.tf | 5 +++++ modules/slo/outputs.tf | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/examples/slo/outputs.tf b/examples/slo/outputs.tf index 0b8518a..a1b46ba 100644 --- a/examples/slo/outputs.tf +++ b/examples/slo/outputs.tf @@ -3,6 +3,11 @@ output "datadog_monitor_slos" { description = "Map of created monitor-based SLOs" } +output "datadog_monitor_slo_monitors" { + value = module.datadog_slo.datadog_monitor_slo_monitors + description = "Map of created monitors for the monitor-based SLOs" +} + output "datadog_metric_slos" { value = module.datadog_slo.datadog_metric_slos description = "Map of created metric-based SLOs" diff --git a/modules/slo/outputs.tf b/modules/slo/outputs.tf index bfc8acb..eefc2ab 100644 --- a/modules/slo/outputs.tf +++ b/modules/slo/outputs.tf @@ -3,6 +3,11 @@ output "datadog_monitor_slos" { description = "Map of created monitor-based SLOs" } +output "datadog_monitor_slo_monitors" { + value = module.datadog_monitors.datadog_monitors + description = "Map of created monitors for the monitor-based SLOs" +} + output "datadog_metric_slos" { value = datadog_service_level_objective.metric_slo description = "Map of created metric-based SLOs" From 0c0fede7e182c6f91785229ada057fdc2772005c Mon Sep 17 00:00:00 2001 From: aknysh Date: Fri, 12 Aug 2022 23:41:08 -0400 Subject: [PATCH 08/12] Update SLO module --- examples/slo/catalog/metric_slo.yaml | 2 +- examples/slo/catalog/monitor_slo.yaml | 24 ++++++++++++++++++++++++ examples/slo/outputs.tf | 2 +- modules/slo/monitor_slo.tf | 8 +++----- modules/slo/outputs.tf | 4 ++-- 5 files changed, 31 insertions(+), 9 deletions(-) create mode 100644 examples/slo/catalog/monitor_slo.yaml diff --git a/examples/slo/catalog/metric_slo.yaml b/examples/slo/catalog/metric_slo.yaml index 44945d1..60ebc4a 100644 --- a/examples/slo/catalog/metric_slo.yaml +++ b/examples/slo/catalog/metric_slo.yaml @@ -1,4 +1,4 @@ -synthetics-slo: +metric-slo: name: "(SLO) Synthetic Checks" type: metric query: diff --git a/examples/slo/catalog/monitor_slo.yaml b/examples/slo/catalog/monitor_slo.yaml new file mode 100644 index 0000000..8a953f7 --- /dev/null +++ b/examples/slo/catalog/monitor_slo.yaml @@ -0,0 +1,24 @@ +monitor-slo: + name: "(SLO) Availability" + type: monitor + query: + numerator: sum:synthetics.test_runs{status:success}.as_count() + denominator: sum:synthetics.test_runs{*}.as_count() + description: | + Number of Successful Synthetic Checks. + message: | + ({stage} {region}) {instance_id} failed a SLO check + force_delete: true + validate: true + thresholds: + - target: "99.5" + timeframe: "7d" + warning: "99.9" + - target: "99" + timeframe: "30d" + warning: "99.5" + monitor_ids: null + tags: + managedby: terraform + test: true + api_version: null diff --git a/examples/slo/outputs.tf b/examples/slo/outputs.tf index a1b46ba..9d834a1 100644 --- a/examples/slo/outputs.tf +++ b/examples/slo/outputs.tf @@ -5,7 +5,7 @@ output "datadog_monitor_slos" { output "datadog_monitor_slo_monitors" { value = module.datadog_slo.datadog_monitor_slo_monitors - description = "Map of created monitors for the monitor-based SLOs" + description = "Created monitors for the monitor-based SLOs" } output "datadog_metric_slos" { diff --git a/modules/slo/monitor_slo.tf b/modules/slo/monitor_slo.tf index f6ccbdd..0df4d36 100644 --- a/modules/slo/monitor_slo.tf +++ b/modules/slo/monitor_slo.tf @@ -24,11 +24,9 @@ resource "datadog_service_level_objective" "monitor_slo" { dynamic "thresholds" { for_each = each.value.thresholds content { - target = lookup(thresholds, "target", "99.00") - timeframe = lookup(thresholds, "timeframe", "7d") - target_display = lookup(thresholds, "target_display", "98.00") - warning = lookup(thresholds, "warning", "99.95") - warning_display = lookup(thresholds, "warning_display", "98.00") + target = lookup(thresholds, "target", "99.00") + timeframe = lookup(thresholds, "timeframe", "7d") + warning = lookup(thresholds, "warning", "99.95") } } diff --git a/modules/slo/outputs.tf b/modules/slo/outputs.tf index eefc2ab..6b07234 100644 --- a/modules/slo/outputs.tf +++ b/modules/slo/outputs.tf @@ -4,8 +4,8 @@ output "datadog_monitor_slos" { } output "datadog_monitor_slo_monitors" { - value = module.datadog_monitors.datadog_monitors - description = "Map of created monitors for the monitor-based SLOs" + value = module.datadog_monitors + description = "Created monitors for the monitor-based SLOs" } output "datadog_metric_slos" { From 2133f3ae80a173c7ed3cbf32fc43e26f8b33471d Mon Sep 17 00:00:00 2001 From: aknysh Date: Sat, 13 Aug 2022 00:11:44 -0400 Subject: [PATCH 09/12] Update SLO module --- examples/slo/catalog/metric_slo.yaml | 2 +- examples/slo/catalog/monitor_slo.yaml | 44 +++++++++++-- modules/slo/README.md | 95 ++++++++++++++++++++++----- 3 files changed, 117 insertions(+), 24 deletions(-) diff --git a/examples/slo/catalog/metric_slo.yaml b/examples/slo/catalog/metric_slo.yaml index 60ebc4a..8ee0d0e 100644 --- a/examples/slo/catalog/metric_slo.yaml +++ b/examples/slo/catalog/metric_slo.yaml @@ -18,6 +18,6 @@ metric-slo: timeframe: "30d" warning: "99.5" tags: - managedby: terraform + ManagedBy: terraform test: true api_version: null diff --git a/examples/slo/catalog/monitor_slo.yaml b/examples/slo/catalog/monitor_slo.yaml index 8a953f7..f511eac 100644 --- a/examples/slo/catalog/monitor_slo.yaml +++ b/examples/slo/catalog/monitor_slo.yaml @@ -1,11 +1,8 @@ monitor-slo: - name: "(SLO) Availability" + name: "(SLO) EC2 Availability" type: monitor - query: - numerator: sum:synthetics.test_runs{status:success}.as_count() - denominator: sum:synthetics.test_runs{*}.as_count() description: | - Number of Successful Synthetic Checks. + Number of EC2 failed status checks. message: | ({stage} {region}) {instance_id} failed a SLO check force_delete: true @@ -17,8 +14,43 @@ monitor-slo: - target: "99" timeframe: "30d" warning: "99.5" + # Either `monitor_ids` or `monitors` should be provided + # `monitor_ids` is a list of externally created monitors to use for this monitor-based SLO + # If `monitors` mao is provided, the monitors will be created by the module and assigned to the SLO monitor_ids: null + monitors: + ec2-failed-status-check: + name: "(EC2) Status Check" + type: metric alert + query: | + avg(last_10m):avg:aws.ec2.status_check_failed{*} by {instance_id} > 0 + message: | + ({stage} {region}) {instance_id} failed a status check + escalation_message: "" + tags: + ManagedBy: Terraform + priority: 3 + notify_no_data: false + notify_audit: true + require_full_window: true + enable_logs_sample: false + force_delete: true + include_tags: true + locked: false + renotify_interval: 60 + timeout_h: 0 + evaluation_delay: 60 + new_host_delay: 300 + new_group_delay: 0 + groupby_simple_monitor: false + renotify_occurrences: 0 + renotify_statuses: [] + validate: true + no_data_timeframe: 10 + threshold_windows: {} + thresholds: + critical: 0 tags: - managedby: terraform + ManagedBy: terraform test: true api_version: null diff --git a/modules/slo/README.md b/modules/slo/README.md index e8a9fe8..d0aa973 100644 --- a/modules/slo/README.md +++ b/modules/slo/README.md @@ -1,16 +1,21 @@ # Datadog SLO -This module is responsible for creating Datadog [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) and their related alerts. +This module is responsible for creating Datadog [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) and their related monitors and alerts. + +The module can create metric-based SLOs (and the corresponding alerts) and monitor-based SLOs (and the corresponding monitors). ## Alerts -Datadog Alerts for SLOs are terraformed through the monitor object. + +Datadog alerts for SLOs are terraformed through the monitor object. An SLO can have many thresholds set, but a monitor can only have one. In order to get around this, the module creates Datadog monitors for each threshold within an SLO. -For example +## Usage + +Example of metric-based SLO: ```yaml -synthetics-slo: +metric-slo: name: "(SLO) Synthetic Checks" type: metric query: @@ -23,21 +28,77 @@ synthetics-slo: force_delete: true validate: true thresholds: - - target: "99.5" - target_display: "99.50" - timeframe: "7d" - warning: "99.9" - warning_display: "99.90" - - target: "99" - target_display: "99.00" - timeframe: "30d" - warning: "99.5" - warning_display: "99.50" - groups: [] - monitor_ids: [] + - target: "99.5" + timeframe: "7d" + warning: "99.9" + - target: "99" + timeframe: "30d" + warning: "99.5" tags: - managedby: terraform + ManagedBy: terraform + test: true + api_version: null +``` +Example of monitor-based SLO: + +```yaml +monitor-slo: + name: "(SLO) EC2 Availability" + type: monitor + description: | + Number of EC2 failed status checks. + message: | + ({stage} {region}) {instance_id} failed a SLO check + force_delete: true + validate: true + thresholds: + - target: "99.5" + timeframe: "7d" + warning: "99.9" + - target: "99" + timeframe: "30d" + warning: "99.5" + # Either `monitor_ids` or `monitors` should be provided + # `monitor_ids` is a list of externally created monitors to use for this monitor-based SLO + # If `monitors` mao is provided, the monitors will be created by the module and assigned to the SLO + monitor_ids: null + monitors: + ec2-failed-status-check: + name: "(EC2) Status Check" + type: metric alert + query: | + avg(last_10m):avg:aws.ec2.status_check_failed{*} by {instance_id} > 0 + message: | + ({stage} {region}) {instance_id} failed a status check + escalation_message: "" + tags: + ManagedBy: Terraform + priority: 3 + notify_no_data: false + notify_audit: true + require_full_window: true + enable_logs_sample: false + force_delete: true + include_tags: true + locked: false + renotify_interval: 60 + timeout_h: 0 + evaluation_delay: 60 + new_host_delay: 300 + new_group_delay: 0 + groupby_simple_monitor: false + renotify_occurrences: 0 + renotify_statuses: [] + validate: true + no_data_timeframe: 10 + threshold_windows: {} + thresholds: + critical: 0 + tags: + ManagedBy: terraform + test: true + api_version: null ``` ## References From 9a6a4365192e4c96d3ee6cd632a23e9d9f6092e4 Mon Sep 17 00:00:00 2001 From: aknysh Date: Sat, 13 Aug 2022 00:12:50 -0400 Subject: [PATCH 10/12] Update SLO module --- modules/slo/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/slo/README.md b/modules/slo/README.md index d0aa973..ddad7b6 100644 --- a/modules/slo/README.md +++ b/modules/slo/README.md @@ -105,3 +105,4 @@ monitor-slo: - [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) - [Monitor-based SLOs](https://docs.datadoghq.com/monitors/service_level_objectives/monitor/) - [Datadog Error Budget](https://docs.datadoghq.com/monitors/service_level_objectives/error_budget/) + - [Monitor-based SLO example](https://github.com/DataDog/terraform-provider-datadog/issues/667) From 6a2846b001e7915d24e6634588ca464c51218c0e Mon Sep 17 00:00:00 2001 From: aknysh Date: Sat, 13 Aug 2022 00:17:07 -0400 Subject: [PATCH 11/12] Update SLO module --- examples/slo/catalog/monitor_slo.yaml | 2 +- modules/slo/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/slo/catalog/monitor_slo.yaml b/examples/slo/catalog/monitor_slo.yaml index f511eac..191ddaf 100644 --- a/examples/slo/catalog/monitor_slo.yaml +++ b/examples/slo/catalog/monitor_slo.yaml @@ -16,7 +16,7 @@ monitor-slo: warning: "99.5" # Either `monitor_ids` or `monitors` should be provided # `monitor_ids` is a list of externally created monitors to use for this monitor-based SLO - # If `monitors` mao is provided, the monitors will be created by the module and assigned to the SLO + # If `monitors` map is provided, the monitors will be created by the module and assigned to the SLO monitor_ids: null monitors: ec2-failed-status-check: diff --git a/modules/slo/README.md b/modules/slo/README.md index ddad7b6..c195c83 100644 --- a/modules/slo/README.md +++ b/modules/slo/README.md @@ -61,7 +61,7 @@ monitor-slo: warning: "99.5" # Either `monitor_ids` or `monitors` should be provided # `monitor_ids` is a list of externally created monitors to use for this monitor-based SLO - # If `monitors` mao is provided, the monitors will be created by the module and assigned to the SLO + # If `monitors` map is provided, the monitors will be created by the module and assigned to the SLO monitor_ids: null monitors: ec2-failed-status-check: From f903d8825f5d337e50537f3f9e16426d5ca3c56d Mon Sep 17 00:00:00 2001 From: aknysh Date: Sat, 13 Aug 2022 00:23:08 -0400 Subject: [PATCH 12/12] Update SLO module --- test/src/go.mod | 12 ++++++------ test/src/go.sum | 24 +++++++++++++----------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/test/src/go.mod b/test/src/go.mod index b75b6e4..cc3ba72 100644 --- a/test/src/go.mod +++ b/test/src/go.mod @@ -1,10 +1,10 @@ module github.com/cloudposse/terraform-datadog-platform -go 1.17 +go 1.18 require ( - github.com/gruntwork-io/terratest v0.39.0 - github.com/stretchr/testify v1.7.0 + github.com/gruntwork-io/terratest v0.40.19 + github.com/stretchr/testify v1.8.0 ) require ( @@ -32,7 +32,7 @@ require ( github.com/gruntwork-io/go-commons v0.8.0 // indirect github.com/hashicorp/errwrap v1.0.0 // indirect github.com/hashicorp/go-cleanhttp v0.5.2 // indirect - github.com/hashicorp/go-getter v1.5.9 // indirect + github.com/hashicorp/go-getter v1.6.1 // indirect github.com/hashicorp/go-multierror v1.1.0 // indirect github.com/hashicorp/go-safetemp v1.0.0 // indirect github.com/hashicorp/go-version v1.3.0 // indirect @@ -64,7 +64,7 @@ require ( golang.org/x/mod v0.4.2 // indirect golang.org/x/net v0.0.0-20210614182718-04defd469f4e // indirect golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c // indirect - golang.org/x/sys v0.0.0-20210603125802-9665404d3644 // indirect + golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e // indirect golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 // indirect golang.org/x/text v0.3.6 // indirect golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e // indirect @@ -77,7 +77,7 @@ require ( google.golang.org/protobuf v1.26.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/api v0.20.6 // indirect k8s.io/apimachinery v0.20.6 // indirect k8s.io/client-go v0.20.6 // indirect diff --git a/test/src/go.sum b/test/src/go.sum index d4e2a8f..8e896f3 100644 --- a/test/src/go.sum +++ b/test/src/go.sum @@ -177,8 +177,8 @@ github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.7 h1:81/ik6ipDQS2aGcBfIN5dHDB36BwrStyeAQquSYCV4o= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.1.0 h1:Hsa8mG0dQ46ij8Sl2AYJDUv1oA9/d6Vk+3LG99Oe02g= github.com/google/gofuzz v1.1.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= @@ -213,14 +213,14 @@ github.com/googleapis/gnostic v0.4.1/go.mod h1:LRhVm6pbyptWbWbuZ38d1eyptfvIytN3i github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/gruntwork-io/go-commons v0.8.0 h1:k/yypwrPqSeYHevLlEDmvmgQzcyTwrlZGRaxEM6G0ro= github.com/gruntwork-io/go-commons v0.8.0/go.mod h1:gtp0yTtIBExIZp7vyIV9I0XQkVwiQZze678hvDXof78= -github.com/gruntwork-io/terratest v0.39.0 h1:Lq7aNCoFxhhmdQIyuBFBf8N87aCnypmNBFYgvsdIfCQ= -github.com/gruntwork-io/terratest v0.39.0/go.mod h1:CjHsEgP1Pe987X5N8K5qEqCuLtu1bqERGIAF8bTj1s0= +github.com/gruntwork-io/terratest v0.40.19 h1:slnTF0Amrc9yRVUV/X/fHlVWKNF0H8fwa2OLyeV2IOA= +github.com/gruntwork-io/terratest v0.40.19/go.mod h1:JGeIGgLbxbG9/Oqm06z6YXVr76CfomdmLkV564qov+8= github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-getter v1.5.9 h1:b7ahZW50iQiUek/at3CvZhPK1/jiV6CtKcsJiR6E4R0= -github.com/hashicorp/go-getter v1.5.9/go.mod h1:BrrV/1clo8cCYu6mxvboYg+KutTiFnXjMEgDD8+i7ZI= +github.com/hashicorp/go-getter v1.6.1 h1:NASsgP4q6tL94WH6nJxKWj8As2H/2kop/bB1d8JMyRY= +github.com/hashicorp/go-getter v1.6.1/go.mod h1:IZCrswsZPeWv9IkVnLElzRU/gz/QPi6pZHn4tv6vbwA= github.com/hashicorp/go-multierror v1.1.0 h1:B9UzwGQJehnUY1yNrnwREHc3fGbC2xefo8g4TbElacI= github.com/hashicorp/go-multierror v1.1.0/go.mod h1:spPvp8C1qA32ftKqdAHm4hHTbPw+vmowP0z+KUhOZdA= github.com/hashicorp/go-safetemp v1.0.0 h1:2HR189eFNrjHQyENnQMMpCiBAsRxzbTMIgBhEyExpmo= @@ -330,13 +330,15 @@ github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/tmccombs/hcl2json v0.3.3 h1:+DLNYqpWE0CsOQiEZu+OZm5ZBImake3wtITYxQ8uLFQ= github.com/tmccombs/hcl2json v0.3.3/go.mod h1:Y2chtz2x9bAeRTvSibVRVgbLJhLJXKlUeIvjeVdnm4w= github.com/ulikunitz/xz v0.5.8 h1:ERv8V6GKqVi23rgu5cj9pVfVzJbOqAY2Ntl88O6c2nQ= @@ -524,8 +526,8 @@ golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210514084401-e8d321eab015/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210603125802-9665404d3644 h1:CA1DEQ4NdKphKeL70tvsWNdT5oFh1lOjihRcEDROi0I= -golang.org/x/sys v0.0.0-20210603125802-9665404d3644/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e h1:w36l2Uw3dRan1K3TyXriXvY+6T56GNmlKGcqiQUJDfM= +golang.org/x/sys v0.0.0-20220517195934-5e4e11fc645e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1 h1:v+OssWQX+hTHEmOBgwxdZxK4zHq3yOs8F9J7mk0PY8E= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -725,8 +727,8 @@ gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=