cloudposse · aknysh · Aug 15, 2022 · Aug 12, 2022 · Aug 12, 2022 · Aug 12, 2022
diff --git a/examples/slo/catalog/synthetic.yaml → examples/slo/catalog/metric_slo.yaml b/examples/slo/catalog/synthetic.yaml → examples/slo/catalog/metric_slo.yaml
@@ -1,4 +1,4 @@
-synthetics-slo:
+metric-slo:
   name: "(SLO) Synthetic Checks"
   type: metric
   query:
@@ -17,9 +17,7 @@ synthetics-slo:
     - target: "99"
       timeframe: "30d"
       warning: "99.5"
-  groups: []
-  monitor_ids: []
   tags:
-    managedby: terraform
+    ManagedBy: terraform
     test: true
     api_version: null
diff --git a/examples/slo/catalog/monitor_slo.yaml b/examples/slo/catalog/monitor_slo.yaml
@@ -0,0 +1,56 @@
+monitor-slo:
+  name: "(SLO) EC2 Availability"
+  type: monitor
+  description: |
+    Number of EC2 failed status checks.
+  message: |
+    ({stage} {region}) {instance_id} failed a SLO check
+  force_delete: true
+  validate: true
+  thresholds:
+    - target: "99.5"
+      timeframe: "7d"
+      warning: "99.9"
+    - target: "99"
+      timeframe: "30d"
+      warning: "99.5"
+  # Either `monitor_ids` or `monitors` should be provided
+  # `monitor_ids` is a list of IDs of externally created monitors to use for this monitor-based SLO
+  # If the `monitors` map is provided, the monitors will be created by the module and assigned to the SLO
+  monitor_ids: null
+  monitors:
+    ec2-failed-status-check:
+      name: "(EC2) Status Check"
+      type: metric alert
+      query: |
+        avg(last_10m):avg:aws.ec2.status_check_failed{*} by {instance_id} > 0
+      message: |
+        ({stage} {region}) {instance_id} failed a status check
+      escalation_message: ""
+      tags:
+        ManagedBy: Terraform
+      priority: 3
+      notify_no_data: false
+      notify_audit: true
+      require_full_window: true
+      enable_logs_sample: false
+      force_delete: true
+      include_tags: true
+      locked: false
+      renotify_interval: 60
+      timeout_h: 0
+      evaluation_delay: 60
+      new_host_delay: 300
+      new_group_delay: 0
+      groupby_simple_monitor: false
+      renotify_occurrences: 0
+      renotify_statuses: []
+      validate: true
+      no_data_timeframe: 10
+      threshold_windows: {}
+      thresholds:
+        critical: 0
+  tags:
+    ManagedBy: terraform
+    test: true
+    api_version: null
diff --git a/examples/slo/outputs.tf b/examples/slo/outputs.tf
@@ -1,14 +1,19 @@
-output "datadog_metric_slos" {
-  value       = module.datadog_slo.datadog_metric_slos
-  description = "Map of created Metric Based SLOs"
-}
-
 output "datadog_monitor_slos" {
   value       = module.datadog_slo.datadog_monitor_slos
-  description = "Map of created Monitor Based SLOs"
+  description = "Map of created monitor-based SLOs"
+}
+
+output "datadog_monitor_slo_monitors" {
+  value       = module.datadog_slo.datadog_monitor_slo_monitors
+  description = "Created monitors for the monitor-based SLOs"
+}
+
+output "datadog_metric_slos" {
+  value       = module.datadog_slo.datadog_metric_slos
+  description = "Map of created metric-based SLOs"
 }
 
-output "datadog_slo_alerts" {
-  value       = module.datadog_slo.datadog_slo_alerts
-  description = "Map of created SLO Based Alerts"
+output "datadog_metric_slo_alerts" {
+  value       = module.datadog_slo.datadog_metric_slo_alerts
+  description = "Map of created metric-based SLO alerts"
 }
diff --git a/modules/slo/README.md b/modules/slo/README.md
@@ -1,16 +1,21 @@
 # Datadog SLO
 
-This module is responsible for creating Datadog [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) and their related alerts.
+This module is responsible for creating Datadog [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/) and their related monitors and alerts.
+
+The module can create metric-based SLOs (and the corresponding alerts) and monitor-based SLOs (and the corresponding monitors).
 
 ## Alerts
-Datadog Alerts for SLOs are terraformed through the monitor object.
+
+Datadog alerts for SLOs are managed in Terraform as Datadog monitors (the `datadog_monitor` resource with type `slo alert`).
 
 An SLO can have many thresholds set, but a monitor can only have one. In order to get around this, the module creates Datadog monitors for each threshold within an SLO. 
 
-For example 
+## Usage
+
+Example of a metric-based SLO:
 
 ```yaml
-synthetics-slo:
+metric-slo:
   name: "(SLO) Synthetic Checks"
   type: metric
   query:
@@ -23,23 +28,81 @@ synthetics-slo:
   force_delete: true
   validate: true
   thresholds:
-  - target: "99.5"
-    target_display: "99.50"
-    timeframe: "7d"
-    warning: "99.9"
-    warning_display: "99.90"
-  - target: "99"
-    target_display: "99.00"
-    timeframe: "30d"
-    warning: "99.5"
-    warning_display: "99.50"
-  groups: []
-  monitor_ids: []
+    - target: "99.5"
+      timeframe: "7d"
+      warning: "99.9"
+    - target: "99"
+      timeframe: "30d"
+      warning: "99.5"
   tags:
-    managedby: terraform
+    ManagedBy: terraform
+    test: true
+    api_version: null
+```
 
+Example of a monitor-based SLO:
+
+```yaml
+monitor-slo:
+  name: "(SLO) EC2 Availability"
+  type: monitor
+  description: |
+    Number of EC2 failed status checks.
+  message: |
+    ({stage} {region}) {instance_id} failed a SLO check
+  force_delete: true
+  validate: true
+  thresholds:
+    - target: "99.5"
+      timeframe: "7d"
+      warning: "99.9"
+    - target: "99"
+      timeframe: "30d"
+      warning: "99.5"
+  # Either `monitor_ids` or `monitors` should be provided
+  # `monitor_ids` is a list of IDs of externally created monitors to use for this monitor-based SLO
+  # If the `monitors` map is provided, the monitors will be created by the module and assigned to the SLO
+  monitor_ids: null
+  monitors:
+    ec2-failed-status-check:
+      name: "(EC2) Status Check"
+      type: metric alert
+      query: |
+        avg(last_10m):avg:aws.ec2.status_check_failed{*} by {instance_id} > 0
+      message: |
+        ({stage} {region}) {instance_id} failed a status check
+      escalation_message: ""
+      tags:
+        ManagedBy: Terraform
+      priority: 3
+      notify_no_data: false
+      notify_audit: true
+      require_full_window: true
+      enable_logs_sample: false
+      force_delete: true
+      include_tags: true
+      locked: false
+      renotify_interval: 60
+      timeout_h: 0
+      evaluation_delay: 60
+      new_host_delay: 300
+      new_group_delay: 0
+      groupby_simple_monitor: false
+      renotify_occurrences: 0
+      renotify_statuses: []
+      validate: true
+      no_data_timeframe: 10
+      threshold_windows: {}
+      thresholds:
+        critical: 0
+  tags:
+    ManagedBy: terraform
+    test: true
+    api_version: null
 ```
 
 ## References
  - [Service Level Objectives](https://docs.datadoghq.com/monitors/service_level_objectives/)
+ - [Monitor-based SLOs](https://docs.datadoghq.com/monitors/service_level_objectives/monitor/)
  - [Datadog Error Budget](https://docs.datadoghq.com/monitors/service_level_objectives/error_budget/)
+ - [Monitor-based SLO example](https://github.com/DataDog/terraform-provider-datadog/issues/667)
diff --git a/modules/slo/main.tf b/modules/slo/main.tf
@@ -1,124 +1,5 @@
 locals {
   enabled = module.this.enabled
 
-  datadog_monitor_slos = { for slo in var.datadog_slos : slo.name => slo if slo.type == "monitor" && lookup(slo, "enabled", true) && local.enabled }
-  datadog_metric_slos  = { for slo in var.datadog_slos : slo.name => slo if slo.type == "metric" && lookup(slo, "enabled", true) && local.enabled }
-
-  temp_datadog_slo_metric_monitors = flatten([
-    for name, slo in var.datadog_slos : [
-      for i, threshold in slo.thresholds : {
-        slo       = slo,
-        slo_name  = format("%s_threshold%s", name, i)
-        threshold = threshold
-      }
-      if slo.type == "metric" && local.enabled && lookup(slo, "enabled", true)
-    ]
-  ])
-
-  datadog_slo_metric_monitors = { for monitor in local.temp_datadog_slo_metric_monitors : monitor.slo_name => monitor }
-
   alert_tags = local.enabled && var.alert_tags != null ? format("%s%s", var.alert_tags_separator, join(var.alert_tags_separator, var.alert_tags)) : ""
 }
-
-resource "datadog_service_level_objective" "monitor_slo" {
-  for_each = local.datadog_monitor_slos
-
-  #  Required
-  name = each.value.name
-  type = each.value.type
-
-  dynamic "thresholds" {
-    for_each = each.value.thresholds
-    content {
-      target    = lookup(thresholds, "target", "99.00")
-      timeframe = lookup(thresholds, "timeframe", "7d")
-
-      target_display  = lookup(thresholds, "target_display", "98.00")
-      warning         = lookup(thresholds, "warning", "99.95")
-      warning_display = lookup(thresholds, "warning_display", "98.00")
-    }
-  }
-
-  groups      = lookup(each.value, "groups", [])
-  monitor_ids = each.value.monitor_ids
-
-  #  Optional
-  description  = lookup(each.value, "description", null)
-  force_delete = lookup(each.value, "force_delete", true)
-  validate     = lookup(each.value, "validate", false)
-
-  # Convert terraform tags map to Datadog tags map
-  # If a key is supplied with a value, it will render "key:value" as a tag
-  #   tags:
-  #     key: value
-  # If a key is supplied without a value (null), it will render "key" as a tag
-  #   tags:
-  #     key: null
-  tags = [
-    for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk)
-  ]
-}
-
-resource "datadog_service_level_objective" "metric_slo" {
-  for_each = local.datadog_metric_slos
-
-  #  Required
-  name = each.value.name
-  type = each.value.type
-
-  query {
-    denominator = each.value.query.denominator
-    numerator   = each.value.query.numerator
-  }
-
-  #  Optional
-  description  = lookup(each.value, "description", null)
-  force_delete = lookup(each.value, "force_delete", true)
-  validate     = lookup(each.value, "validate", false)
-
-  dynamic "thresholds" {
-    for_each = each.value.thresholds
-    content {
-      target    = lookup(thresholds.value, "target", null)
-      timeframe = lookup(thresholds.value, "timeframe", null)
-      warning   = lookup(thresholds.value, "warning", null)
-    }
-  }
-
-  # Convert terraform tags map to Datadog tags map
-  # If a key is supplied with a value, it will render "key:value" as a tag
-  #   tags:
-  #     key: value
-  # If a key is supplied without a value (null), it will render "key" as a tag
-  #   tags:
-  #     key: null
-  tags = [
-    for tagk, tagv in lookup(each.value, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk)
-  ]
-}
-
-resource "datadog_monitor" "metric_slo_alert" {
-  for_each = local.datadog_slo_metric_monitors
-
-  name    = format("(SLO Error Budget Alert) %s", each.value.slo.name)
-  type    = "slo alert"
-  message = format("%s%s", each.value.slo.message, local.alert_tags)
-
-  query = <<EOF
-    error_budget("${datadog_service_level_objective.metric_slo[each.value.slo.name].id}").over("${each.value.threshold.timeframe}") > ${lookup(each.value.threshold, "target", "99.00")}
-  EOF
-  monitor_thresholds {
-    critical = lookup(each.value.threshold, "target", null)
-  }
-
-  # Convert terraform tags map to Datadog tags map
-  # If a key is supplied with a value, it will render "key:value" as a tag
-  #   tags:
-  #     key: value
-  # If a key is supplied without a value (null), it will render "key" as a tag
-  #   tags:
-  #     key: null
-  tags = [
-    for tagk, tagv in lookup(each.value.slo, "tags", module.this.tags) : (tagv != null ? format("%s:%s", tagk, tagv) : tagk)
-  ]
-}