diff --git a/.github/workflows/lint-terraform.yml b/.github/workflows/lint-terraform.yml index 9a23c54..0dce770 100644 --- a/.github/workflows/lint-terraform.yml +++ b/.github/workflows/lint-terraform.yml @@ -2,9 +2,9 @@ name: Lint Terraform on: push: - branches: [ "main" ] + branches: [ "main", "feat/sagemaker-llms" ] pull_request: - branches: [ "main" ] + branches: [ "main", "feat/sagemaker-llms" ] jobs: test: diff --git a/infra/main.tf b/infra/main.tf index 552c348..5000efa 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -264,10 +264,10 @@ variable "s3_prefixes_for_external_role_copy" { variable "sagemaker_example_inference_image" { default = "" } -variable "sagemaker_models_folder" {default = ""} -variable "hugging_face_model_image" {default = ""} -variable "sagemaker_default_bucket" {default = ""} -variable "sagemaker_budget_emails" {default = [""]} +variable "sagemaker_models_folder" { default = "" } +variable "hugging_face_model_image" { default = "" } +variable "sagemaker_default_bucket" { default = "" } +variable "sagemaker_budget_emails" { default = [""] } locals { diff --git a/infra/modules/autoscaling/sagemaker/main.tf b/infra/modules/autoscaling/sagemaker/main.tf index 9ea2d31..cd729c8 100644 --- a/infra/modules/autoscaling/sagemaker/main.tf +++ b/infra/modules/autoscaling/sagemaker/main.tf @@ -2,18 +2,18 @@ resource "aws_appautoscaling_target" "sagemaker_target" { max_capacity = var.max_capacity min_capacity = var.min_capacity - resource_id = var.resource_id # e.g., "endpoint/${module.gpt_neo_125_endpoint.endpoint_name}/variant/${module.gpt_neo_125_endpoint.variant_name}" + resource_id = var.resource_id # e.g., "endpoint/${module.gpt_neo_125_endpoint.endpoint_name}/variant/${module.gpt_neo_125_endpoint.variant_name}" scalable_dimension = var.scalable_dimension service_namespace = "sagemaker" } # Scale-Up Policy (Triggered by Backlog in the Queue) resource "aws_appautoscaling_policy" "scale_up" { - name = "${var.resource_id}-scale-up" - policy_type = "StepScaling" - resource_id = aws_appautoscaling_target.sagemaker_target.resource_id - scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension - service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace + name = "${var.resource_id}-scale-up" + policy_type = "StepScaling" + resource_id = aws_appautoscaling_target.sagemaker_target.resource_id + scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension + service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace step_scaling_policy_configuration { adjustment_type = "ChangeInCapacity" @@ -57,11 +57,11 @@ resource "aws_appautoscaling_policy" "scale_out_cpu" { # Scale-In Policy to Reduce Capacity to Zero resource "aws_appautoscaling_policy" "scale_in_to_zero" { - name = "${var.resource_id}-scale-in-to-zero" - policy_type = "StepScaling" - resource_id = aws_appautoscaling_target.sagemaker_target.resource_id - scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension - service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace + name = "${var.resource_id}-scale-in-to-zero" + policy_type = "StepScaling" + resource_id = aws_appautoscaling_target.sagemaker_target.resource_id + scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension + service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace step_scaling_policy_configuration { adjustment_type = "ChangeInCapacity" @@ -70,15 +70,15 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero" { # Adjust capacity to 0 when underutilization is detected step_adjustment { metric_interval_lower_bound = 0 # Handles all values from 0% and above - metric_interval_upper_bound = 5 # Upper bound of 5 - scaling_adjustment = -1 # Set capacity to zero instances + metric_interval_upper_bound = 5 # Upper bound of 5 + scaling_adjustment = -1 # Set capacity to zero instances } # Step adjustment to handle all values above the upper bound (fallback) step_adjustment { - metric_interval_lower_bound = null # Anything below 0 - metric_interval_upper_bound = 0 # Unspecified upper bound to catch all higher values - scaling_adjustment = -1 # Set capacity to zero instances + metric_interval_lower_bound = null # Anything below 0 + metric_interval_upper_bound = 0 # Unspecified upper bound to catch all higher values + scaling_adjustment = -1 # Set capacity to zero instances } } @@ -88,11 +88,11 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero" { # Scale-In Policy to Reduce Capacity to Zero Based on backlog size resource "aws_appautoscaling_policy" "scale_in_to_zero_based_on_backlog" { - name = "${var.resource_id}-scale-in-zero-backlog" - policy_type = "StepScaling" - resource_id = aws_appautoscaling_target.sagemaker_target.resource_id - scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension - service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace + name = "${var.resource_id}-scale-in-zero-backlog" + policy_type = "StepScaling" + resource_id = aws_appautoscaling_target.sagemaker_target.resource_id + scalable_dimension = aws_appautoscaling_target.sagemaker_target.scalable_dimension + service_namespace = aws_appautoscaling_target.sagemaker_target.service_namespace step_scaling_policy_configuration { @@ -102,9 +102,9 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero_based_on_backlog" { # Step adjustment for when there are zero queries in the backlog step_adjustment { - metric_interval_lower_bound = null # No lower bound (cover everything below 0) - metric_interval_upper_bound = 0.0 # Exact match for zero backlog size - scaling_adjustment = 0 # Set capacity to zero instances + metric_interval_lower_bound = null # No lower bound (cover everything below 0) + metric_interval_upper_bound = 0.0 # Exact match for zero backlog size + scaling_adjustment = 0 # Set capacity to zero instances } } diff --git a/infra/modules/autoscaling/sagemaker/outputs.tf b/infra/modules/autoscaling/sagemaker/outputs.tf index 1bf4a04..88553a2 100644 --- a/infra/modules/autoscaling/sagemaker/outputs.tf +++ b/infra/modules/autoscaling/sagemaker/outputs.tf @@ -15,5 +15,5 @@ output "scale_out_cpu_policy_arn" { output "scale_in_to_zero_based_on_backlog_arn" { description = "ARN of the autoscaling policy to scale in to zero for backlog queries when 0 for x minutes" - value = aws_appautoscaling_policy.scale_in_to_zero_based_on_backlog.arn + value = aws_appautoscaling_policy.scale_in_to_zero_based_on_backlog.arn } diff --git a/infra/modules/cloudwatch_alarm/sagemaker/main.tf b/infra/modules/cloudwatch_alarm/sagemaker/main.tf index 96c37fd..8356b95 100644 --- a/infra/modules/cloudwatch_alarm/sagemaker/main.tf +++ b/infra/modules/cloudwatch_alarm/sagemaker/main.tf @@ -1,6 +1,6 @@ resource "aws_cloudwatch_metric_alarm" "sagemaker_alarm" { alarm_name = var.alarm_name - alarm_description = var.alarm_description + alarm_description = var.alarm_description metric_name = var.metric_name namespace = var.namespace comparison_operator = var.comparison_operator @@ -10,10 +10,10 @@ resource "aws_cloudwatch_metric_alarm" "sagemaker_alarm" { treat_missing_data = "missing" statistic = "Average" period = var.period - + dimensions = { EndpointName = var.endpoint_name - VariantName = var.variant_name != null ? var.variant_name : "" + VariantName = var.variant_name != null ? var.variant_name : "" } diff --git a/infra/modules/cloudwatch_alarm/sagemaker/variables.tf b/infra/modules/cloudwatch_alarm/sagemaker/variables.tf index e5cfe59..82fb9f4 100644 --- a/infra/modules/cloudwatch_alarm/sagemaker/variables.tf +++ b/infra/modules/cloudwatch_alarm/sagemaker/variables.tf @@ -39,24 +39,24 @@ variable "alarm_actions" { } variable "datapoints_to_alarm" { - type = number - description = "Data points that must be breaching to trigger alarm" - + type = number + description = "Data points that must be breaching to trigger alarm" + } variable "alarm_description" { - type = string + type = string description = "Description of the Alarm" } variable "endpoint_name" { - type = string + type = string description = "Endpoint name - typically /aws/sagemaker/Endpoints for sagemaker" } variable "variant_name" { - type = string + type = string description = "Vairant name of the endpoint" nullable = true default = "" diff --git a/infra/modules/cost_monitoring/budgets/main.tf b/infra/modules/cost_monitoring/budgets/main.tf index 8202d5a..28ac935 100644 --- a/infra/modules/cost_monitoring/budgets/main.tf +++ b/infra/modules/cost_monitoring/budgets/main.tf @@ -1,34 +1,34 @@ resource "aws_budgets_budget" "monthly_cost_budget" { - name = "${var.budget_name}-monthly-cost-budget" - budget_type = "COST" - limit_amount = var.budget_limit - limit_unit = "USD" - time_unit = "MONTHLY" + name = "${var.budget_name}-monthly-cost-budget" + budget_type = "COST" + limit_amount = var.budget_limit + limit_unit = "USD" + time_unit = "MONTHLY" cost_filter { values = [var.cost_filter_service] - name = "Service" + name = "Service" } notification { - notification_type = "ACTUAL" - threshold_type = "PERCENTAGE" - comparison_operator = "GREATER_THAN" - threshold = 80 + notification_type = "ACTUAL" + threshold_type = "PERCENTAGE" + comparison_operator = "GREATER_THAN" + threshold = 80 subscriber_email_addresses = var.notification_email # Secrets to be passed - subscriber_sns_topic_arns = [var.sns_topic_arn] + subscriber_sns_topic_arns = [var.sns_topic_arn] } notification { - notification_type = "ACTUAL" - threshold_type = "PERCENTAGE" - comparison_operator = "GREATER_THAN" - threshold = 100 + notification_type = "ACTUAL" + threshold_type = "PERCENTAGE" + comparison_operator = "GREATER_THAN" + threshold = 100 subscriber_email_addresses = var.notification_email # Secrets to be passed - subscriber_sns_topic_arns = [var.sns_topic_arn] + subscriber_sns_topic_arns = [var.sns_topic_arn] } } diff --git a/infra/modules/cost_monitoring/budgets/output.tf b/infra/modules/cost_monitoring/budgets/output.tf index eea64e8..37306e2 100644 --- a/infra/modules/cost_monitoring/budgets/output.tf +++ b/infra/modules/cost_monitoring/budgets/output.tf @@ -1,3 +1,3 @@ output "budget_name" { - value = aws_budgets_budget.monthly_cost_budget.name + value = aws_budgets_budget.monthly_cost_budget.name } \ No newline at end of file diff --git a/infra/modules/cost_monitoring/budgets/variables.tf b/infra/modules/cost_monitoring/budgets/variables.tf index 4ed30dc..aabdfb0 100644 --- a/infra/modules/cost_monitoring/budgets/variables.tf +++ b/infra/modules/cost_monitoring/budgets/variables.tf @@ -1,39 +1,39 @@ variable "budget_name" { - type = string - description = "AWS Budget name" + type = string + description = "AWS Budget name" } variable "budget_limit" { - type = string - default = null - description = "Optional monthly budget limit for AWS for the budget" + type = string + default = null + description = "Optional monthly budget limit for AWS for the budget" } variable "time_unit" { - description = "Budget time unit, i.e. Monthly, etc" - type = string - default = "MONTHLY" + description = "Budget time unit, i.e. Monthly, etc" + type = string + default = "MONTHLY" } variable "notification_thresholds" { - type = list(number) - default = [80, 100] - description = "list of notification thresholds in %" + type = list(number) + default = [80, 100] + description = "list of notification thresholds in %" } variable "notification_email" { - type = list(string) - description = "email for who recieves budget alerts" + type = list(string) + description = "email for who recieves budget alerts" } variable "sns_topic_arn" { - type = string - description = "ARN of SNS topic for budget alerts" - + type = string + description = "ARN of SNS topic for budget alerts" + } variable "cost_filter_service" { - type = string - description = "service to apply cost filter on" - default = "Amazon SageMaker" + type = string + description = "service to apply cost filter on" + default = "Amazon SageMaker" } diff --git a/infra/modules/cost_monitoring/sagemaker/main.tf b/infra/modules/cost_monitoring/sagemaker/main.tf index af8d640..23268a8 100644 --- a/infra/modules/cost_monitoring/sagemaker/main.tf +++ b/infra/modules/cost_monitoring/sagemaker/main.tf @@ -11,7 +11,7 @@ resource "aws_cloudwatch_dashboard" "cost_dashboard" { "height" : 6, "properties" : { "metrics" : [ - [ "AWS/Billing", "EstimatedCharges", "Currency", "USD" ] + ["AWS/Billing", "EstimatedCharges", "Currency", "USD"] ], "period" : 86400, "stat" : "Maximum", @@ -27,9 +27,9 @@ resource "aws_cloudwatch_dashboard" "cost_dashboard" { "height" : 6, "properties" : { "metrics" : [ - [ "AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonSageMaker", "Currency", "USD" ], - [ "AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonEC2", "Currency", "USD" ], - [ "AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonS3", "Currency", "USD" ] + ["AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonSageMaker", "Currency", "USD"], + ["AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonEC2", "Currency", "USD"], + ["AWS/Billing", "EstimatedCharges", "ServiceName", "AmazonS3", "Currency", "USD"] ], "period" : 86400, "stat" : "Maximum", @@ -45,7 +45,7 @@ resource "aws_cloudwatch_dashboard" "cost_dashboard" { "height" : 6, "properties" : { "metrics" : [ - [ "AWS/Billing", "EstimatedCharges", "Currency", "USD", { "stat": "Average" } ] + ["AWS/Billing", "EstimatedCharges", "Currency", "USD", { "stat" : "Average" }] ], "period" : 3600, "stat" : "Average", diff --git a/infra/modules/cost_monitoring/sagemaker/variables.tf b/infra/modules/cost_monitoring/sagemaker/variables.tf index 08e7a83..fdb5750 100644 --- a/infra/modules/cost_monitoring/sagemaker/variables.tf +++ b/infra/modules/cost_monitoring/sagemaker/variables.tf @@ -1,10 +1,10 @@ variable "dashboard_name" { - description = "Name of the CloudWatch dashboard" - type = string + description = "Name of the CloudWatch dashboard" + type = string } variable "services_to_monitor" { - description = "List of AWS services to monitor costs from" - type = list(string) - default = ["AmazonSageMaker", "AmazonEC2", "AmazonS3"] + description = "List of AWS services to monitor costs from" + type = list(string) + default = ["AmazonSageMaker", "AmazonEC2", "AmazonS3"] } \ No newline at end of file diff --git a/infra/modules/lambda/main.tf b/infra/modules/lambda/main.tf index 3535c5b..a37f4a4 100644 --- a/infra/modules/lambda/main.tf +++ b/infra/modules/lambda/main.tf @@ -1,17 +1,17 @@ resource "aws_lambda_function" "sagemaker_to_s3" { - function_name = "sagemaker-logs-to-s3" - role = var.log_delivery_role_arn - handler = "index.lambda_handler" - runtime = "python3.9" + function_name = "sagemaker-logs-to-s3" + role = var.log_delivery_role_arn + handler = "index.lambda_handler" + runtime = "python3.9" - environment { - variables = { - S3_BUCKET_NAME = var.s3_bucket_name - } + environment { + variables = { + S3_BUCKET_NAME = var.s3_bucket_name } + } - source_code_hash = filebase64sha256("${path.module}/lambda_function.zip") - filename = "${path.module}/lambda_function.zip" + source_code_hash = filebase64sha256("${path.module}/lambda_function.zip") + filename = "${path.module}/lambda_function.zip" } resource "aws_lambda_permission" "allow_cloudwatch_logs" { diff --git a/infra/modules/lambda/outputs.tf b/infra/modules/lambda/outputs.tf index f5a080a..87875b7 100644 --- a/infra/modules/lambda/outputs.tf +++ b/infra/modules/lambda/outputs.tf @@ -1,3 +1,3 @@ output "lambda_function_arn" { - value = aws_lambda_function.sagemaker_to_s3.arn + value = aws_lambda_function.sagemaker_to_s3.arn } \ No newline at end of file diff --git a/infra/modules/lambda/variables.tf b/infra/modules/lambda/variables.tf index 6b4cb19..c595c7f 100644 --- a/infra/modules/lambda/variables.tf +++ b/infra/modules/lambda/variables.tf @@ -1,15 +1,15 @@ variable "s3_bucket_name" { - type = string - description = "S3 bucket name for storing logs" + type = string + description = "S3 bucket name for storing logs" } variable "log_delivery_role_arn" { - type = string - description = "ARN of the iAM role for Lambda" + type = string + description = "ARN of the iAM role for Lambda" } variable "sagemaker_log_group_arns" { - type = list(string) + type = list(string) } variable "account_id" { diff --git a/infra/modules/logs/main.tf b/infra/modules/logs/main.tf index 273b49e..62b6ee5 100644 --- a/infra/modules/logs/main.tf +++ b/infra/modules/logs/main.tf @@ -1,17 +1,17 @@ resource "aws_cloudwatch_log_group" "budget_alert_log_group" { - name = "/aws/budget-alerts/${var.prefix}" - retention_in_days = var.retention_in_days + name = "/aws/budget-alerts/${var.prefix}" + retention_in_days = var.retention_in_days } data "aws_cloudwatch_log_group" "sagemaker_logs" { - for_each = toset(var.endpoint_names) - name = "/aws/sagemaker/Endpoints/${each.key}" + for_each = toset(var.endpoint_names) + name = "/aws/sagemaker/Endpoints/${each.key}" } resource "aws_cloudwatch_log_subscription_filter" "sagemaker_logs" { - for_each = toset(var.endpoint_names) - name = "sagemaker-log-filter-${each.key}" - log_group_name = data.aws_cloudwatch_log_group.sagemaker_logs[each.key].name - destination_arn = var.lambda_function_arn - filter_pattern = "" + for_each = toset(var.endpoint_names) + name = "sagemaker-log-filter-${each.key}" + log_group_name = data.aws_cloudwatch_log_group.sagemaker_logs[each.key].name + destination_arn = var.lambda_function_arn + filter_pattern = "" } \ No newline at end of file diff --git a/infra/modules/logs/variables.tf b/infra/modules/logs/variables.tf index 311faae..8b80846 100644 --- a/infra/modules/logs/variables.tf +++ b/infra/modules/logs/variables.tf @@ -1,19 +1,19 @@ variable "prefix" { - type = string - description = "prefix for the cloduwatch log group" - default = "" + type = string + description = "prefix for the cloduwatch log group" + default = "" } variable "retention_in_days" { - type = number - default = 90 - description = "number of days ot retain cloudwatch logs" + type = number + default = 90 + description = "number of days ot retain cloudwatch logs" } variable "endpoint_names" { - type = list(string) + type = list(string) } variable "lambda_function_arn" { - type = string + type = string } \ No newline at end of file diff --git a/infra/modules/s3/main.tf b/infra/modules/s3/main.tf index 1f2e1e4..4833613 100644 --- a/infra/modules/s3/main.tf +++ b/infra/modules/s3/main.tf @@ -1,45 +1,45 @@ resource "aws_s3_bucket" "centralized_logs" { - # Consolidation of logs into S3 Bucket - bucket = "${var.prefix}-centralized" + # Consolidation of logs into S3 Bucket + bucket = "${var.prefix}-centralized" - tags = { - Name = "${var.prefix} SageMaker Log Bucket" - } + tags = { + Name = "${var.prefix} SageMaker Log Bucket" + } } resource "aws_s3_bucket_lifecycle_configuration" "lifecycle" { - bucket = aws_s3_bucket.centralized_logs.id - rule { - status = "Enabled" - id = "archive" - transition { - days = var.glacier_transition_days - storage_class = "GLACIER" - } - - expiration { - days = var.retention_days - } - } + bucket = aws_s3_bucket.centralized_logs.id + rule { + status = "Enabled" + id = "archive" + transition { + days = var.glacier_transition_days + storage_class = "GLACIER" + } + + expiration { + days = var.retention_days + } + } } resource "aws_cloudwatch_log_group" "sagemaker_logs" { - name = "/aws/sagemaker/centralized_logs" - retention_in_days = var.log_retention_days + name = "/aws/sagemaker/centralized_logs" + retention_in_days = var.log_retention_days } resource "aws_s3_bucket_server_side_encryption_configuration" "encryption" { - bucket = aws_s3_bucket.centralized_logs.id + bucket = aws_s3_bucket.centralized_logs.id - rule { - apply_server_side_encryption_by_default { - sse_algorithm = "AES256" - } + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" } + } } diff --git a/infra/modules/s3/outputs.tf b/infra/modules/s3/outputs.tf index e34c81b..9dd037e 100644 --- a/infra/modules/s3/outputs.tf +++ b/infra/modules/s3/outputs.tf @@ -1,3 +1,3 @@ output "s3_bucket_arn" { - value = aws_s3_bucket.centralized_logs.arn + value = aws_s3_bucket.centralized_logs.arn } \ No newline at end of file diff --git a/infra/modules/s3/variables.tf b/infra/modules/s3/variables.tf index 099e6fc..41662cd 100644 --- a/infra/modules/s3/variables.tf +++ b/infra/modules/s3/variables.tf @@ -1,23 +1,23 @@ variable "prefix" { - type = string - description = "Resource name prefix" + type = string + description = "Resource name prefix" } variable "glacier_transition_days" { - type = number - description = "Number of days before moving logs to glacier" - default = 180 + type = number + description = "Number of days before moving logs to glacier" + default = 180 } variable "retention_days" { - type = number + type = number description = "number of days to retain logs before deletion" - default = 365 + default = 365 } variable "log_retention_days" { - type = number - description = "number of days to retain logs from sagemaker" - default = 180 + type = number + description = "number of days to retain logs from sagemaker" + default = 180 } diff --git a/infra/modules/sagemaker_deployment/main.tf b/infra/modules/sagemaker_deployment/main.tf index 97cf684..816377b 100644 --- a/infra/modules/sagemaker_deployment/main.tf +++ b/infra/modules/sagemaker_deployment/main.tf @@ -4,9 +4,9 @@ resource "aws_sagemaker_model" "sagemaker_model" { execution_role_arn = var.execution_role_arn primary_container { - image = var.container_image - model_data_url = var.model_data_url - environment = var.environment + image = var.container_image + model_data_url = var.model_data_url + environment = var.environment } vpc_config { @@ -31,7 +31,7 @@ resource "aws_sagemaker_endpoint_configuration" "endpoint_config" { s3_output_path = var.s3_output_path notification_config { include_inference_response_in = ["SUCCESS_NOTIFICATION_TOPIC"] - success_topic = var.sns_success_topic_arn + success_topic = var.sns_success_topic_arn } } } @@ -41,7 +41,7 @@ resource "aws_sagemaker_endpoint_configuration" "endpoint_config" { resource "aws_sagemaker_endpoint" "sagemaker_endpoint" { name = var.endpoint_name endpoint_config_name = aws_sagemaker_endpoint_configuration.endpoint_config.name - depends_on = [aws_sagemaker_endpoint_configuration.endpoint_config, var.sns_success_topic_arn] + depends_on = [aws_sagemaker_endpoint_configuration.endpoint_config, var.sns_success_topic_arn] } # Autoscaling Target Resource @@ -62,9 +62,9 @@ resource "aws_appautoscaling_policy" "scale_up_policy" { service_namespace = aws_appautoscaling_target.autoscaling_target.service_namespace step_scaling_policy_configuration { - adjustment_type = "ChangeInCapacity" - metric_aggregation_type = "Average" - cooldown = var.scale_up_cooldown + adjustment_type = "ChangeInCapacity" + metric_aggregation_type = "Average" + cooldown = var.scale_up_cooldown step_adjustment { metric_interval_lower_bound = 0 @@ -87,14 +87,14 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero_policy" { step_adjustment { metric_interval_lower_bound = null # No lower bound to cover everything - metric_interval_upper_bound = 5 # Upper bound is 5% + metric_interval_upper_bound = 5 # Upper bound is 5% scaling_adjustment = 0 } step_adjustment { - metric_interval_lower_bound = 5 # Lower bound starts at 5% + metric_interval_lower_bound = 5 # Lower bound starts at 5% metric_interval_upper_bound = null # No upper bound - scaling_adjustment = 1 # Maintains min capacity of one instance + scaling_adjustment = 1 # Maintains min capacity of one instance } cooldown = var.scale_in_to_zero_cooldown @@ -104,28 +104,28 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero_policy" { # Scale-In Policy to Reduce Capacity to Zero Based on backlog size resource "aws_appautoscaling_policy" "scale_in_to_zero_based_on_backlog" { - name = "scale-in-to-zero-backlog-policy-${var.model_name}" - policy_type = "StepScaling" - resource_id = aws_appautoscaling_target.autoscaling_target.resource_id - scalable_dimension = aws_appautoscaling_target.autoscaling_target.scalable_dimension - service_namespace = aws_appautoscaling_target.autoscaling_target.service_namespace + name = "scale-in-to-zero-backlog-policy-${var.model_name}" + policy_type = "StepScaling" + resource_id = aws_appautoscaling_target.autoscaling_target.resource_id + scalable_dimension = aws_appautoscaling_target.autoscaling_target.scalable_dimension + service_namespace = aws_appautoscaling_target.autoscaling_target.service_namespace step_scaling_policy_configuration { - adjustment_type = "ExactCapacity" # Set the capacity exactly to zero + adjustment_type = "ExactCapacity" # Set the capacity exactly to zero # Step adjustment for when there are zero queries in the backlog step_adjustment { - metric_interval_lower_bound = null # No lower bound (cover everything below 0) - metric_interval_upper_bound = 0 # Exact match for zero backlog size - scaling_adjustment = 0 # Set capacity to zero instances + metric_interval_lower_bound = null # No lower bound (cover everything below 0) + metric_interval_upper_bound = 0 # Exact match for zero backlog size + scaling_adjustment = 0 # Set capacity to zero instances } # Falllback for any value above 0 to prevent overlap step_adjustment { metric_interval_lower_bound = 0 # No lower bound (cover everything below 0) - metric_interval_upper_bound = null # Exact match for zero backlog size - scaling_adjustment = 1 # Set capacity to zero instances + metric_interval_upper_bound = null # Exact match for zero backlog size + scaling_adjustment = 1 # Set capacity to zero instances } cooldown = var.scale_in_to_zero_cooldown @@ -136,15 +136,15 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero_based_on_backlog" { } resource "aws_cloudwatch_log_metric_filter" "unatuhorized_operations" { - name = "unauthorized-operations-filter" - log_group_name = "${var.log_group_name}" - pattern = "{ $.errorCode = \"UnauthorizedOperation\" || $.errorCode = \"AccessDenied\" }" - - metric_transformation { - name = "UnauthorizedOperationsCount" - namespace = "CloudTrailMetrics" - value = "1" - } + name = "unauthorized-operations-filter" + log_group_name = var.log_group_name + pattern = "{ $.errorCode = \"UnauthorizedOperation\" || $.errorCode = \"AccessDenied\" }" + + metric_transformation { + name = "UnauthorizedOperationsCount" + namespace = "CloudTrailMetrics" + value = "1" + } } # Loop through the alarm definitions to create multiple CloudWatch alarms @@ -166,7 +166,7 @@ resource "aws_cloudwatch_metric_alarm" "cloudwatch_alarm" { # first alarm will not have a null variantName dimensions = count.index == 0 ? { EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name - } : { + } : { EndpointName = aws_sagemaker_endpoint.sagemaker_endpoint.name, VariantName = var.variant_name } diff --git a/infra/modules/sagemaker_deployment/outputs.tf b/infra/modules/sagemaker_deployment/outputs.tf index a27a0ee..8bde58d 100644 --- a/infra/modules/sagemaker_deployment/outputs.tf +++ b/infra/modules/sagemaker_deployment/outputs.tf @@ -16,5 +16,5 @@ output "scale_in_to_zero_policy_arn" { output "scale_in_to_zero_based_on_backlog_arn" { description = "ARN of the autoscaling policy to scale in to zero for backlog queries when 0 for x minutes" - value = aws_appautoscaling_policy.scale_in_to_zero_based_on_backlog.arn + value = aws_appautoscaling_policy.scale_in_to_zero_based_on_backlog.arn } \ No newline at end of file diff --git a/infra/modules/sagemaker_deployment/variables.tf b/infra/modules/sagemaker_deployment/variables.tf index 5bc1988..6b91514 100644 --- a/infra/modules/sagemaker_deployment/variables.tf +++ b/infra/modules/sagemaker_deployment/variables.tf @@ -90,8 +90,8 @@ variable "scale_up_cooldown" { } variable "scale_in_to_zero_cooldown" { - type = number - description = "Cooldown period for scale down" + type = number + description = "Cooldown period for scale down" } @@ -113,7 +113,7 @@ variable "alarms" { } variable "log_group_name" { - type = string + type = string description = "log group name, i.e. gpt-neo-125m..." - default = "" + default = "" } \ No newline at end of file diff --git a/infra/modules/sagemaker_init/domain/main.tf b/infra/modules/sagemaker_init/domain/main.tf index a245b88..5e7fb0d 100644 --- a/infra/modules/sagemaker_init/domain/main.tf +++ b/infra/modules/sagemaker_init/domain/main.tf @@ -1,8 +1,8 @@ resource "aws_sagemaker_domain" "sagemaker" { - domain_name = var.domain_name - auth_mode = "IAM" - vpc_id = var.vpc_id - subnet_ids = var.subnet_ids + domain_name = var.domain_name + auth_mode = "IAM" + vpc_id = var.vpc_id + subnet_ids = var.subnet_ids app_network_access_type = "VpcOnly" default_user_settings { diff --git a/infra/modules/sagemaker_init/domain/variables.tf b/infra/modules/sagemaker_init/domain/variables.tf index 79997d6..2105345 100644 --- a/infra/modules/sagemaker_init/domain/variables.tf +++ b/infra/modules/sagemaker_init/domain/variables.tf @@ -6,15 +6,15 @@ variable "domain_name" { variable "vpc_id" { type = string - description = "VPC ID" + description = "VPC ID" } variable "subnet_ids" { type = any - description = "subnet ids" + description = "subnet ids" } variable "execution_role_arn" { type = string - description = "The execution role" + description = "The execution role" } \ No newline at end of file diff --git a/infra/modules/sagemaker_init/iam/main.tf b/infra/modules/sagemaker_init/iam/main.tf index 37577b7..e18e5ee 100644 --- a/infra/modules/sagemaker_init/iam/main.tf +++ b/infra/modules/sagemaker_init/iam/main.tf @@ -93,7 +93,7 @@ data "aws_iam_policy_document" "sagemaker_inference_policy_document" { ] resources = ["*"] } - statement { + statement { actions = [ "application-autoscaling:DeleteScalingPolicy", "application-autoscaling:DeleteScheduledAction", @@ -106,7 +106,7 @@ data "aws_iam_policy_document" "sagemaker_inference_policy_document" { "application-autoscaling:PutScheduledAction", "application-autoscaling:RegisterScalableTarget", ] - resources = ["*",] + resources = ["*", ] } statement { @@ -124,7 +124,7 @@ data "aws_iam_policy_document" "sagemaker_inference_policy_document" { "ec2:DescribeVpcEndpoints", "ec2:DescribeVpcs", ] - resources = ["*",] + resources = ["*", ] } statement { @@ -141,7 +141,7 @@ data "aws_iam_policy_document" "sagemaker_inference_policy_document" { "logs:PutResourcePolicy", "logs:UpdateLogDelivery", ] - resources = ["*",] + resources = ["*", ] } } @@ -172,7 +172,7 @@ data "aws_iam_policy_document" "lambda_assume_role_policy" { statement { actions = ["sts:AssumeRole"] principals { - type = "Service" + type = "Service" identifiers = ["lambda.amazonaws.com"] } } @@ -199,7 +199,7 @@ data "aws_iam_policy_document" "lambda_execution_policy" { "logs:PutLogEvents" ] resources = [ - "arn:aws:logs:eu-west-2:${var.account_id}:log-group:*" + "arn:aws:logs:eu-west-2:${var.account_id}:log-group:*" ] } } @@ -210,33 +210,33 @@ data "aws_iam_policy_document" "cloudwatch_log_invoke_policy" { "lambda:InvokeFunction" ] resources = [ - var.lambda_function_arn + var.lambda_function_arn ] } } resource "aws_iam_policy" "lambda_execution_policy" { - name = "${var.prefix}-lambda-execution-policy" + name = "${var.prefix}-lambda-execution-policy" policy = data.aws_iam_policy_document.lambda_execution_policy.json } resource "aws_iam_policy" "cloudwatch_log_invoke_policy" { - name = "${var.prefix}-cloudwatch-log-invoke-policy" + name = "${var.prefix}-cloudwatch-log-invoke-policy" policy = data.aws_iam_policy_document.cloudwatch_log_invoke_policy.json } resource "aws_iam_role" "lambda_execution_role" { - name = "${var.prefix}-lambda-execution-role" + name = "${var.prefix}-lambda-execution-role" assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json } resource "aws_iam_role_policy_attachment" "attach_lambda_execution_policy" { - role = aws_iam_role.lambda_execution_role.name + role = aws_iam_role.lambda_execution_role.name policy_arn = aws_iam_policy.lambda_execution_policy.arn } resource "aws_iam_role_policy_attachment" "attach_cloudwatch_log_invoke_policy" { - role = aws_iam_role.lambda_execution_role.name + role = aws_iam_role.lambda_execution_role.name policy_arn = aws_iam_policy.cloudwatch_log_invoke_policy.arn } diff --git a/infra/modules/sagemaker_init/iam/outputs.tf b/infra/modules/sagemaker_init/iam/outputs.tf index 03b4d37..1161cf9 100644 --- a/infra/modules/sagemaker_init/iam/outputs.tf +++ b/infra/modules/sagemaker_init/iam/outputs.tf @@ -1,16 +1,16 @@ output "execution_role" { - description = "ARN of the sagemaker execution role" - value = aws_iam_role.sagemaker.arn + description = "ARN of the sagemaker execution role" + value = aws_iam_role.sagemaker.arn } output "inference_role" { - description = "ARN of the sagemaker inference role" - value = aws_iam_role.inference_role.arn + description = "ARN of the sagemaker inference role" + value = aws_iam_role.inference_role.arn } output "default_sagemaker_bucket" { - description = "Default sagemaker bucket data object" - value = data.aws_s3_bucket.sagemaker_default_bucket + description = "Default sagemaker bucket data object" + value = data.aws_s3_bucket.sagemaker_default_bucket } # Output for Lambda Execution Role ARN diff --git a/infra/modules/sagemaker_init/iam/variables.tf b/infra/modules/sagemaker_init/iam/variables.tf index 0b3517b..e308f24 100644 --- a/infra/modules/sagemaker_init/iam/variables.tf +++ b/infra/modules/sagemaker_init/iam/variables.tf @@ -9,22 +9,20 @@ variable "sagemaker_default_bucket_name" { } variable "aws_s3_bucket_notebook" { - type = any + type = any description = "S3 bucket for notebooks" } - variable "s3_bucket_arn" { - type = string + type = string description = "arn of the s3 bucket for log storage" } variable "account_id" { - type = string + type = string description = "account ID for the AWS account, dyanmic" } variable "lambda_function_arn" { type = string - - +} diff --git a/infra/modules/sagemaker_init/security/main.tf b/infra/modules/sagemaker_init/security/main.tf index ba25609..de6fe3e 100644 --- a/infra/modules/sagemaker_init/security/main.tf +++ b/infra/modules/sagemaker_init/security/main.tf @@ -14,21 +14,21 @@ resource "aws_security_group" "notebooks_endpoints" { } resource "aws_security_group_rule" "notebooks_endpoint_ingress_sagemaker" { - description = "endpoint-ingress-from-datasets-vpc" - security_group_id = aws_security_group.notebooks_endpoints.id - cidr_blocks = var.cidr_blocks - type = "ingress" - from_port = "0" - to_port = "65535" - protocol = "tcp" + description = "endpoint-ingress-from-datasets-vpc" + security_group_id = aws_security_group.notebooks_endpoints.id + cidr_blocks = var.cidr_blocks + type = "ingress" + from_port = "0" + to_port = "65535" + protocol = "tcp" } resource "aws_security_group_rule" "notebooks_endpoint_egress_sagemaker" { - description = "endpoint-egress-from-datasets-vpc" - security_group_id = aws_security_group.notebooks_endpoints.id - cidr_blocks = var.cidr_blocks - type = "egress" - from_port = "0" - to_port = "65535" - protocol = "tcp" + description = "endpoint-egress-from-datasets-vpc" + security_group_id = aws_security_group.notebooks_endpoints.id + cidr_blocks = var.cidr_blocks + type = "egress" + from_port = "0" + to_port = "65535" + protocol = "tcp" } diff --git a/infra/modules/sagemaker_init/security/outputs.tf b/infra/modules/sagemaker_init/security/outputs.tf index 7b4feb9..8a9ae39 100644 --- a/infra/modules/sagemaker_init/security/outputs.tf +++ b/infra/modules/sagemaker_init/security/outputs.tf @@ -1,4 +1,4 @@ output "security_group_id" { - description = "ID of the SG for the SageMaker endpoints" - value = aws_security_group.notebooks_endpoints.id + description = "ID of the SG for the SageMaker endpoints" + value = aws_security_group.notebooks_endpoints.id } \ No newline at end of file diff --git a/infra/modules/sagemaker_init/security/variables.tf b/infra/modules/sagemaker_init/security/variables.tf index 05bf588..73f5439 100644 --- a/infra/modules/sagemaker_init/security/variables.tf +++ b/infra/modules/sagemaker_init/security/variables.tf @@ -1,14 +1,14 @@ variable "vpc_id" { - type = string - description = "VPC ID where SG will be created" + type = string + description = "VPC ID where SG will be created" } variable "prefix" { - type = string - description = "Prefix for naming the SGs" + type = string + description = "Prefix for naming the SGs" } variable "cidr_blocks" { - type = any - description = "List of CIDR blocks for SG rules" + type = any + description = "List of CIDR blocks for SG rules" } \ No newline at end of file diff --git a/infra/modules/sagemaker_model/main.tf b/infra/modules/sagemaker_model/main.tf index 083b8cf..a3ad58f 100644 --- a/infra/modules/sagemaker_model/main.tf +++ b/infra/modules/sagemaker_model/main.tf @@ -1,15 +1,15 @@ resource "aws_sagemaker_model" "sagemaker_model" { - name = var.model_name - execution_role_arn = var.execution_role_arn + name = var.model_name + execution_role_arn = var.execution_role_arn - primary_container { - image = var.container_image - model_data_url = var.model_data_url - environment = var.environment - } + primary_container { + image = var.container_image + model_data_url = var.model_data_url + environment = var.environment + } - vpc_config { - security_group_ids = var.security_group_ids - subnets = var.subnets - } + vpc_config { + security_group_ids = var.security_group_ids + subnets = var.subnets + } } \ No newline at end of file diff --git a/infra/modules/sagemaker_output_mover/main.tf b/infra/modules/sagemaker_output_mover/main.tf index 7b596f2..d7b3191 100644 --- a/infra/modules/sagemaker_output_mover/main.tf +++ b/infra/modules/sagemaker_output_mover/main.tf @@ -1,7 +1,7 @@ resource "aws_iam_role" "iam_for_lambda_s3_move" { name = "iam_for_lambda_s3_move" - assume_role_policy = jsonencode({ + assume_role_policy = jsonencode({ Version = "2012-10-17" Statement = [ { @@ -10,7 +10,7 @@ resource "aws_iam_role" "iam_for_lambda_s3_move" { Principal = { Service = "lambda.amazonaws.com" } - }]}) + }] }) } resource "aws_iam_role_policy" "policy_for_lambda_s3_move" { @@ -21,7 +21,7 @@ resource "aws_iam_role_policy" "policy_for_lambda_s3_move" { Version = "2012-10-17" Statement = [ { - Action = ["SNS:Receive", "SNS:Subscribe"] + Action = ["SNS:Receive", "SNS:Subscribe"] Effect = "Allow" Resource = aws_sns_topic.async-sagemaker-success-topic.arn }, @@ -36,7 +36,7 @@ resource "aws_iam_role_policy" "policy_for_lambda_s3_move" { Resource = "${var.s3_bucket_notebooks_arn}*" }, { - Action = ["logs:CreateLogGroup","logs:CreateLogStream","logs:PutLogEvents","logs:DescribeLogStreams"] + Action = ["logs:CreateLogGroup", "logs:CreateLogStream", "logs:PutLogEvents", "logs:DescribeLogStreams"] Effect = "Allow" Resource = "arn:aws:logs:*:*:*" } @@ -51,18 +51,18 @@ data "archive_file" "lambda_payload" { } resource "aws_lambda_function" "lambda_s3_move_output" { - filename = data.archive_file.lambda_payload.output_path + filename = data.archive_file.lambda_payload.output_path source_code_hash = data.archive_file.lambda_payload.output_base64sha256 - function_name = "lambda_s3_move_output" - role = aws_iam_role.iam_for_lambda_s3_move.arn - handler = "s3_move_output.lambda_handler" - runtime = "python3.12" - timeout = 30 - } + function_name = "lambda_s3_move_output" + role = aws_iam_role.iam_for_lambda_s3_move.arn + handler = "s3_move_output.lambda_handler" + runtime = "python3.12" + timeout = 30 +} resource "aws_sns_topic" "async-sagemaker-success-topic" { - name = "async-sagemaker-success-topic" + name = "async-sagemaker-success-topic" policy = data.aws_iam_policy_document.sns_publish_and_read_policy.json } @@ -73,32 +73,32 @@ resource "aws_sns_topic_subscription" "topic_lambda" { } resource "aws_lambda_permission" "with_sns" { - statement_id = "AllowExecutionFromSNS" - action = "lambda:InvokeFunction" - function_name = aws_lambda_function.lambda_s3_move_output.function_name - principal = "sns.amazonaws.com" - source_arn = aws_sns_topic.async-sagemaker-success-topic.arn + statement_id = "AllowExecutionFromSNS" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.lambda_s3_move_output.function_name + principal = "sns.amazonaws.com" + source_arn = aws_sns_topic.async-sagemaker-success-topic.arn } data "aws_iam_policy_document" "sns_publish_and_read_policy" { - statement { - sid = "sns_publish_and_read_policy_1" - actions = ["SNS:Publish"] - effect = "Allow" - principals { - type = "Service" - identifiers = ["sagemaker.amazonaws.com"] - } - resources = ["arn:aws:sns:${var.aws_region}:${var.account_id}:async-sagemaker-success-topic"] + statement { + sid = "sns_publish_and_read_policy_1" + actions = ["SNS:Publish"] + effect = "Allow" + principals { + type = "Service" + identifiers = ["sagemaker.amazonaws.com"] } - statement { - sid = "sns_publish_and_read_policy_2" - actions = ["SNS:Receive","SNS:Subscribe"] - effect = "Allow" - principals { - type = "Service" - identifiers = ["lambda.amazonaws.com"] - } - resources = ["arn:aws:sns:${var.aws_region}:${var.account_id}:async-sagemaker-success-topic"] + resources = ["arn:aws:sns:${var.aws_region}:${var.account_id}:async-sagemaker-success-topic"] + } + statement { + sid = "sns_publish_and_read_policy_2" + actions = ["SNS:Receive", "SNS:Subscribe"] + effect = "Allow" + principals { + type = "Service" + identifiers = ["lambda.amazonaws.com"] } + resources = ["arn:aws:sns:${var.aws_region}:${var.account_id}:async-sagemaker-success-topic"] + } } diff --git a/infra/modules/sns/main.tf b/infra/modules/sns/main.tf index 255f64e..9d075cd 100644 --- a/infra/modules/sns/main.tf +++ b/infra/modules/sns/main.tf @@ -1,44 +1,44 @@ resource "aws_sns_topic" "budget_alert_topic" { - name = "${var.prefix}-budget-alert-topic" - policy = data.aws_iam_policy_document.budget_publish_policy.json + name = "${var.prefix}-budget-alert-topic" + policy = data.aws_iam_policy_document.budget_publish_policy.json } resource "aws_sns_topic" "unauthorised_access_topic" { - name = "${var.prefix}-unauthorised-access-alert-topic" - policy = data.aws_iam_policy_document.unauthorised_access_policy.json + name = "${var.prefix}-unauthorised-access-alert-topic" + policy = data.aws_iam_policy_document.unauthorised_access_policy.json } resource "aws_sns_topic_subscription" "email_subscription" { - topic_arn = aws_sns_topic.unauthorised_access_topic.arn - protocol = "email" - endpoint = var.notification_email[0] + topic_arn = aws_sns_topic.unauthorised_access_topic.arn + protocol = "email" + endpoint = var.notification_email[0] } data "aws_iam_policy_document" "budget_publish_policy" { - statement { - actions = ["SNS:Publish"] - effect = "Allow" - principals { - type = "Service" - identifiers = [ "budgets.amazonaws.com" ] - } - resources = [ - "arn:aws:sns:eu-west-2:${var.account_id}:${var.prefix}-budget-alert-topic" - ] + statement { + actions = ["SNS:Publish"] + effect = "Allow" + principals { + type = "Service" + identifiers = ["budgets.amazonaws.com"] } + resources = [ + "arn:aws:sns:eu-west-2:${var.account_id}:${var.prefix}-budget-alert-topic" + ] + } } data "aws_iam_policy_document" "unauthorised_access_policy" { - statement { - actions = ["SNS:Publish"] - effect = "Allow" - principals { - type = "Service" - identifiers = [ "cloudwatch.amazonaws.com" ] - } - resources = [ - "arn:aws:sns:eu-west-2:${var.account_id}:${var.prefix}-unauthorised-access-alert-topic" - ] + statement { + actions = ["SNS:Publish"] + effect = "Allow" + principals { + type = "Service" + identifiers = ["cloudwatch.amazonaws.com"] } + resources = [ + "arn:aws:sns:eu-west-2:${var.account_id}:${var.prefix}-unauthorised-access-alert-topic" + ] + } } diff --git a/infra/modules/sns/outputs.tf b/infra/modules/sns/outputs.tf index cfdfbee..9af93f4 100644 --- a/infra/modules/sns/outputs.tf +++ b/infra/modules/sns/outputs.tf @@ -1,7 +1,7 @@ output "sns_topic_arn" { - value = aws_sns_topic.budget_alert_topic.arn + value = aws_sns_topic.budget_alert_topic.arn } output "unauthorised_access_sns_topic_arn" { - value = aws_sns_topic.unauthorised_access_topic.arn + value = aws_sns_topic.unauthorised_access_topic.arn } \ No newline at end of file diff --git a/infra/modules/sns/variables.tf b/infra/modules/sns/variables.tf index 0c56689..505dbc6 100644 --- a/infra/modules/sns/variables.tf +++ b/infra/modules/sns/variables.tf @@ -1,14 +1,14 @@ variable "prefix" { - type = string - description = "Prefix for SNS topic name" + type = string + description = "Prefix for SNS topic name" } variable "account_id" { - type = string - description = "account ID for the SNS topic" + type = string + description = "account ID for the SNS topic" } variable "notification_email" { - type = list(string) - description = "Emails for SNS subscription" + type = list(string) + description = "Emails for SNS subscription" } \ No newline at end of file diff --git a/infra/sagemaker.tf b/infra/sagemaker.tf index a4f81fc..bbb7bf7 100644 --- a/infra/sagemaker.tf +++ b/infra/sagemaker.tf @@ -9,18 +9,18 @@ module "sagemaker_domain" { # IAM Roles and Policies for SageMaker module "iam" { - source = "./modules/sagemaker_init/iam" - prefix = "sagemaker" - sagemaker_default_bucket_name = "${var.sagemaker_default_bucket}" - aws_s3_bucket_notebook = aws_s3_bucket.notebooks - account_id = data.aws_caller_identity.aws_caller_identity.account_id - s3_bucket_arn = module.s3.s3_bucket_arn - lambda_function_arn = module.lambda_logs.lambda_function_arn + source = "./modules/sagemaker_init/iam" + prefix = "sagemaker" + sagemaker_default_bucket_name = var.sagemaker_default_bucket + aws_s3_bucket_notebook = aws_s3_bucket.notebooks + account_id = data.aws_caller_identity.aws_caller_identity.account_id + s3_bucket_arn = module.s3.s3_bucket_arn + lambda_function_arn = module.lambda_logs.lambda_function_arn } module "s3" { - source ="./modules/s3" + source = "./modules/s3" prefix = "sagemaker-logs" } @@ -50,8 +50,8 @@ resource "aws_security_group" "notebooks_endpoints" { resource "aws_security_group_rule" "notebooks_endpoint_ingress_sagemaker" { description = "endpoint-ingress-from-datasets-vpc" - security_group_id = aws_security_group.notebooks_endpoints.id - cidr_blocks = [aws_vpc.notebooks.cidr_block] + security_group_id = aws_security_group.notebooks_endpoints.id + cidr_blocks = [aws_vpc.notebooks.cidr_block] type = "ingress" from_port = "0" @@ -62,8 +62,8 @@ resource "aws_security_group_rule" "notebooks_endpoint_ingress_sagemaker" { resource "aws_security_group_rule" "notebooks_endpoint_egress_sagemaker" { description = "endpoint-ingress-from-datasets-vpc" - security_group_id = aws_security_group.notebooks_endpoints.id - cidr_blocks = [aws_vpc.notebooks.cidr_block] + security_group_id = aws_security_group.notebooks_endpoints.id + cidr_blocks = [aws_vpc.notebooks.cidr_block] type = "egress" from_port = "0" @@ -103,9 +103,9 @@ output "default_sagemaker_bucket" { # Cost monitoring module "cost_monitoring_dashboard" { - source = "./modules/cost_monitoring/sagemaker" - dashboard_name = "aws-cost-monitoring-dashboard" - services_to_monitor = [ + source = "./modules/cost_monitoring/sagemaker" + dashboard_name = "aws-cost-monitoring-dashboard" + services_to_monitor = [ "AmazonSageMaker", "AmazonEC2", "AmazonS3" @@ -113,16 +113,16 @@ module "cost_monitoring_dashboard" { } module "sns" { - source = "./modules/sns" - prefix = "data-workspace-sagemaker" - account_id = data.aws_caller_identity.aws_caller_identity.account_id + source = "./modules/sns" + prefix = "data-workspace-sagemaker" + account_id = data.aws_caller_identity.aws_caller_identity.account_id notification_email = var.sagemaker_budget_emails } module "log_group" { - source = "./modules/logs" - prefix = "data-workspace-sagemaker" - endpoint_names = local.all_endpoint_names + source = "./modules/logs" + prefix = "data-workspace-sagemaker" + endpoint_names = local.all_endpoint_names lambda_function_arn = module.lambda_logs.lambda_function_arn } @@ -136,8 +136,8 @@ output "all_log_group_arns" { module "lambda_logs" { - source = "./modules/lambda" - s3_bucket_name = "sagemaker-logs-centralized" + source = "./modules/lambda" + s3_bucket_name = "sagemaker-logs-centralized" log_delivery_role_arn = module.iam.lambda_execution_role_arn sagemaker_log_group_arns = [ for endpoint_name in local.all_endpoint_names : @@ -147,12 +147,12 @@ module "lambda_logs" { } module "budgets" { - source = "./modules/cost_monitoring/budgets" - budget_limit = "1000" + source = "./modules/cost_monitoring/budgets" + budget_limit = "1000" cost_filter_service = "Amazon SageMaker" - budget_name = "sagemaker-budget" - sns_topic_arn = module.sns.sns_topic_arn - notification_email = var.sagemaker_budget_emails + budget_name = "sagemaker-budget" + sns_topic_arn = module.sns.sns_topic_arn + notification_email = var.sagemaker_budget_emails } diff --git a/infra/sagemaker_llm_resources.tf b/infra/sagemaker_llm_resources.tf index 8c5fe93..137ce26 100644 --- a/infra/sagemaker_llm_resources.tf +++ b/infra/sagemaker_llm_resources.tf @@ -3,12 +3,12 @@ ################################################################################################################# module "gpt_neo_125_deployment" { - source = "./modules/sagemaker_deployment" - model_name = "gpt-neo-125m" + source = "./modules/sagemaker_deployment" + model_name = "gpt-neo-125m" sns_success_topic_arn = module.sagemaker_output_mover.sns_success_topic_arn - execution_role_arn = module.iam.inference_role - container_image = var.hugging_face_model_image - model_data_url = "${var.sagemaker_models_folder}/gpt-neo-125m.tar.gz" + execution_role_arn = module.iam.inference_role + container_image = var.hugging_face_model_image + model_data_url = "${var.sagemaker_models_folder}/gpt-neo-125m.tar.gz" environment = { "HF_MODEL_ID" = "/opt/ml/model/" "SM_NUM_GPUS" = 1 @@ -180,7 +180,7 @@ module "gpt_neo_125_deployment" { datapoints_to_alarm = 1 period = 300 statistic = "Sum" - alarm_actions = [module.sns.unauthorised_access_sns_topic_arn] + alarm_actions = [module.sns.unauthorised_access_sns_topic_arn] } ] @@ -192,12 +192,12 @@ module "gpt_neo_125_deployment" { ################################################################################################################# module "llama_3_2_1b_deployment" { - source = "./modules/sagemaker_deployment" - model_name = "Llama-3-2-1B" + source = "./modules/sagemaker_deployment" + model_name = "Llama-3-2-1B" sns_success_topic_arn = module.sagemaker_output_mover.sns_success_topic_arn - execution_role_arn = module.iam.inference_role - container_image = var.hugging_face_model_image - model_data_url = "${var.sagemaker_models_folder}/Llama-3.2-1B.tar.gz" + execution_role_arn = module.iam.inference_role + container_image = var.hugging_face_model_image + model_data_url = "${var.sagemaker_models_folder}/Llama-3.2-1B.tar.gz" environment = { "HF_MODEL_ID" = "/opt/ml/model/" "SM_NUM_GPUS" = 1 @@ -369,7 +369,7 @@ module "llama_3_2_1b_deployment" { datapoints_to_alarm = 1 period = 300 statistic = "Sum" - alarm_actions = [module.sns.unauthorised_access_sns_topic_arn] + alarm_actions = [module.sns.unauthorised_access_sns_topic_arn] } ] }