[#77] Optionally install Argo Workflows to GKE
Dmitriy Karbyshev committed Apr 3, 2021
1 parent 7a72e39 commit 5c8100c
Showing 19 changed files with 701 additions and 173 deletions.
2 changes: 1 addition & 1 deletion terraform/env_types/gcp/gke/gke_create/main.tf
@@ -60,7 +60,7 @@ module "gke_cluster" {
zone = var.zone
allowed_ips = var.allowed_ips
nodes_sa = module.iam.service_account
node_pools = var.node_pools
node_pools = try(merge(var.node_pools, var.argo.node_pool), var.node_pools)
pods_cidr = var.pods_cidr
service_cidr = var.service_cidr
location = var.region
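For context: the new expression try(merge(var.node_pools, var.argo.node_pool), var.node_pools) adds the Argo node pool (defined in variables.tf below) to the cluster's pool map, and falls back to the plain node_pools map if var.argo carries no node_pool attribute. A minimal sketch of that behaviour with hypothetical values:

locals {
  # Hypothetical pools, for illustration only.
  node_pools = {
    main = { machine_type = "n1-standard-4", max_node_count = 3 }
  }

  argo = {
    node_pool = {
      argo-workflows = { machine_type = "n1-standard-2", max_node_count = 1 }
    }
  }

  # merge() succeeds, so both pools end up in the map:
  # { main = {...}, argo-workflows = {...} }
  merged = try(merge(local.node_pools, local.argo.node_pool), local.node_pools)

  # If argo were null or lacked a node_pool attribute, merge() would fail
  # and try() would return the unmodified node_pools map instead.
}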
36 changes: 36 additions & 0 deletions terraform/env_types/gcp/gke/gke_create/variables.tf
@@ -177,3 +177,39 @@ variable "bastion_labels" {
default = {}
description = "Bastion host GCP labels"
}

variable "argo" {
type = object({
enabled = bool
namespace = string
artifact_bucket = string
node_pool = any
})
default = {
enabled = false
namespace = "argo"
artifact_bucket = ""
node_pool = {
argo-workflows = {
init_node_count = 0
min_node_count = 0
max_node_count = 1
preemptible = true
machine_type = "n1-standard-2"
disk_size_gb = 40
labels = {
machine_type = "n1-standard-2"
mode = "argo-workflows"
}
taints = [
{
key = "dedicated"
effect = "NO_SCHEDULE"
value = "argo"
}
]
}
}
}
description = "Argo configuration"
}
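For reference, a minimal sketch of how a caller might enable the optional Argo install through this variable; the bucket name and pool sizing below are illustrative, not values taken from this commit:

# terraform.tfvars (illustrative)
argo = {
  enabled         = true
  namespace       = "argo"
  artifact_bucket = "example-odahu-argo-artifacts"  # hypothetical bucket name

  node_pool = {
    argo-workflows = {
      init_node_count = 0
      min_node_count  = 0
      max_node_count  = 2
      preemptible     = true
      machine_type    = "n1-standard-2"
      disk_size_gb    = 40
      labels          = { mode = "argo-workflows" }
      taints = [
        { key = "dedicated", effect = "NO_SCHEDULE", value = "argo" }
      ]
    }
  }
}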
9 changes: 5 additions & 4 deletions terraform/env_types/gcp/gke/k8s_setup/locals.tf
@@ -5,13 +5,14 @@ locals {

is_lb_an_ip = length(regexall("^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$", module.nginx_ingress_prereqs.helm_values["controller.service.loadBalancerIP"])) > 0

databases = [
argo_db = var.argo.enabled ? "argo" : ""

databases = compact(concat([
"airflow",
"mlflow",
"jupyterhub",
"vault",
var.odahu_database,
"grafana",
"argo"
]
"grafana"
], [local.argo_db]))
}
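A small sketch of how the compact(concat(...)) expression behaves when Argo is disabled (values are illustrative):

locals {
  argo_db = false ? "argo" : ""  # i.e. var.argo.enabled = false in this sketch

  databases = compact(concat([
    "airflow",
    "mlflow",
    "grafana"
  ], [local.argo_db]))
  # => ["airflow", "mlflow", "grafana"]
  # compact() drops the empty string, so the "argo" database is only
  # provisioned when var.argo.enabled is true.
}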
22 changes: 17 additions & 5 deletions terraform/env_types/gcp/gke/k8s_setup/main.tf
@@ -166,6 +166,7 @@ module "odahuflow_prereqs" {
kms_key_id = var.kms_key_id
data_bucket = var.data_bucket
log_bucket = var.log_bucket
argo_artifact_bucket = var.argo.artifact_bucket
log_expiration_days = var.log_expiration_days
uniform_bucket_level_access = var.uniform_bucket_level_access
fluentd_resources = var.fluentd_resources
@@ -232,11 +233,21 @@ module "airflow" {
depends_on = [module.airflow_prereqs, module.postgresql]
}

module "argo-workflows" {
source = "../../../../modules/k8s/argo"
module "argo_workflow_prereqs" {
source = "../../../../modules/k8s/argo/prereqs/gke"
cluster_name = var.cluster_name
bucket = module.odahuflow_prereqs.argo_artifact_bucket_name
kms_key_id = var.kms_key_id
project_id = var.project_id

depends_on = [module.postgresql]
}

module "argo_workflow" {
source = "../../../../modules/k8s/argo/main"
cluster_domain = var.cluster_domain_name
namespace = var.argo_namespace
configuration = var.argo
configuration = merge(var.argo, { artifact_bucket = module.odahuflow_prereqs.odahu_data_bucket_name })
workflows_sa = module.argo_workflow_prereqs.argo_workflows_sa
tls_secret_crt = var.tls_crt
tls_secret_key = var.tls_key
pgsql = {
@@ -248,7 +259,7 @@ module "argo-workflows" {
secret_namespace = module.postgresql.pgsql_credentials["argo"].namespace
secret_name = module.postgresql.pgsql_credentials["argo"].secret
}
depends_on = [module.postgresql]
depends_on = [module.postgresql, module.argo_workflow_prereqs]
}

module "storage-syncer" {
@@ -383,6 +394,7 @@ module "odahuflow_helm" {
extra_external_urls = concat(
module.jupyterhub.external_url,
module.airflow.external_url,
module.argo_workflow.external_url,
module.elasticsearch.external_url,
module.odahuflow_prereqs.extra_external_urls
)
39 changes: 31 additions & 8 deletions terraform/env_types/gcp/gke/k8s_setup/variables.tf
@@ -132,12 +132,6 @@ variable "airflow_namespace" {
description = "Namespace for Airflow"
}

variable "argo_namespace" {
type = string
default = "argo"
description = "Namespace for Argo"
}

variable "fluentd_namespace" {
type = string
default = "fluentd"
@@ -152,6 +146,7 @@ variable "db_namespace" {

variable "kms_key_id" {
type = string
default = ""
description = "The ID of a Cloud KMS key that will be used to encrypt cluster disks"
}

@@ -390,10 +385,38 @@ variable "airflow" {
########################
variable "argo" {
type = object({
enabled = bool
enabled = bool
namespace = string
workflows_namespace = string
artifact_bucket = string
node_pool = any
})
default = {
enabled = true
enabled = false
namespace = "argo"
workflows_namespace = "argo-workflows"
artifact_bucket = ""
node_pool = {
argo-workflows = {
init_node_count = 0
min_node_count = 0
max_node_count = 1
preemptible = true
machine_type = "n1-standard-2"
disk_size_gb = 40
labels = {
machine_type = "n1-standard-2"
mode = "argo-workflows"
}
taints = [
{
key = "dedicated"
effect = "NO_SCHEDULE"
value = "argo"
}
]
}
}
}
description = "Argo configuration"
}
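Note that the default node pool above is tainted with dedicated=argo:NO_SCHEDULE and labelled mode=argo-workflows, so only workloads that tolerate the taint are scheduled there. A hedged sketch of a matching pod spec, written with the Terraform kubernetes provider (the pod name, namespace and image are assumptions, not part of this commit):

resource "kubernetes_pod" "argo_step_example" {
  metadata {
    name      = "workflow-step-example"
    namespace = "argo-workflows"
  }

  spec {
    # Tolerates the taint placed on the dedicated Argo node pool.
    toleration {
      key      = "dedicated"
      operator = "Equal"
      value    = "argo"
      effect   = "NoSchedule"
    }

    # Pins the pod to the pool via its label.
    node_selector = {
      mode = "argo-workflows"
    }

    container {
      name    = "main"
      image   = "alpine:3.13"
      command = ["sh", "-c", "echo hello from the argo pool"]
    }
  }
}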
110 changes: 110 additions & 0 deletions terraform/modules/gcp/networking/natbox/main.tf
@@ -0,0 +1,110 @@
data "google_compute_subnetwork" "dmz_subnet" {
name = var.dmz_subnet
}

data "google_compute_subnetwork" "gke_subnet" {
name = var.gke_subnet
}

locals {
default_labels = {
"project" = "odahu-flow"
"cluster" = var.cluster_name
}

dmz_natbox_labels = merge(local.default_labels, var.dmz_natbox_labels)
dmz_natbox_gcp_tags = length(var.dmz_natbox_gcp_tags) == 0 ? ["${var.cluster_name}-dmz-natbox"] : var.dmz_natbox_gcp_tags

gke_dmz_peering_enabled = data.google_compute_subnetwork.gke_subnet.network != data.google_compute_subnetwork.dmz_subnet.network
}

data "google_compute_instance" "dmz_natbox" {
count = var.dmz_natbox_enabled ? 1 : 0
name = "wpm-odahu01-dmz-natbox"
zone = "europe-west1-b"
}

#resource "google_compute_instance" "dmz_natbox" {
# count = var.dmz_natbox_enabled ? 1 : 0
# name = "${var.cluster_name}-${var.dmz_natbox_hostname}"
# machine_type = var.dmz_natbox_machine_type
# zone = var.gcp_zone
# project = var.gcp_project_id
# allow_stopping_for_update = true
# can_ip_forward = true
#
# boot_disk {
# initialize_params {
# image = "ubuntu-1804-lts"
# }
# }
#
# tags = local.dmz_natbox_gcp_tags
# labels = local.dmz_natbox_labels
#
# network_interface {
# subnetwork = var.dmz_subnet
# subnetwork_project = var.gcp_project_id
# }
#
# metadata = {
# ssh-keys = "${var.ssh_user}:${var.ssh_public_key}"
# }
#
# metadata_startup_script = <<SCRIPT
# sed -i '/AllowAgentForwarding/s/^#//g' /etc/ssh/sshd_config && \
# systemctl restart ssh.service
# sed -i '/net.ipv4.ip_forward/s/^#//g' /etc/sysctl.conf && \
# systemctl restart systemd-sysctl.service
#
# iptables -t nat -A POSTROUTING -j MASQUERADE -s ${data.google_compute_subnetwork.gke_subnet.ip_cidr_range} -d ${var.dmz_dest_cidr}
# iptables -t nat -A POSTROUTING -j MASQUERADE -s ${var.pods_cidr} -d ${var.dmz_dest_cidr}
# SCRIPT
#
# service_account {
# scopes = ["userinfo-email", "compute-ro", "storage-ro"]
# }
#}

resource "google_compute_route" "gke_to_dmz" {
# count = var.dmz_natbox_enabled ? 1 : 0
name = substr("${var.cluster_name}-gke-to-dmz", 0, 62)
network = data.google_compute_subnetwork.gke_subnet.network
dest_range = var.dmz_dest_cidr
priority = 800
tags = concat(var.gke_gcp_tags, var.bastion_gcp_tags)

# next_hop_instance = google_compute_instance.dmz_natbox[0].name
# next_hop_instance_zone = google_compute_instance.dmz_natbox[0].zone
next_hop_instance = data.google_compute_instance.dmz_natbox[0].name
next_hop_instance_zone = data.google_compute_instance.dmz_natbox[0].zone
}

# Firewall rule to allow traffic between nodes in GKE subnet and NATbox host
#resource "google_compute_firewall" "gke_to_dmz" {
# count = var.dmz_natbox_enabled ? 1 : 0
# project = var.gcp_project_id
# name = substr("${var.cluster_name}-gke-to-dmz", 0, 62)
# network = data.google_compute_subnetwork.gke_subnet.network
# source_ranges = [
# var.pods_cidr,
# data.google_compute_subnetwork.gke_subnet.ip_cidr_range,
# data.google_compute_subnetwork.dmz_subnet.ip_cidr_range
# ]
#
# target_tags = concat(local.dmz_natbox_gcp_tags, var.gke_gcp_tags, var.bastion_gcp_tags)
#
# allow {
# protocol = "icmp"
# }
#
# allow {
# protocol = "tcp"
# }
#
# allow {
# protocol = "udp"
# }
#
# depends_on = [google_compute_route.gke_to_dmz[0]]
#}
76 changes: 76 additions & 0 deletions terraform/modules/gcp/networking/natbox/variables.tf
@@ -0,0 +1,76 @@
variable "cluster_name" {
default = "odahuflow"
description = "Odahuflow cluster name"
}

variable "gcp_project_id" {
description = "Target GCP project ID"
}

variable "gcp_zone" {
description = "Target GCP zone"
}

variable "ssh_user" {
default = "ubuntu"
description = "default ssh user"
}

variable "ssh_public_key" {
description = "SSH public key"
}

variable "gke_subnet" {
description = "Name of GKE nodes subnet in `gke_network` VPC"
}

variable "pods_cidr" {
description = "GKE pods CIDR"
}

variable "dmz_dest_cidr" {
description = "Network CIDR that should be reachable through the natbox host"
}

variable "dmz_subnet" {
description = "Name of the DMZ subnet (routed to internal resources)"
}

variable "dmz_natbox_enabled" {
default = false
type = bool
description = "Whether to install the natbox host"
}

variable "dmz_natbox_machine_type" {
default = "custom-2-4096"
}

variable "dmz_natbox_hostname" {
default = "dmz-natbox"
description = "DMZ natbox host name"
}

variable "gke_gcp_tags" {
default = []
description = "GKE nodes GCP network tags"
type = list(string)
}

variable "bastion_gcp_tags" {
default = []
description = "Bastion host GCP network tags"
type = list(string)
}

variable "dmz_natbox_gcp_tags" {
default = []
description = "DMZ natbox host GCP network tags"
type = list(string)
}

variable "dmz_natbox_labels" {
default = {}
description = "DMZ natbox host GCP labels"
type = map(string)
}
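A hedged sketch of how this natbox module might be instantiated from an environment layer; the source path, project, CIDRs and tags below are illustrative assumptions:

module "dmz_natbox" {
  source = "../../../../modules/gcp/networking/natbox"

  cluster_name   = "odahu-test"
  gcp_project_id = "example-gcp-project"   # hypothetical project
  gcp_zone       = "europe-west1-b"
  ssh_public_key = file("~/.ssh/id_rsa.pub")

  gke_subnet    = "odahu-test-gke"
  dmz_subnet    = "odahu-test-dmz"
  pods_cidr     = "10.40.0.0/14"
  dmz_dest_cidr = "10.100.0.0/16"

  dmz_natbox_enabled = true
  gke_gcp_tags       = ["odahu-test-gke-node"]
  bastion_gcp_tags   = ["odahu-test-bastion"]
}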