Commit abeffd4

Databricks cluster customisation: Add singleNode mode and fixed node options (#319)

* Add singleNode mode and fixed node options

* Allow runtime configuration

* Add doc link to config.sample.yaml
jjgriff93 authored Aug 24, 2023
1 parent d8188f2 commit abeffd4
Showing 4 changed files with 39 additions and 14 deletions.
5 changes: 3 additions & 2 deletions config.sample.yaml
@@ -30,8 +30,7 @@ transform: # Optional
     - Silver
     - Gold
   spark_config: # Optional
-    spark.databricks.cluster.profile: singleNode # Configuration for a Single Node cluster
-    spark.master: local[*]
+    spark.configuration.key: value
   databricks_secrets:
     cog_services_key: my-super-secret-key # On Github, this will be a token replacement
   databricks_libraries: # Optional
@@ -52,6 +51,8 @@ transform: # Optional
       local_disk_min_size: 600
       category: "Memory Optimised"
     autotermination_minutes: 120
+    runtime_engine: STANDARD # Optional: STANDARD or PHOTON (https://learn.microsoft.com/en-us/azure/databricks/runtime/photon)
+    num_of_workers: 0 # Set to 0 for single node mode or any number for fixed cluster (ignored if autoscale also defined)
     autoscale:
       min_workers: 1
       max_workers: 3
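For illustration, the three cluster-sizing modes the new options allow, sketched as the databricks_cluster object this YAML maps onto (shape per infrastructure/transform/variables.tf below; the concrete values here are made up):

  # Single node: omit autoscale and set workers to 0
  databricks_cluster = {
    num_of_workers = 0
    runtime_engine = "STANDARD"
  }

  # Fixed size: omit autoscale and set a non-zero worker count
  databricks_cluster = {
    num_of_workers = 4
    runtime_engine = "PHOTON"
  }

  # Autoscaling: a defined autoscale block wins and num_of_workers is ignored
  databricks_cluster = {
    autoscale = {
      min_workers = 1
      max_workers = 3
    }
  }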
36 changes: 25 additions & 11 deletions infrastructure/transform/databricks.tf
@@ -57,22 +57,31 @@ data "databricks_spark_version" "latest" {
 }

 data "databricks_node_type" "node_type" {
-  min_memory_gb       = var.transform.databricks_cluster.node_type.min_memory_gb
-  min_cores           = var.transform.databricks_cluster.node_type.min_cores
-  local_disk_min_size = var.transform.databricks_cluster.node_type.local_disk_min_size
-  category            = var.transform.databricks_cluster.node_type.category
+  min_memory_gb         = var.transform.databricks_cluster.node_type.min_memory_gb
+  min_cores             = var.transform.databricks_cluster.node_type.min_cores
+  local_disk_min_size   = var.transform.databricks_cluster.node_type.local_disk_min_size
+  category              = var.transform.databricks_cluster.node_type.category
+  photon_worker_capable = var.transform.databricks_cluster.runtime_engine == "PHOTON"
+  photon_driver_capable = var.transform.databricks_cluster.runtime_engine == "PHOTON"

   depends_on = [time_sleep.wait_for_databricks_network]
 }

 resource "databricks_cluster" "cluster" {
-  cluster_name            = "Fixed Job Cluster"
+  cluster_name            = "FlowEHR Cluster"
   spark_version           = data.databricks_spark_version.latest.id
   node_type_id            = data.databricks_node_type.node_type.id
   autotermination_minutes = var.transform.databricks_cluster.autotermination_minutes
-  autoscale {
-    min_workers = var.transform.databricks_cluster.autoscale.min_workers
-    max_workers = var.transform.databricks_cluster.autoscale.max_workers
+  num_workers             = !local.autoscale_cluster ? var.transform.databricks_cluster.num_of_workers : null
+  runtime_engine          = var.transform.databricks_cluster.runtime_engine
+
+  dynamic "autoscale" {
+    for_each = local.autoscale_cluster ? [1] : []
+
+    content {
+      min_workers = var.transform.databricks_cluster.autoscale.min_workers
+      max_workers = var.transform.databricks_cluster.autoscale.max_workers
+    }
   }

   spark_conf = merge(
@@ -112,7 +121,12 @@ resource "databricks_cluster" "cluster" {
       "spark.secret.${secret_name}" => "{{secrets/${databricks_secret_scope.secrets.name}/${secret_name}}}"
     }),
     # Any values set in the config
-    var.transform.spark_config
+    var.transform.spark_config,
+    # Special config if in single node configuration
+    local.single_node ? tomap({
+      "spark.databricks.cluster.profile" : "singleNode"
+      "spark.master" : "local[*]"
+    }) : tomap({})
   )

   dynamic "library" {
@@ -162,9 +176,9 @@ resource "databricks_cluster" "cluster" {
     "APPLICATIONINSIGHTS_CONNECTION_STRING" = azurerm_application_insights.transform.connection_string
   }

-  custom_tags = {
+  custom_tags = local.single_node ? {
     "ResourceClass" = "SingleNode"
-  }
+  } : {}

   depends_on = [time_sleep.wait_for_databricks_network]
 }
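The conditional autoscale block above relies on Terraform's "dynamic" block idiom, the usual way to make a nested block conditional: iterating over a one-element list emits the block once, while an empty list omits it entirely. A minimal standalone sketch of the pattern, with illustrative names:

  variable "autoscale_enabled" {
    type    = bool
    default = false
  }

  resource "databricks_cluster" "example" {
    # ...required arguments elided for brevity...

    # Emitted exactly once when the condition holds, otherwise not at all
    dynamic "autoscale" {
      for_each = var.autoscale_enabled ? [1] : []
      content {
        min_workers = 1
        max_workers = 3
      }
    }
  }

Setting num_workers to null in the autoscaling case complements this: the submitted cluster spec then carries either a fixed size or an autoscale range, never both.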
2 changes: 2 additions & 0 deletions infrastructure/transform/locals.tf
@@ -25,6 +25,8 @@ locals {
   adb_linked_service_name    = "ADBLinkedServiceViaMSI"
   dbfs_storage_account_name  = "dbfs${var.naming_suffix_truncated}"
   datalake_enabled           = try(var.transform.datalake, null) != null
+  autoscale_cluster          = var.transform.databricks_cluster.autoscale != null
+  single_node                = !local.autoscale_cluster && var.transform.databricks_cluster.num_of_workers == 0

   # IPs required for Databricks UDRs
   # Built from https://learn.microsoft.com/en-us/azure/databricks/resources/supported-regions#--control-plane-nat-webapp-and-extended-infrastructure-ip-addresses-and-domains
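Taken together, the two locals split cluster sizing into three mutually exclusive modes; an illustrative summary derived from the logic above:

  # autoscale block set                   -> autoscale_cluster = true,  single_node = false
  #                                          (dynamic autoscale block emitted; num_of_workers ignored)
  # autoscale null + num_of_workers == 0  -> autoscale_cluster = false, single_node = true
  #                                          (singleNode spark_conf and ResourceClass tag applied)
  # autoscale null + num_of_workers > 0   -> fixed-size cluster with num_of_workers workers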
10 changes: 9 additions & 1 deletion infrastructure/transform/variables.tf
@@ -108,11 +108,14 @@ variable "transform" {
       url = string,
       sha = optional(string, "")
     })), [])
+
     datalake = optional(object({
       zones = set(string)
     }))
+
     spark_config       = optional(map(string), {})
     databricks_secrets = optional(map(string), {})
+
     databricks_libraries = optional(object({
       jar = optional(list(string), []),
       pypi = optional(list(object({
@@ -125,19 +128,24 @@ variable "transform" {
         exclusions = optional(list(string), [])
       })), [])
     }), {}),
+
     databricks_cluster = optional(object({
       node_type = optional(object({
        min_memory_gb       = optional(number, 0),
        min_cores           = optional(number, 0),
        local_disk_min_size = optional(number, 0),
        category            = optional(string, "")
      }), {}),
+
      autotermination_minutes = optional(number, 0),
      init_scripts            = optional(list(string), [])
+     runtime_engine          = optional(string, "STANDARD")
+     num_of_workers          = optional(number, 0)
+
      autoscale = optional(object({
        min_workers = optional(number, 0)
        max_workers = optional(number, 0)
-     }), {})
+     }), null)
    }), {})
  })
  default = {
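Note the autoscale default changing from {} to null: with an empty-object default the attribute is never null, so the autoscale_cluster check in locals.tf would be true even when the caller omits the block. A minimal sketch of the distinction (variable name is illustrative):

  variable "cluster" {
    type = object({
      autoscale = optional(object({
        min_workers = optional(number, 0)
        max_workers = optional(number, 0)
      }), null) # stays null when the caller omits the block
    })
  }

  locals {
    # true only when an autoscale block was actually supplied;
    # with a {} default this would always be true
    autoscale_cluster = var.cluster.autoscale != null
  }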
