Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFC] Update python-default template to use presets: catalog/schema #2041

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"description": "\nPlease provide a default schema during development.\ndefault_schema",
"order": 5
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"project_name": {
"type": "string",
"default": "my_project",
"description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
"description": "\nPlease provide a unique name for this project.\nproject_name",
"order": 1,
"pattern": "^[A-Za-z0-9_]+$",
"pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
Expand All @@ -13,23 +13,55 @@
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
"description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
"order": 2
},
"include_dlt": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
"order": 3
},
"include_python": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
"order": 4
},
"default_catalog": {
"type": "string",
"default": "{{default_catalog}}",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.",
"description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
"order": 5
},
"personal_schemas": {
"type": "string",
"description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
"enum": [
"yes, use a schema based on the current user name during development",
"no, use a shared schema during development"
],
"order": 6
},
"shared_schema": {
"skip_prompt_if": {
"properties": {
"personal_schemas": {
"const": "yes, use a schema based on the current user name during development"
}
}
},
"type": "string",
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide default schema during development.\ndefault_schema",
"order": 7
}
},
"success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
"success_message": "\nWorkspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml').\nworkspace_host: {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ bundle:
include:
- resources/*.yml

{{- $dev_schema := .shared_schema }}
{{- $prod_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- $dev_schema = "${workspace.current_user.short_name}"}}
{{- $prod_schema = "default"}}
{{- end}}

targets:
dev:
# The default target uses 'mode: development' to create a development copy.
Expand All @@ -16,6 +23,9 @@ targets:
default: true
workspace:
host: {{workspace_host}}
presets:
catalog: {{.default_catalog}}
schema: {{$dev_schema}}

prod:
mode: production
Expand All @@ -26,5 +36,6 @@ targets:
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
run_as:
{{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
presets:
catalog: {{.default_catalog}}
schema: {{$prod_schema}}
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,12 @@ resources:
interval: 1
unit: DAYS

{{- if not is_service_principal}}

{{if not is_service_principal -}}
email_notifications:
on_failure:
- {{user_name}}

{{else}}

{{end -}}

tasks:
{{- if eq .include_notebook "yes" }}
- task_key: notebook_task
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,6 @@ resources:
pipelines:
{{.project_name}}_pipeline:
name: {{.project_name}}_pipeline
{{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
# catalog: catalog_name
{{- else}}
catalog: {{default_catalog}}
{{- end}}
target: {{.project_name}}_${bundle.target}
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,25 @@
},
"outputs": [],
"source": [
{{- if (eq .include_python "yes") }}
{{- if (eq .include_python "yes") }}
"import sys\n",
"sys.path.append('../src')\n",
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
{{else}}
"spark.range(10)"
{{end -}}
{{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- else}}
"# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- end}}
]
}
],
Expand All @@ -46,8 +56,63 @@
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "ipynb-notebook",
"widgets": {}
"notebookName": "exploration",
"widgets": {
"catalog": {
"currentValue": "{{.default_catalog}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
{{- /* Default value for the 'schema' widget: the shared schema, or the current user's short name when personal schemas were selected. */}}
{{- $dev_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- /* Call the helper directly; a quoted "{{short_name}}" would be emitted as literal text. */}}
{{- $dev_schema = short_name}}
{{- end}}
"schema": {
"currentValue": "{{$dev_schema}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
}
}
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
"# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
{{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')"
]
},
{
Expand All @@ -47,9 +50,9 @@
{{- if (eq .include_python "yes") }}
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
"main.create_example_table()"
{{else}}
"spark.range(10)"
"spark.sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")"
{{end -}}
]
}
Expand All @@ -62,7 +65,64 @@
"pythonIndentUnit": 2
},
"notebookName": "notebook",
"widgets": {}
"widgets": {
"catalog": {
"currentValue": "{{.default_catalog}}",
"nuid": "3965fc9c-8080-45b1-bee3-f75cef7685b4",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
{{- /* Default value for the 'schema' widget: the shared schema, or the current user's short name when personal schemas were selected. */}}
{{- $dev_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- /* Call the helper directly; a quoted "{{short_name}}" would be emitted as literal text. */}}
{{- $dev_schema = short_name}}
{{- end}}
"schema": {
"currentValue": "{{$dev_schema}}",
"nuid": "6ec0d70f-39bf-4859-a510-02c3e3d59bff",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
}
}
},
"kernelspec": {
"display_name": "Python 3",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,21 +1,39 @@
from pyspark.sql import SparkSession, DataFrame
import argparse

def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    """
    Create a new Databricks Connect session. If this fails,
    check that you have configured Databricks Connect correctly.
    See https://docs.databricks.com/dev-tools/databricks-connect.html.

    Returns the session from DatabricksSession.builder.getOrCreate(), or from
    SparkSession.builder.getOrCreate() when an ImportError is raised.
    """
    try:
        # Imported here rather than at module level so that a missing
        # databricks.connect package is handled by the fallback below.
        from databricks.connect import DatabricksSession
        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()

def get_taxis(spark: SparkSession) -> DataFrame:
    """
    Read the 'samples.nyctaxi.trips' table through the given session.

    Returns whatever spark.read.table() yields for that table name.
    """
    return spark.read.table("samples.nyctaxi.trips")

def create_example_table():
    """
    Create a table called 'example' in the default catalog and schema.

    The statement is CREATE OR REPLACE, so an existing 'example' table is
    replaced. The session is obtained from get_spark().
    """
    get_spark().sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")

def main():
    """
    Set the catalog and schema for the current session, then create the example table.

    Requires the '--catalog' and '--schema' command-line arguments; any other
    arguments are ignored. In the default template, these parameters are set
    using the 'catalog' and 'schema' presets in databricks.yml.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--catalog', required=True)
    parser.add_argument('--schema', required=True)
    # parse_known_args() returns unrecognized arguments separately; they are not used.
    args, _ = parser.parse_known_args()

    # An empty catalog value would otherwise yield the invalid statement
    # 'USE .<schema>', so in that case only the schema is selected.
    target = f"{args.catalog}.{args.schema}" if args.catalog else args.schema
    get_spark().sql(f"USE {target}")

    create_example_table()

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"description": "\nPlease provide a default schema during development.\ndefault_schema",
"order": 5
}
},
Expand Down
Loading