Add an experimental default-sql template #1051

Merged · 35 commits · Feb 19, 2024

Commits
c71aa4a
Add a default-sql template
lennartkats-db Dec 11, 2023
7abb3ad
Merge branch 'main' into sql-template
pietern Dec 12, 2023
2ce4ce3
Add missing file
lennartkats-db Dec 13, 2023
9899d16
Only do SQL files for now
lennartkats-db Dec 13, 2023
19e25f5
Merge branch 'sql-template' of github.com:lennartkats-db/cli into sql…
lennartkats-db Dec 13, 2023
bd1b78d
Use a template for VS Code settings
lennartkats-db Dec 13, 2023
7828c35
Add missing files
lennartkats-db Dec 13, 2023
4b7c4a0
Update libs/template/templates/default-sql/template/{{.project_name}}…
lennartkats-db Dec 14, 2023
78d22eb
Update cmd/bundle/init.go
lennartkats-db Dec 14, 2023
a9bdc64
Update libs/template/templates/default-sql/template/{{.project_name}}…
lennartkats-db Dec 14, 2023
ab43e1e
Process feedback
lennartkats-db Dec 15, 2023
c207991
Update description
lennartkats-db Dec 15, 2023
6124fbe
Merge branch 'sql-template' of github.com:lennartkats-db/cli into sql…
lennartkats-db Dec 15, 2023
74df04e
Remove workspace_host_override
lennartkats-db Dec 19, 2023
ace64dd
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Dec 19, 2023
a502bb1
Add SQL extension configuration
lennartkats-db Jan 13, 2024
3aa501e
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Jan 25, 2024
cc2f66d
Fix test
lennartkats-db Jan 25, 2024
93d7052
Support customizable catalog/schema
lennartkats-db Jan 25, 2024
9d6fb8c
Avoid using /Shared
lennartkats-db Jan 26, 2024
2becf55
Fix keyword
lennartkats-db Jan 26, 2024
18860ca
Fix parameter
lennartkats-db Jan 26, 2024
a74a19d
Improve setup DX, support non-UC workspaces
lennartkats-db Jan 27, 2024
e5fab2d
Remove from list of templates for now
lennartkats-db Jan 28, 2024
d46d247
Add README.md
lennartkats-db Jan 28, 2024
97ef8fc
Fix test
lennartkats-db Jan 28, 2024
12b77ab
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Jan 28, 2024
22176b2
Mark as experimental
lennartkats-db Jan 29, 2024
d471666
Restore sql-default template in hidden form
lennartkats-db Feb 19, 2024
bf70431
Copy-editing
lennartkats-db Feb 19, 2024
dcc3cb2
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Feb 19, 2024
bdbd7f7
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Feb 19, 2024
1c8f9ff
Incorporate feedback
lennartkats-db Feb 19, 2024
8572b3d
Merge remote-tracking branch 'databricks/main' into sql-template
lennartkats-db Feb 19, 2024
64851e7
Incorporate feedback
lennartkats-db Feb 19, 2024
Files changed
5 changes: 5 additions & 0 deletions cmd/bundle/init.go
@@ -35,6 +35,11 @@ var nativeTemplates = []nativeTemplate{
 		name:        "default-python",
 		description: "The default Python template for Notebooks / Delta Live Tables / Workflows",
 	},
+	{
+		name:        "default-sql",
+		description: "The default SQL template for .sql files that run with Databricks SQL",
+		hidden:      true,
+	},
 	{
 		name:        "dbt-sql",
 		description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
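Because the entry above sets `hidden: true`, the template is omitted from the interactive list shown by `bundle init`, but it can still be initialized by passing its name explicitly. A minimal sketch, assuming a CLI build that includes this change:

```sh
# The experimental template does not appear in the interactive picker...
databricks bundle init

# ...but selecting it by name still works:
databricks bundle init default-sql
```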
20 changes: 19 additions & 1 deletion libs/template/renderer_test.go
@@ -109,9 +109,9 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
 	// Test option combinations
 	options := []string{"yes", "no"}
 	isServicePrincipal := false
-	build := false
 	catalog := "hive_metastore"
 	cachedCatalog = &catalog
+	build := false
 	for _, includeNotebook := range options {
 		for _, includeDlt := range options {
 			for _, includePython := range options {
@@ -149,6 +149,24 @@
 	defer os.RemoveAll(tempDir)
 }

+func TestBuiltinSQLTemplateValid(t *testing.T) {
+	for _, personal_schemas := range []string{"yes", "no"} {
+		for _, target := range []string{"dev", "prod"} {
+			for _, isServicePrincipal := range []bool{true, false} {
+				config := map[string]any{
+					"project_name":     "my_project",
+					"http_path":        "/sql/1.0/warehouses/123abc",
+					"default_catalog":  "users",
+					"shared_schema":    "lennart",
+					"personal_schemas": personal_schemas,
+				}
+				build := false
+				assertBuiltinTemplateValid(t, "default-sql", config, target, isServicePrincipal, build, t.TempDir())
+			}
+		}
+	}
+}
+
 func TestBuiltinDbtTemplateValid(t *testing.T) {
 	for _, personal_schemas := range []string{"yes", "no"} {
 		for _, target := range []string{"dev", "prod"} {
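As a usage note, the new test can be exercised in isolation with the standard Go tooling, run from the repository root:

```sh
go test ./libs/template -run TestBuiltinSQLTemplateValid
```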
libs/template/templates/dbt-sql/databricks_template_schema.json
@@ -3,8 +3,8 @@
   "properties": {
     "project_name": {
       "type": "string",
-      "pattern": "^[A-Za-z_][A-Za-z0-9_]+$",
-      "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores.",
+      "pattern": "^[A-Za-z_][A-Za-z0-9-_]+$",
+      "pattern_match_failure_message": "Name must consist of letters, numbers, dashes, and underscores.",
       "default": "dbt_project",
       "description": "\nPlease provide a unique name for this project.\nproject_name",
       "order": 1
databricks.yml.tmpl
@@ -19,7 +19,7 @@ targets:
     host: {{workspace_host}}

 ## Optionally, there could be a 'staging' target here.
-## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.)
+## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.)
 #
 # staging:
 #   workspace:
3 changes: 3 additions & 0 deletions libs/template/templates/default-sql/README.md
@@ -0,0 +1,3 @@
# sql template

This folder provides a template for using SQL with Databricks Asset Bundles.
53 changes: 53 additions & 0 deletions libs/template/templates/default-sql/databricks_template_schema.json
@@ -0,0 +1,53 @@
{
"welcome_message": "\nWelcome to the (EXPERIMENTAL) default SQL template for Databricks Asset Bundles!",
"properties": {
"project_name": {
"type": "string",
"default": "sql_project",
"description": "\nPlease provide a unique name for this project.\nproject_name",
"order": 1,
"pattern": "^[A-Za-z_][A-Za-z0-9-_]+$",
"pattern_match_failure_message": "Name must consist of letters, numbers, dashes, and underscores."
},
"http_path": {
"type": "string",
"pattern": "^/sql/.\\../warehouses/[a-z0-9]+$",
"pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>",
"description": "\nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]",
"order": 2
},
"default_catalog": {
"type": "string",
"default": "{{if eq (default_catalog) \"\"}}hive_metastore{{else}}{{default_catalog}}{{end}}",
"pattern": "^\\w*$",
"pattern_match_failure_message": "Invalid catalog name.",
"description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} or metastore{{end}}.\ndefault_catalog",
"order": 3
},
"personal_schemas": {
"type": "string",
"description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
"enum": [
"yes, automatically use a schema based on the current user name during development",
"no, use a single schema for all users during development"
],
"order": 4
},
"shared_schema": {
"skip_prompt_if": {
"properties": {
"personal_schemas": {
"const": "yes, automatically use a schema based on the current user name during development"
}
}
},
"type": "string",
"default": "default",
"pattern": "^\\w+$",
"pattern_match_failure_message": "Invalid schema name.",
"description": "\nPlease provide an initial schema during development.\ndefault_schema",
"order": 5
}
},
"success_message": "\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
}
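The prompts defined by this schema can also be answered non-interactively via the CLI's `--config-file` flag. A sketch with illustrative answer values (note that `personal_schemas` must match one of the enum strings above):

```sh
# Hypothetical answers file for the prompts defined in the schema:
cat > config.json <<'EOF'
{
  "project_name": "my_sql_project",
  "http_path": "/sql/1.0/warehouses/abcdef1234567890",
  "default_catalog": "main",
  "personal_schemas": "yes, automatically use a schema based on the current user name during development"
}
EOF

databricks bundle init default-sql --config-file config.json
```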
7 changes: 7 additions & 0 deletions libs/template/templates/default-sql/library/versions.tmpl
@@ -0,0 +1,7 @@
{{define "latest_lts_dbr_version" -}}
13.3.x-scala2.12
{{- end}}

{{define "latest_lts_db_connect_version_spec" -}}
>=13.3,<13.4
{{- end}}
7 changes: 7 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/.vscode/extensions.json
@@ -0,0 +1,7 @@
{
"recommendations": [
"databricks.databricks",
"redhat.vscode-yaml",
"databricks.sqltools-databricks-driver",
]
}
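These recommendations can also be installed directly, assuming the VS Code `code` command-line launcher is on the PATH:

```sh
code --install-extension databricks.databricks
code --install-extension redhat.vscode-yaml
code --install-extension databricks.sqltools-databricks-driver
```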
28 changes: 28 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/.vscode/settings.json
@@ -0,0 +1,28 @@
{
"python.analysis.stubPath": ".vscode",
"databricks.python.envFile": "${workspaceFolder}/.env",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.extraPaths": ["src"],
"files.exclude": {
"**/*.egg-info": true,
"**/__pycache__": true,
".pytest_cache": true,
},
"sqltools.connections": [
{
"connectionMethod": "VS Code Extension (beta)",
"catalog": "{{.default_catalog}}",
"previewLimit": 50,
"driver": "Databricks",
"name": "databricks",
"path": "{{.http_path}}"
}
],
"sqltools.autoConnectTo": "",
}
41 changes: 41 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/README.md
@@ -0,0 +1,41 @@
# {{.project_name}}

The '{{.project_name}}' project was generated by using the default-sql template.

## Getting started

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/install.html

2. Authenticate to your Databricks workspace (if you have not done so already):
```
$ databricks configure
```

3. To deploy a development copy of this project, type:
```
$ databricks bundle deploy --target dev
```
(Note that "dev" is the default target, so the `--target` parameter
is optional here.)

This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] {{.project_name}}_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.

4. Similarly, to deploy a production copy, type:
```
$ databricks bundle deploy --target prod
```

5. To run a job, use the "run" command:
```
$ databricks bundle run
```

6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
https://docs.databricks.com/dev-tools/vscode-ext.html.

7. For documentation on the Databricks Asset Bundles format used
for this project, and for CI/CD configuration, see
https://docs.databricks.com/dev-tools/bundles/index.html.
71 changes: 71 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl
@@ -0,0 +1,71 @@
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: {{.project_name}}

include:
- resources/*.yml

# Variable declarations. These variables are assigned in the dev/prod targets below.
variables:
warehouse_id:
description: The warehouse to use
catalog:
description: The catalog to use
schema:
description: The schema to use

{{- $dev_schema := .shared_schema }}
{{- $prod_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- $dev_schema = "${workspace.current_user.short_name}"}}
{{- $prod_schema = "default"}}
{{- end}}

# Deployment targets.
targets:
# The 'dev' target, for development purposes. This target is the default.
dev:
# We use 'mode: development' to indicate this is a personal development copy.
# Any job schedules and triggers are paused by default
mode: development
default: true
workspace:
host: {{workspace_host}}
variables:
warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}}
catalog: {{.default_catalog}}
schema: {{$dev_schema}}

## Optionally, there could be a 'staging' target here.
## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/ci-cd.html.)
#
# staging:
# workspace:
# host: {{workspace_host}}

# The 'prod' target, used for production deployment.
prod:
# We use 'mode: production' to indicate this is a production deployment.
# Doing so enables strict verification of the settings below.
mode: production
workspace:
host: {{workspace_host}}
# We always use /Users/{{user_name}} for all resources to make sure we only have a single copy.
{{- /*
Internal note 2023-12: CLI versions v0.211.0 and before would show an error when using `mode: production`
with a path that doesn't say "/Shared". For now, we'll include an extra comment in the template
to explain that customers should update if they see this.
*/}}
# If this path results in an error, please make sure you have a recent version of the CLI installed.
root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
variables:
warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}}
catalog: {{.default_catalog}}
schema: {{$prod_schema}}
{{- if not is_service_principal}}
run_as:
# This runs as {{user_name}} in production. We could also use a service principal here
# using service_principal_name (see https://docs.databricks.com/en/dev-tools/bundles/permissions.html).
user_name: {{user_name}}
{{end -}}
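One detail worth spelling out: the `warehouse_id` values above are derived from the prompted `http_path` by the `(regexp "[^/]+$")` expression, which captures the final path segment. The effect is equivalent to this shell sketch (warehouse ID illustrative):

```sh
# The template's regexp keeps everything after the last '/':
http_path="/sql/1.0/warehouses/abcdef1234567890"
echo "${http_path##*/}"   # prints: abcdef1234567890
```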
43 changes: 43 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/resources/{{.project_name}}_sql_job.yml.tmpl
@@ -0,0 +1,43 @@
# A job running SQL queries on a SQL warehouse
resources:
jobs:
{{.project_name}}_sql_job:
name: {{.project_name}}_sql_job

schedule:
# Run every day at 7:17 AM
quartz_cron_expression: '44 17 7 * * ?'
timezone_id: Europe/Amsterdam

{{- if not is_service_principal}}

email_notifications:
on_failure:
- {{user_name}}

{{else}}

{{end -}}

parameters:
- name: catalog
default: ${var.catalog}
- name: schema
default: ${var.schema}
- name: bundle_target
default: ${bundle.target}

tasks:
- task_key: orders_raw
sql_task:
warehouse_id: ${var.warehouse_id}
file:
path: ../src/orders_raw.sql

- task_key: orders_daily
depends_on:
- task_key: orders_raw
sql_task:
warehouse_id: ${var.warehouse_id}
file:
path: ../src/orders_daily.sql
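Once a project is generated, the job defined above can be deployed and triggered by its resource key. A sketch, assuming a project named `my_project`, which makes the key `my_project_sql_job`:

```sh
# Deploy the development copy of the bundle, then run the SQL job:
databricks bundle deploy --target dev
databricks bundle run my_project_sql_job
```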
4 changes: 4 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/scratch/README.md
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks and SQL files.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
35 changes: 35 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/scratch/exploration.ipynb
@@ -0,0 +1,35 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "dc8c630c-1ea0-42e4-873f-e4dec4d3d416",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"%sql\n",
"SELECT * FROM json.`/databricks-datasets/nyctaxi/sample/json/`"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "exploration",
"widgets": {}
}
},
"nbformat": 4,
"nbformat_minor": 0
}
14 changes: 14 additions & 0 deletions libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql
@@ -0,0 +1,14 @@
-- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml)
{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet. */}}

CREATE OR REPLACE VIEW
IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_daily'))
AS SELECT
order_date, count(*) AS number_of_orders
FROM
IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw'))

-- During development, only process a smaller range of data
WHERE {{"{{"}}bundle_target{{"}}"}} == "prod" OR (order_date >= '2019-08-01' AND order_date < '2019-09-01')

GROUP BY order_date
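Note the doubled braces: `{{"{{"}}catalog{{"}}"}}` is a Go-template escape, so the generated .sql file contains literal `{{catalog}}`, `{{schema}}`, and `{{bundle_target}}` markers, which the `sql_task` substitutes from the `parameters` block in the resources file at run time. A sketch of inspecting the rendered output (project name illustrative):

```sh
# After initializing a project, the escapes survive as literal parameter markers:
grep 'IDENTIFIER' my_sql_project/src/orders_daily.sql
#   IDENTIFIER(CONCAT({{catalog}}, '.', {{schema}}, '.', 'orders_daily'))
#   IDENTIFIER(CONCAT({{catalog}}, '.', {{schema}}, '.', 'orders_raw'))
```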