Skip to content

Commit

Permalink
Refactor jobs path translation
Browse files Browse the repository at this point in the history
  • Loading branch information
kanterov committed Sep 20, 2024
1 parent cf989a7 commit 7a24c99
Show file tree
Hide file tree
Showing 9 changed files with 754 additions and 136 deletions.
6 changes: 6 additions & 0 deletions bundle/config/experimental.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ type PyDABs struct {
// These packages are imported to discover resources, resource generators, and mutators.
// This list can include namespace packages, which causes the import of nested packages.
Import []string `json:"import,omitempty"`

// LoadLocations is a flag to enable loading Python source locations from the PyDABs.
//
// Locations are only supported since PyDABs 0.6.0, and because of that,
// this flag is disabled by default.
LoadLocations bool `json:"load_locations,omitempty"`
}

type Command string
Expand Down
115 changes: 115 additions & 0 deletions bundle/config/mutator/paths/job_paths_visitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package paths

import (
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/cli/libs/dyn"
)

type jobRewritePattern struct {
pattern dyn.Pattern
kind PathKind
skipRewrite func(string) bool
}

func noSkipRewrite(string) bool {
return false
}

func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern {
return []jobRewritePattern{
{
base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")),
PathKindNotebook,
noSkipRewrite,
},
{
base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")),
PathKindWorkspaceFile,
noSkipRewrite,
},
{
base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")),
PathKindDirectory,
noSkipRewrite,
},
{
base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")),
PathKindWorkspaceFile,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")),
PathKindLibrary,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")),
PathKindLibrary,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")),
PathKindWorkspaceFile,
noSkipRewrite,
},
}
}

func jobRewritePatterns() []jobRewritePattern {
// Base pattern to match all tasks in all jobs.
base := dyn.NewPattern(
dyn.Key("resources"),
dyn.Key("jobs"),
dyn.AnyKey(),
dyn.Key("tasks"),
dyn.AnyIndex(),
)

// Compile list of patterns and their respective rewrite functions.
jobEnvironmentsPatterns := []jobRewritePattern{
{
dyn.NewPattern(
dyn.Key("resources"),
dyn.Key("jobs"),
dyn.AnyKey(),
dyn.Key("environments"),
dyn.AnyIndex(),
dyn.Key("spec"),
dyn.Key("dependencies"),
dyn.AnyIndex(),
),
PathKindWithPrefix,
func(s string) bool {
return !libraries.IsLibraryLocal(s)
},
},
}

taskPatterns := jobTaskRewritePatterns(base)
forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task")))
allPatterns := append(taskPatterns, jobEnvironmentsPatterns...)
allPatterns = append(allPatterns, forEachPatterns...)
return allPatterns
}

// VisitJobPaths visits all paths in job resources and applies a function to each path.
func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) {
var err error
var newValue = value

for _, rewritePattern := range jobRewritePatterns() {
newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
if rewritePattern.skipRewrite(v.MustString()) {
return v, nil
}

return fn(p, rewritePattern.kind, v)
})

if err != nil {
return dyn.InvalidValue, err
}
}

return newValue, nil
}
158 changes: 158 additions & 0 deletions bundle/config/mutator/paths/job_paths_visitor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package paths

import (
"testing"

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)

func TestVisitJobPaths(t *testing.T) {
job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "abc",
},
},
{
SparkPythonTask: &jobs.SparkPythonTask{
PythonFile: "abc",
},
},
{
DbtTask: &jobs.DbtTask{
ProjectDirectory: "abc",
},
},
{
SqlTask: &jobs.SqlTask{
File: &jobs.SqlTaskFile{
Path: "abc",
},
},
},
{
Libraries: []compute.Library{
{Whl: "dist/foo.whl"},
},
},
{
Libraries: []compute.Library{
{Jar: "dist/foo.jar"},
},
},
{
Libraries: []compute.Library{
{Requirements: "requirements.txt"},
},
},
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"),
dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"),
dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"),
dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"),
dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"),
dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"),
dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"),
}

assert.ElementsMatch(t, expected, actual)
}

func TestVisitJobPaths_environments(t *testing.T) {
job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Environments: []jobs.JobEnvironment{
{
Spec: &compute.Environment{
Dependencies: []string{
"dist_0/*.whl",
"dist_1/*.whl",
},
},
},
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"),
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"),
}

assert.ElementsMatch(t, expected, actual)
}

func TestVisitJobPaths_foreach(t *testing.T) {
job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
ForEachTask: &jobs.ForEachTask{
Task: jobs.Task{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "abc",
},
},
},
},
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"),
}

assert.ElementsMatch(t, expected, actual)
}

func visitJobPaths(t *testing.T, root config.Root) []dyn.Path {
var actual []dyn.Path
err := root.Mutate(func(value dyn.Value) (dyn.Value, error) {
return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) {
actual = append(actual, p)
return v, nil
})
})
require.NoError(t, err)
return actual
}
26 changes: 26 additions & 0 deletions bundle/config/mutator/paths/visitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package paths

import "github.com/databricks/cli/libs/dyn"

type PathKind int

const (
// PathKindLibrary is a path to a library file
PathKindLibrary = iota

// PathKindNotebook is a path to a notebook file
PathKindNotebook

// PathKindWorkspaceFile is a path to a regular workspace file,
// notebooks are not allowed because they are uploaded a special
// kind of workspace object.
PathKindWorkspaceFile

// PathKindWithPrefix is a path that starts with './'
PathKindWithPrefix

// PathKindDirectory is a path to directory
PathKindDirectory
)

type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error)
Loading

0 comments on commit 7a24c99

Please sign in to comment.