Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor jobs path translation #1782

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions bundle/config/mutator/paths/job_paths_visitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package paths

import (
"github.com/databricks/cli/bundle/libraries"
"github.com/databricks/cli/libs/dyn"
)

type jobRewritePattern struct {
pattern dyn.Pattern
kind PathKind
skipRewrite func(string) bool
}

func noSkipRewrite(string) bool {
return false
}

func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern {
return []jobRewritePattern{
{
base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")),
PathKindNotebook,
noSkipRewrite,
},
{
base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")),
PathKindWorkspaceFile,
noSkipRewrite,
},
{
base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")),
PathKindDirectory,
noSkipRewrite,
},
{
base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")),
PathKindWorkspaceFile,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")),
PathKindLibrary,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")),
PathKindLibrary,
noSkipRewrite,
},
{
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")),
PathKindWorkspaceFile,
noSkipRewrite,
},
}
}

func jobRewritePatterns() []jobRewritePattern {
// Base pattern to match all tasks in all jobs.
base := dyn.NewPattern(
dyn.Key("resources"),
dyn.Key("jobs"),
dyn.AnyKey(),
dyn.Key("tasks"),
dyn.AnyIndex(),
)

// Compile list of patterns and their respective rewrite functions.
jobEnvironmentsPatterns := []jobRewritePattern{
{
dyn.NewPattern(
dyn.Key("resources"),
dyn.Key("jobs"),
dyn.AnyKey(),
dyn.Key("environments"),
dyn.AnyIndex(),
dyn.Key("spec"),
dyn.Key("dependencies"),
dyn.AnyIndex(),
),
PathKindWithPrefix,
func(s string) bool {
return !libraries.IsLibraryLocal(s)
},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO this piece belongs in the original code. Then this routine (and package) is only concerned with visiting everything and what to do with these paths is something for the caller to decide. These entries can use a different Kind to differentiate what they contain.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The rationale here is that if property specifies a PyPi library, it isn't really a path, so it doesn't need to be handled. I don't see a use-case when we want to visit PyPi libraries, but if we need that, it should be very fixable by adding a separate kind.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that makes sense. But then the code should be updated to no longer talk about rewriting but rather about just visiting or not visiting. It's the caller who decides what to do upon visiting.

},
}

taskPatterns := jobTaskRewritePatterns(base)
forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task")))
allPatterns := append(taskPatterns, jobEnvironmentsPatterns...)
allPatterns = append(allPatterns, forEachPatterns...)
return allPatterns
}

// VisitJobPaths visits all paths in job resources and applies a function to each path.
func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) {
var err error
var newValue = value

for _, rewritePattern := range jobRewritePatterns() {
newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
if rewritePattern.skipRewrite(v.MustString()) {
return v, nil
}

return fn(p, rewritePattern.kind, v)
})

if err != nil {
return dyn.InvalidValue, err
}
}

return newValue, nil
}
168 changes: 168 additions & 0 deletions bundle/config/mutator/paths/job_paths_visitor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package paths

import (
"testing"

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)

func TestVisitJobPaths(t *testing.T) {
task0 := jobs.Task{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "abc",
},
}
task1 := jobs.Task{
SparkPythonTask: &jobs.SparkPythonTask{
PythonFile: "abc",
},
}
task2 := jobs.Task{
DbtTask: &jobs.DbtTask{
ProjectDirectory: "abc",
},
}
task3 := jobs.Task{
SqlTask: &jobs.SqlTask{
File: &jobs.SqlTaskFile{
Path: "abc",
},
},
}
task4 := jobs.Task{
Libraries: []compute.Library{
{Whl: "dist/foo.whl"},
},
}
task5 := jobs.Task{
Libraries: []compute.Library{
{Jar: "dist/foo.jar"},
},
}
task6 := jobs.Task{
Libraries: []compute.Library{
{Requirements: "requirements.txt"},
},
}

job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
task0,
task1,
task2,
task3,
task4,
task5,
task6,
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"),
dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"),
dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"),
dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"),
dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"),
dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"),
dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"),
}

assert.ElementsMatch(t, expected, actual)
}

func TestVisitJobPaths_environments(t *testing.T) {
environment0 := jobs.JobEnvironment{
Spec: &compute.Environment{
Dependencies: []string{
"dist_0/*.whl",
"dist_1/*.whl",
},
},
}
job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Environments: []jobs.JobEnvironment{
environment0,
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"),
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"),
}

assert.ElementsMatch(t, expected, actual)
}

func TestVisitJobPaths_foreach(t *testing.T) {
task0 := jobs.Task{
ForEachTask: &jobs.ForEachTask{
Task: jobs.Task{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "abc",
},
},
},
}
job0 := &resources.Job{
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
task0,
},
},
}

root := config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job0": job0,
},
},
}

actual := visitJobPaths(t, root)
expected := []dyn.Path{
dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"),
}

assert.ElementsMatch(t, expected, actual)
}

func visitJobPaths(t *testing.T, root config.Root) []dyn.Path {
var actual []dyn.Path
err := root.Mutate(func(value dyn.Value) (dyn.Value, error) {
return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) {
actual = append(actual, p)
return v, nil
})
})
require.NoError(t, err)
return actual
}
26 changes: 26 additions & 0 deletions bundle/config/mutator/paths/visitor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package paths

import "github.com/databricks/cli/libs/dyn"

type PathKind int

const (
// PathKindLibrary is a path to a library file
PathKindLibrary = iota

// PathKindNotebook is a path to a notebook file
PathKindNotebook

// PathKindWorkspaceFile is a path to a regular workspace file,
// notebooks are not allowed because they are uploaded a special
// kind of workspace object.
PathKindWorkspaceFile

// PathKindWithPrefix is a path that starts with './'
PathKindWithPrefix

// PathKindDirectory is a path to directory
PathKindDirectory
)

type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error)
Loading