-
Notifications
You must be signed in to change notification settings - Fork 57
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor jobs path translation #1782
Merged
andrewnester
merged 2 commits into
databricks:main
from
kanterov:kanterov/refactor-jobs-paths
Sep 24, 2024
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
package paths | ||
|
||
import ( | ||
"github.com/databricks/cli/bundle/libraries" | ||
"github.com/databricks/cli/libs/dyn" | ||
) | ||
|
||
type jobRewritePattern struct { | ||
pattern dyn.Pattern | ||
kind PathKind | ||
skipRewrite func(string) bool | ||
} | ||
|
||
// noSkipRewrite is the skip predicate for patterns whose values must always be
// visited: it ignores its argument and reports false.
func noSkipRewrite(_ string) bool {
	return false
}
|
||
func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern { | ||
return []jobRewritePattern{ | ||
{ | ||
base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), | ||
PathKindNotebook, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), | ||
PathKindWorkspaceFile, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), | ||
PathKindDirectory, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), | ||
PathKindWorkspaceFile, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), | ||
PathKindLibrary, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), | ||
PathKindLibrary, | ||
noSkipRewrite, | ||
}, | ||
{ | ||
base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), | ||
PathKindWorkspaceFile, | ||
noSkipRewrite, | ||
}, | ||
} | ||
} | ||
|
||
func jobRewritePatterns() []jobRewritePattern { | ||
// Base pattern to match all tasks in all jobs. | ||
base := dyn.NewPattern( | ||
dyn.Key("resources"), | ||
dyn.Key("jobs"), | ||
dyn.AnyKey(), | ||
dyn.Key("tasks"), | ||
dyn.AnyIndex(), | ||
) | ||
|
||
// Compile list of patterns and their respective rewrite functions. | ||
jobEnvironmentsPatterns := []jobRewritePattern{ | ||
{ | ||
dyn.NewPattern( | ||
dyn.Key("resources"), | ||
dyn.Key("jobs"), | ||
dyn.AnyKey(), | ||
dyn.Key("environments"), | ||
dyn.AnyIndex(), | ||
dyn.Key("spec"), | ||
dyn.Key("dependencies"), | ||
dyn.AnyIndex(), | ||
), | ||
PathKindWithPrefix, | ||
func(s string) bool { | ||
return !libraries.IsLibraryLocal(s) | ||
}, | ||
}, | ||
} | ||
|
||
taskPatterns := jobTaskRewritePatterns(base) | ||
forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) | ||
allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) | ||
allPatterns = append(allPatterns, forEachPatterns...) | ||
return allPatterns | ||
} | ||
|
||
// VisitJobPaths visits all paths in job resources and applies a function to each path. | ||
func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) { | ||
var err error | ||
var newValue = value | ||
|
||
for _, rewritePattern := range jobRewritePatterns() { | ||
newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { | ||
if rewritePattern.skipRewrite(v.MustString()) { | ||
return v, nil | ||
} | ||
|
||
return fn(p, rewritePattern.kind, v) | ||
}) | ||
|
||
if err != nil { | ||
return dyn.InvalidValue, err | ||
} | ||
} | ||
|
||
return newValue, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
package paths | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/databricks/cli/bundle/config" | ||
"github.com/databricks/cli/bundle/config/resources" | ||
"github.com/databricks/cli/libs/dyn" | ||
assert "github.com/databricks/cli/libs/dyn/dynassert" | ||
"github.com/databricks/databricks-sdk-go/service/compute" | ||
"github.com/databricks/databricks-sdk-go/service/jobs" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestVisitJobPaths(t *testing.T) { | ||
task0 := jobs.Task{ | ||
NotebookTask: &jobs.NotebookTask{ | ||
NotebookPath: "abc", | ||
}, | ||
} | ||
task1 := jobs.Task{ | ||
SparkPythonTask: &jobs.SparkPythonTask{ | ||
PythonFile: "abc", | ||
}, | ||
} | ||
task2 := jobs.Task{ | ||
DbtTask: &jobs.DbtTask{ | ||
ProjectDirectory: "abc", | ||
}, | ||
} | ||
task3 := jobs.Task{ | ||
SqlTask: &jobs.SqlTask{ | ||
File: &jobs.SqlTaskFile{ | ||
Path: "abc", | ||
}, | ||
}, | ||
} | ||
task4 := jobs.Task{ | ||
Libraries: []compute.Library{ | ||
{Whl: "dist/foo.whl"}, | ||
}, | ||
} | ||
task5 := jobs.Task{ | ||
Libraries: []compute.Library{ | ||
{Jar: "dist/foo.jar"}, | ||
}, | ||
} | ||
task6 := jobs.Task{ | ||
Libraries: []compute.Library{ | ||
{Requirements: "requirements.txt"}, | ||
}, | ||
} | ||
|
||
job0 := &resources.Job{ | ||
JobSettings: &jobs.JobSettings{ | ||
Tasks: []jobs.Task{ | ||
task0, | ||
task1, | ||
task2, | ||
task3, | ||
task4, | ||
task5, | ||
task6, | ||
}, | ||
}, | ||
} | ||
|
||
root := config.Root{ | ||
Resources: config.Resources{ | ||
Jobs: map[string]*resources.Job{ | ||
"job0": job0, | ||
}, | ||
}, | ||
} | ||
|
||
actual := visitJobPaths(t, root) | ||
expected := []dyn.Path{ | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"), | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"), | ||
} | ||
|
||
assert.ElementsMatch(t, expected, actual) | ||
} | ||
|
||
func TestVisitJobPaths_environments(t *testing.T) { | ||
environment0 := jobs.JobEnvironment{ | ||
Spec: &compute.Environment{ | ||
Dependencies: []string{ | ||
"dist_0/*.whl", | ||
"dist_1/*.whl", | ||
}, | ||
}, | ||
} | ||
job0 := &resources.Job{ | ||
JobSettings: &jobs.JobSettings{ | ||
Environments: []jobs.JobEnvironment{ | ||
environment0, | ||
}, | ||
}, | ||
} | ||
|
||
root := config.Root{ | ||
Resources: config.Resources{ | ||
Jobs: map[string]*resources.Job{ | ||
"job0": job0, | ||
}, | ||
}, | ||
} | ||
|
||
actual := visitJobPaths(t, root) | ||
expected := []dyn.Path{ | ||
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"), | ||
dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"), | ||
} | ||
|
||
assert.ElementsMatch(t, expected, actual) | ||
} | ||
|
||
func TestVisitJobPaths_foreach(t *testing.T) { | ||
task0 := jobs.Task{ | ||
ForEachTask: &jobs.ForEachTask{ | ||
Task: jobs.Task{ | ||
NotebookTask: &jobs.NotebookTask{ | ||
NotebookPath: "abc", | ||
}, | ||
}, | ||
}, | ||
} | ||
job0 := &resources.Job{ | ||
JobSettings: &jobs.JobSettings{ | ||
Tasks: []jobs.Task{ | ||
task0, | ||
}, | ||
}, | ||
} | ||
|
||
root := config.Root{ | ||
Resources: config.Resources{ | ||
Jobs: map[string]*resources.Job{ | ||
"job0": job0, | ||
}, | ||
}, | ||
} | ||
|
||
actual := visitJobPaths(t, root) | ||
expected := []dyn.Path{ | ||
dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"), | ||
} | ||
|
||
assert.ElementsMatch(t, expected, actual) | ||
} | ||
|
||
func visitJobPaths(t *testing.T, root config.Root) []dyn.Path { | ||
var actual []dyn.Path | ||
err := root.Mutate(func(value dyn.Value) (dyn.Value, error) { | ||
return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) { | ||
actual = append(actual, p) | ||
return v, nil | ||
}) | ||
}) | ||
require.NoError(t, err) | ||
return actual | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
package paths | ||
|
||
import "github.com/databricks/cli/libs/dyn" | ||
|
||
// PathKind classifies a path found in a resource definition. It is passed to
// the visitor callback alongside the path itself.
type PathKind int

// The constants are declared with an explicit PathKind type. Without it, iota
// yields untyped integer constants whose default type is int, not PathKind.
const (
	// PathKindLibrary is a path to a library file
	PathKindLibrary PathKind = iota

	// PathKindNotebook is a path to a notebook file
	PathKindNotebook

	// PathKindWorkspaceFile is a path to a regular workspace file,
	// notebooks are not allowed because they are uploaded as a special
	// kind of workspace object.
	PathKindWorkspaceFile

	// PathKindWithPrefix is a path that starts with './'
	PathKindWithPrefix

	// PathKindDirectory is a path to directory
	PathKindDirectory
)
|
||
type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error) |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO this piece belongs in the original code. Then this routine (and package) is only concerned with visiting everything, and what to do with these paths is something for the caller to decide. These entries can use a different `Kind` to differentiate what they contain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The rationale here is that if a property specifies a PyPI library, it isn't really a path, so it doesn't need to be handled. I don't see a use case where we would want to visit PyPI libraries, but if we need that, it should be easy to fix by adding a separate kind.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that makes sense. But then the code should be updated to no longer talk about rewriting but rather about just visiting or not visiting. It's the caller who decides what to do upon visiting.