From 7a24c9998d3de7ed539b255d97894d3448a84b7c Mon Sep 17 00:00:00 2001 From: Gleb Kanterov Date: Fri, 20 Sep 2024 16:39:32 +0200 Subject: [PATCH] Refactor jobs path translation --- bundle/config/experimental.go | 6 + .../config/mutator/paths/job_paths_visitor.go | 115 ++++++++++++ .../mutator/paths/job_paths_visitor_test.go | 158 +++++++++++++++++ bundle/config/mutator/paths/visitor.go | 26 +++ .../config/mutator/python/python_locations.go | 163 ++++++++++++++++++ .../mutator/python/python_locations_test.go | 127 ++++++++++++++ .../config/mutator/python/python_mutator.go | 80 +++++++-- .../mutator/python/python_mutator_test.go | 78 +++++++-- bundle/config/mutator/translate_paths_jobs.go | 137 ++++----------- 9 files changed, 754 insertions(+), 136 deletions(-) create mode 100644 bundle/config/mutator/paths/job_paths_visitor.go create mode 100644 bundle/config/mutator/paths/job_paths_visitor_test.go create mode 100644 bundle/config/mutator/paths/visitor.go create mode 100644 bundle/config/mutator/python/python_locations.go create mode 100644 bundle/config/mutator/python/python_locations_test.go diff --git a/bundle/config/experimental.go b/bundle/config/experimental.go index 061bbdae06..bf5e019088 100644 --- a/bundle/config/experimental.go +++ b/bundle/config/experimental.go @@ -45,6 +45,12 @@ type PyDABs struct { // These packages are imported to discover resources, resource generators, and mutators. // This list can include namespace packages, which causes the import of nested packages. Import []string `json:"import,omitempty"` + + // LoadLocations is a flag to enable loading Python source locations from the PyDABs. + // + // Locations are only supported since PyDABs 0.6.0, and because of that, + // this flag is disabled by default. + LoadLocations bool `json:"load_locations,omitempty"` } type Command string diff --git a/bundle/config/mutator/paths/job_paths_visitor.go b/bundle/config/mutator/paths/job_paths_visitor.go new file mode 100644 index 0000000000..275a8fa536 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor.go @@ -0,0 +1,115 @@ +package paths + +import ( + "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/libs/dyn" +) + +type jobRewritePattern struct { + pattern dyn.Pattern + kind PathKind + skipRewrite func(string) bool +} + +func noSkipRewrite(string) bool { + return false +} + +func jobTaskRewritePatterns(base dyn.Pattern) []jobRewritePattern { + return []jobRewritePattern{ + { + base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), + PathKindNotebook, + noSkipRewrite, + }, + { + base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), + PathKindDirectory, + noSkipRewrite, + }, + { + base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), + PathKindLibrary, + noSkipRewrite, + }, + { + base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), + PathKindWorkspaceFile, + noSkipRewrite, + }, + } +} + +func jobRewritePatterns() []jobRewritePattern { + // Base pattern to match all tasks in all jobs. + base := dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("tasks"), + dyn.AnyIndex(), + ) + + // Compile list of patterns and their respective rewrite functions. + jobEnvironmentsPatterns := []jobRewritePattern{ + { + dyn.NewPattern( + dyn.Key("resources"), + dyn.Key("jobs"), + dyn.AnyKey(), + dyn.Key("environments"), + dyn.AnyIndex(), + dyn.Key("spec"), + dyn.Key("dependencies"), + dyn.AnyIndex(), + ), + PathKindWithPrefix, + func(s string) bool { + return !libraries.IsLibraryLocal(s) + }, + }, + } + + taskPatterns := jobTaskRewritePatterns(base) + forEachPatterns := jobTaskRewritePatterns(base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) + allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) + allPatterns = append(allPatterns, forEachPatterns...) + return allPatterns +} + +// VisitJobPaths visits all paths in job resources and applies a function to each path. +func VisitJobPaths(value dyn.Value, fn VisitFunc) (dyn.Value, error) { + var err error + var newValue = value + + for _, rewritePattern := range jobRewritePatterns() { + newValue, err = dyn.MapByPattern(newValue, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { + if rewritePattern.skipRewrite(v.MustString()) { + return v, nil + } + + return fn(p, rewritePattern.kind, v) + }) + + if err != nil { + return dyn.InvalidValue, err + } + } + + return newValue, nil +} diff --git a/bundle/config/mutator/paths/job_paths_visitor_test.go b/bundle/config/mutator/paths/job_paths_visitor_test.go new file mode 100644 index 0000000000..949583e6d9 --- /dev/null +++ b/bundle/config/mutator/paths/job_paths_visitor_test.go @@ -0,0 +1,158 @@ +package paths + +import ( + "testing" + + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/resources" + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" + "github.com/stretchr/testify/require" +) + +func TestVisitJobPaths(t *testing.T) { + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + }, + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "abc", + }, + }, + { + DbtTask: &jobs.DbtTask{ + ProjectDirectory: "abc", + }, + }, + { + SqlTask: &jobs.SqlTask{ + File: &jobs.SqlTaskFile{ + Path: "abc", + }, + }, + }, + { + Libraries: []compute.Library{ + {Whl: "dist/foo.whl"}, + }, + }, + { + Libraries: []compute.Library{ + {Jar: "dist/foo.jar"}, + }, + }, + { + Libraries: []compute.Library{ + {Requirements: "requirements.txt"}, + }, + }, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].notebook_task.notebook_path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[1].spark_python_task.python_file"), + dyn.MustPathFromString("resources.jobs.job0.tasks[2].dbt_task.project_directory"), + dyn.MustPathFromString("resources.jobs.job0.tasks[3].sql_task.file.path"), + dyn.MustPathFromString("resources.jobs.job0.tasks[4].libraries[0].whl"), + dyn.MustPathFromString("resources.jobs.job0.tasks[5].libraries[0].jar"), + dyn.MustPathFromString("resources.jobs.job0.tasks[6].libraries[0].requirements"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func TestVisitJobPaths_environments(t *testing.T) { + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Environments: []jobs.JobEnvironment{ + { + Spec: &compute.Environment{ + Dependencies: []string{ + "dist_0/*.whl", + "dist_1/*.whl", + }, + }, + }, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[0]"), + dyn.MustPathFromString("resources.jobs.job0.environments[0].spec.dependencies[1]"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func TestVisitJobPaths_foreach(t *testing.T) { + job0 := &resources.Job{ + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + ForEachTask: &jobs.ForEachTask{ + Task: jobs.Task{ + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "abc", + }, + }, + }, + }, + }, + }, + } + + root := config.Root{ + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job0": job0, + }, + }, + } + + actual := visitJobPaths(t, root) + expected := []dyn.Path{ + dyn.MustPathFromString("resources.jobs.job0.tasks[0].for_each_task.task.notebook_task.notebook_path"), + } + + assert.ElementsMatch(t, expected, actual) +} + +func visitJobPaths(t *testing.T, root config.Root) []dyn.Path { + var actual []dyn.Path + err := root.Mutate(func(value dyn.Value) (dyn.Value, error) { + return VisitJobPaths(value, func(p dyn.Path, kind PathKind, v dyn.Value) (dyn.Value, error) { + actual = append(actual, p) + return v, nil + }) + }) + require.NoError(t, err) + return actual +} diff --git a/bundle/config/mutator/paths/visitor.go b/bundle/config/mutator/paths/visitor.go new file mode 100644 index 0000000000..40d1f14ef7 --- /dev/null +++ b/bundle/config/mutator/paths/visitor.go @@ -0,0 +1,26 @@ +package paths + +import "github.com/databricks/cli/libs/dyn" + +type PathKind int + +const ( + // PathKindLibrary is a path to a library file + PathKindLibrary = iota + + // PathKindNotebook is a path to a notebook file + PathKindNotebook + + // PathKindWorkspaceFile is a path to a regular workspace file, + // notebooks are not allowed because they are uploaded a special + // kind of workspace object. + PathKindWorkspaceFile + + // PathKindWithPrefix is a path that starts with './' + PathKindWithPrefix + + // PathKindDirectory is a path to directory + PathKindDirectory +) + +type VisitFunc func(path dyn.Path, kind PathKind, value dyn.Value) (dyn.Value, error) diff --git a/bundle/config/mutator/python/python_locations.go b/bundle/config/mutator/python/python_locations.go new file mode 100644 index 0000000000..97b7e1702d --- /dev/null +++ b/bundle/config/mutator/python/python_locations.go @@ -0,0 +1,163 @@ +package python + +import ( + "encoding/json" + "fmt" + "io" + "path/filepath" + + "github.com/databricks/cli/libs/dyn" +) + +const generatedFileName = "__generated_by_pydabs__.yml" + +// pythonLocations is data structure for efficient location lookup for a given path +type pythonLocations struct { + // descendants referenced by index, e.g. '.foo' + keys map[string]*pythonLocations + + // descendants referenced by key, e.g. '[0]' + indexes map[int]*pythonLocations + + // location for the current node if it exists + location dyn.Location + + // if true, location is present + exists bool +} + +type pythonLocationEntry struct { + Path string `json:"path"` + File string `json:"file"` + Line int `json:"line"` + Column int `json:"column"` +} + +// mergePythonLocations applies locations from Python mutator into given dyn.Value +// +// The primary use-case is to merge locations.json with output.json, so that any +// validation errors will point to Python source code instead of generated YAML. +func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { + return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { + if newLocation, ok := findPythonLocation(locations, path); ok { + var newLocations []dyn.Location + + // the first item in the list is the "last" location used for error reporting + newLocations = append(newLocations, newLocation) + + for _, location := range value.Locations() { + if filepath.Base(location.File) == generatedFileName { + continue + } + + // don't add duplicates if location already exists + if location == newLocation { + continue + } + + newLocations = append(newLocations, location) + } + + return value.WithLocations(newLocations), nil + } else { + return value, nil + } + }) +} + +// parsePythonLocations parses locations.json from the Python mutator. +// +// locations file is newline-separated JSON objects with pythonLocationEntry structure. +func parsePythonLocations(input io.Reader) (*pythonLocations, error) { + decoder := json.NewDecoder(input) + locations := newPythonLocations() + + for decoder.More() { + var entry pythonLocationEntry + + err := decoder.Decode(&entry) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + path, err := dyn.NewPathFromString(entry.Path) + if err != nil { + return nil, fmt.Errorf("failed to parse python location: %s", err) + } + + location := dyn.Location{ + File: entry.File, + Line: entry.Line, + Column: entry.Column, + } + + putPythonLocation(locations, path, location) + } + + return locations, nil +} + +// putPythonLocation puts the location to the trie for the given path +func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { + var currentNode = trie + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + currentNode.keys[key] = newPythonLocations() + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + currentNode.indexes[index] = newPythonLocations() + } + + currentNode = currentNode.indexes[index] + } + } + + currentNode.location = location + currentNode.exists = true +} + +// newPythonLocations creates a new trie node +func newPythonLocations() *pythonLocations { + return &pythonLocations{ + keys: make(map[string]*pythonLocations), + indexes: make(map[int]*pythonLocations), + } +} + +// findPythonLocation finds the location or closest ancestor location in the trie for the given path +// if no ancestor or exact location is found, false is returned. +func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { + var currentNode = locations + var lastLocation = locations.location + var exists = locations.exists + + for _, component := range path { + if key := component.Key(); key != "" { + if _, ok := currentNode.keys[key]; !ok { + break + } + + currentNode = currentNode.keys[key] + } else { + index := component.Index() + if _, ok := currentNode.indexes[index]; !ok { + break + } + + currentNode = currentNode.indexes[index] + } + + if currentNode.exists { + lastLocation = currentNode.location + exists = true + } + } + + return lastLocation, exists +} diff --git a/bundle/config/mutator/python/python_locations_test.go b/bundle/config/mutator/python/python_locations_test.go new file mode 100644 index 0000000000..28cd33deea --- /dev/null +++ b/bundle/config/mutator/python/python_locations_test.go @@ -0,0 +1,127 @@ +package python + +import ( + "bytes" + "testing" + + "github.com/databricks/cli/libs/dyn" + assert "github.com/databricks/cli/libs/dyn/dynassert" +) + +func TestMergeLocations(t *testing.T) { + pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1} + generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1} + yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation) + + input := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + "bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}), + "baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}), + "qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}), + }, + []dyn.Location{}, + ), + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + expected := dyn.NewValue( + map[string]dyn.Value{ + "foo": dyn.NewValue( + map[string]dyn.Value{ + // pythonLocation was present before, we don't duplicate it, only move it to the beginning + "bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // pythonLocation is appended to the beginning of the list if absent + "baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + // generatedLocation is replaced by pythonLocation + "qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}), + }, + []dyn.Location{pythonLocation}, + ), + // if location is unknown, we keep it as-is + "bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}), + }, + []dyn.Location{yamlLocation}, + ) + + actual, err := mergePythonLocations(input, locations) + + assert.NoError(t, err) + assert.Equal(t, expected, actual) +} + +func TestFindLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_indexPathComponent(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + location2 := dyn.Location{File: "foo.py", Line: 3, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]"), location2) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar[0]")) + + assert.True(t, exists) + assert.Equal(t, location2, actual) +} + +func TestFindLocation_closestAncestorLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + actual, exists := findPythonLocation(locations, dyn.MustPathFromString("foo.bar.baz")) + + assert.True(t, exists) + assert.Equal(t, location1, actual) +} + +func TestFindLocation_unknownLocation(t *testing.T) { + location0 := dyn.Location{File: "foo.py", Line: 1, Column: 1} + location1 := dyn.Location{File: "foo.py", Line: 2, Column: 1} + + locations := newPythonLocations() + putPythonLocation(locations, dyn.MustPathFromString("foo"), location0) + putPythonLocation(locations, dyn.MustPathFromString("foo.bar"), location1) + + _, exists := findPythonLocation(locations, dyn.MustPathFromString("bar")) + + assert.False(t, exists) +} + +func TestParsePythonLocations(t *testing.T) { + expected := dyn.Location{File: "foo.py", Line: 1, Column: 2} + + input := `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}` + reader := bytes.NewReader([]byte(input)) + locations, err := parsePythonLocations(reader) + + assert.NoError(t, err) + + assert.True(t, locations.keys["foo"].exists) + assert.Equal(t, expected, locations.keys["foo"].location) +} diff --git a/bundle/config/mutator/python/python_mutator.go b/bundle/config/mutator/python/python_mutator.go index fbf3b7e0be..c111e0e547 100644 --- a/bundle/config/mutator/python/python_mutator.go +++ b/bundle/config/mutator/python/python_mutator.go @@ -10,6 +10,8 @@ import ( "os" "path/filepath" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/databricks-sdk-go/logger" "github.com/fatih/color" @@ -108,7 +110,12 @@ func (m *pythonMutator) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagno return dyn.InvalidValue, fmt.Errorf("failed to create cache dir: %w", err) } - rightRoot, diags := m.runPythonMutator(ctx, cacheDir, b.RootPath, pythonPath, leftRoot) + rightRoot, diags := m.runPythonMutator(ctx, leftRoot, runPythonMutatorOpts{ + cacheDir: cacheDir, + rootPath: b.RootPath, + pythonPath: pythonPath, + loadLocations: experimental.PyDABs.LoadLocations, + }) mutateDiags = diags if diags.HasError() { return dyn.InvalidValue, mutateDiagsHasError @@ -152,13 +159,21 @@ func createCacheDir(ctx context.Context) (string, error) { return os.MkdirTemp("", "-pydabs") } -func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, rootPath string, pythonPath string, root dyn.Value) (dyn.Value, diag.Diagnostics) { - inputPath := filepath.Join(cacheDir, "input.json") - outputPath := filepath.Join(cacheDir, "output.json") - diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") +type runPythonMutatorOpts struct { + cacheDir string + rootPath string + pythonPath string + loadLocations bool +} + +func (m *pythonMutator) runPythonMutator(ctx context.Context, root dyn.Value, opts runPythonMutatorOpts) (dyn.Value, diag.Diagnostics) { + inputPath := filepath.Join(opts.cacheDir, "input.json") + outputPath := filepath.Join(opts.cacheDir, "output.json") + diagnosticsPath := filepath.Join(opts.cacheDir, "diagnostics.json") + locationsPath := filepath.Join(opts.cacheDir, "locations.json") args := []string{ - pythonPath, + opts.pythonPath, "-m", "databricks.bundles.build", "--phase", @@ -171,6 +186,10 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r diagnosticsPath, } + if opts.loadLocations { + args = append(args, "--locations", locationsPath) + } + if err := writeInputFile(inputPath, root); err != nil { return dyn.InvalidValue, diag.Errorf("failed to write input file: %s", err) } @@ -185,7 +204,7 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r _, processErr := process.Background( ctx, args, - process.WithDir(rootPath), + process.WithDir(opts.rootPath), process.WithStderrWriter(stderrWriter), process.WithStdoutWriter(stdoutWriter), ) @@ -221,7 +240,12 @@ func (m *pythonMutator) runPythonMutator(ctx context.Context, cacheDir string, r return dyn.InvalidValue, diag.Errorf("failed to load diagnostics: %s", pythonDiagnosticsErr) } - output, outputDiags := loadOutputFile(rootPath, outputPath) + locations, err := loadLocationsFile(locationsPath) + if err != nil { + return dyn.InvalidValue, diag.Errorf("failed to load locations: %s", err) + } + + output, outputDiags := loadOutputFile(opts.rootPath, outputPath, locations) pythonDiagnostics = pythonDiagnostics.Extend(outputDiags) // we pass through pythonDiagnostic because it contains warnings @@ -266,7 +290,23 @@ func writeInputFile(inputPath string, input dyn.Value) error { return os.WriteFile(inputPath, rootConfigJson, 0600) } -func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnostics) { +// loadLocationsFile loads locations.json containing source locations for generated YAML. +func loadLocationsFile(locationsPath string) (*pythonLocations, error) { + if _, err := os.Stat(locationsPath); os.IsNotExist(err) { + return newPythonLocations(), nil + } + + locationsFile, err := os.Open(locationsPath) + if err != nil { + return nil, fmt.Errorf("failed to open locations file: %w", err) + } + + defer locationsFile.Close() + + return parsePythonLocations(locationsFile) +} + +func loadOutputFile(rootPath string, outputPath string, locations *pythonLocations) (dyn.Value, diag.Diagnostics) { outputFile, err := os.Open(outputPath) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to open output file: %w", err)) @@ -282,7 +322,7 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos // Error: path /var/folders/.../pydabs/dist/*.whl is not contained in bundle root path // // for that, we pass virtualPath instead of outputPath as file location - virtualPath, err := filepath.Abs(filepath.Join(rootPath, "__generated_by_pydabs__.yml")) + virtualPath, err := filepath.Abs(filepath.Join(rootPath, generatedFileName)) if err != nil { return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to get absolute path: %w", err)) } @@ -292,7 +332,25 @@ func loadOutputFile(rootPath string, outputPath string) (dyn.Value, diag.Diagnos return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to parse output file: %w", err)) } - return strictNormalize(config.Root{}, generated) + // paths are resolved relative to locations of their values, if we change location + // we have to update each path, until we simplify that, we don't update locations + // for such values, so we don't change how paths are resolved + _, err = paths.VisitJobPaths(generated, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + putPythonLocation(locations, p, v.Location()) + return v, nil + }) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + // generated has dyn.Location as if it comes from generated YAML file + // earlier we loaded locations.json with source locations in Python code + generatedWithLocations, err := mergePythonLocations(generated, locations) + if err != nil { + return dyn.InvalidValue, diag.FromErr(fmt.Errorf("failed to update locations: %w", err)) + } + + return strictNormalize(config.Root{}, generatedWithLocations) } func strictNormalize(dst any, generated dyn.Value) (dyn.Value, diag.Diagnostics) { diff --git a/bundle/config/mutator/python/python_mutator_test.go b/bundle/config/mutator/python/python_mutator_test.go index bf12b24997..c01ce2db7f 100644 --- a/bundle/config/mutator/python/python_mutator_test.go +++ b/bundle/config/mutator/python/python_mutator_test.go @@ -6,7 +6,6 @@ import ( "os" "os/exec" "path/filepath" - "reflect" "runtime" "testing" @@ -47,6 +46,7 @@ func TestPythonMutator_load(t *testing.T) { pydabs: enabled: true venv_path: .venv + load_locations: true resources: jobs: job0: @@ -65,7 +65,8 @@ func TestPythonMutator_load(t *testing.T) { "experimental": { "pydabs": { "enabled": true, - "venv_path": ".venv" + "venv_path": ".venv", + "load_locations": true } }, "resources": { @@ -80,6 +81,8 @@ func TestPythonMutator_load(t *testing.T) { } }`, `{"severity": "warning", "summary": "job doesn't have any tasks", "location": {"file": "src/examples/file.py", "line": 10, "column": 5}}`, + `{"path": "resources.jobs.job0", "file": "src/examples/job0.py", "line": 3, "column": 5} + {"path": "resources.jobs.job1", "file": "src/examples/job1.py", "line": 5, "column": 7}`, ) mutator := PythonMutator(PythonMutatorPhaseLoad) @@ -97,6 +100,25 @@ func TestPythonMutator_load(t *testing.T) { assert.Equal(t, "job_1", job1.Name) } + // output of locations.json should be applied to underlying dyn.Value + err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) { + name1, err := dyn.GetByPath(v, dyn.MustPathFromString("resources.jobs.job1.name")) + if err != nil { + return dyn.InvalidValue, err + } + + assert.Equal(t, []dyn.Location{ + { + File: "src/examples/job1.py", + Line: 5, + Column: 7, + }, + }, name1.Locations()) + + return v, nil + }) + assert.NoError(t, err) + assert.Equal(t, 1, len(diags)) assert.Equal(t, "job doesn't have any tasks", diags[0].Summary) assert.Equal(t, []dyn.Location{ @@ -106,7 +128,6 @@ func TestPythonMutator_load(t *testing.T) { Column: 5, }, }, diags[0].Locations) - } func TestPythonMutator_load_disallowed(t *testing.T) { @@ -146,7 +167,7 @@ func TestPythonMutator_load_disallowed(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -191,7 +212,7 @@ func TestPythonMutator_init(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseInit) diag := bundle.Apply(ctx, b, mutator) @@ -252,7 +273,7 @@ func TestPythonMutator_badOutput(t *testing.T) { } } } - }`, "") + }`, "", "") mutator := PythonMutator(PythonMutatorPhaseLoad) diag := bundle.Apply(ctx, b, mutator) @@ -588,7 +609,7 @@ or activate the environment before running CLI commands: assert.Equal(t, expected, out) } -func withProcessStub(t *testing.T, args []string, output string, diagnostics string) context.Context { +func withProcessStub(t *testing.T, args []string, output string, diagnostics string, locations string) context.Context { ctx := context.Background() ctx, stub := process.WithStub(ctx) @@ -600,32 +621,51 @@ func withProcessStub(t *testing.T, args []string, output string, diagnostics str inputPath := filepath.Join(cacheDir, "input.json") outputPath := filepath.Join(cacheDir, "output.json") + locationsPath := filepath.Join(cacheDir, "locations.json") diagnosticsPath := filepath.Join(cacheDir, "diagnostics.json") - args = append(args, "--input", inputPath) - args = append(args, "--output", outputPath) - args = append(args, "--diagnostics", diagnosticsPath) - stub.WithCallback(func(actual *exec.Cmd) error { _, err := os.Stat(inputPath) assert.NoError(t, err) - if reflect.DeepEqual(actual.Args, args) { - err := os.WriteFile(outputPath, []byte(output), 0600) - require.NoError(t, err) + actualInputPath := getArg(actual.Args, "--input") + actualOutputPath := getArg(actual.Args, "--output") + actualDiagnosticsPath := getArg(actual.Args, "--diagnostics") + actualLocationsPath := getArg(actual.Args, "--locations") - err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) - require.NoError(t, err) + require.Equal(t, inputPath, actualInputPath) + require.Equal(t, outputPath, actualOutputPath) + require.Equal(t, diagnosticsPath, actualDiagnosticsPath) - return nil - } else { - return fmt.Errorf("unexpected command: %v", actual.Args) + // locations is an optional argument + if locations != "" { + require.Equal(t, locationsPath, actualLocationsPath) + + err = os.WriteFile(locationsPath, []byte(locations), 0600) + require.NoError(t, err) } + + err = os.WriteFile(outputPath, []byte(output), 0600) + require.NoError(t, err) + + err = os.WriteFile(diagnosticsPath, []byte(diagnostics), 0600) + require.NoError(t, err) + + return nil }) return ctx } +func getArg(args []string, name string) string { + for i := 0; i < len(args); i++ { + if args[i] == name { + return args[i+1] + } + } + return "" +} + func loadYaml(name string, content string) *bundle.Bundle { v, diag := config.LoadFromBytes(name, []byte(content)) diff --git a/bundle/config/mutator/translate_paths_jobs.go b/bundle/config/mutator/translate_paths_jobs.go index e34eeb2f05..c29ff0ea95 100644 --- a/bundle/config/mutator/translate_paths_jobs.go +++ b/bundle/config/mutator/translate_paths_jobs.go @@ -4,97 +4,11 @@ import ( "fmt" "slices" - "github.com/databricks/cli/bundle/libraries" + "github.com/databricks/cli/bundle/config/mutator/paths" + "github.com/databricks/cli/libs/dyn" ) -type jobRewritePattern struct { - pattern dyn.Pattern - fn rewriteFunc - skipRewrite func(string) bool -} - -func noSkipRewrite(string) bool { - return false -} - -func rewritePatterns(t *translateContext, base dyn.Pattern) []jobRewritePattern { - return []jobRewritePattern{ - { - base.Append(dyn.Key("notebook_task"), dyn.Key("notebook_path")), - t.translateNotebookPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("spark_python_task"), dyn.Key("python_file")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("dbt_task"), dyn.Key("project_directory")), - t.translateDirectoryPath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("sql_task"), dyn.Key("file"), dyn.Key("path")), - t.translateFilePath, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("whl")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("jar")), - t.translateNoOp, - noSkipRewrite, - }, - { - base.Append(dyn.Key("libraries"), dyn.AnyIndex(), dyn.Key("requirements")), - t.translateFilePath, - noSkipRewrite, - }, - } -} - -func (t *translateContext) jobRewritePatterns() []jobRewritePattern { - // Base pattern to match all tasks in all jobs. - base := dyn.NewPattern( - dyn.Key("resources"), - dyn.Key("jobs"), - dyn.AnyKey(), - dyn.Key("tasks"), - dyn.AnyIndex(), - ) - - // Compile list of patterns and their respective rewrite functions. - jobEnvironmentsPatterns := []jobRewritePattern{ - { - dyn.NewPattern( - dyn.Key("resources"), - dyn.Key("jobs"), - dyn.AnyKey(), - dyn.Key("environments"), - dyn.AnyIndex(), - dyn.Key("spec"), - dyn.Key("dependencies"), - dyn.AnyIndex(), - ), - t.translateNoOpWithPrefix, - func(s string) bool { - return !libraries.IsLibraryLocal(s) - }, - }, - } - - taskPatterns := rewritePatterns(t, base) - forEachPatterns := rewritePatterns(t, base.Append(dyn.Key("for_each_task"), dyn.Key("task"))) - allPatterns := append(taskPatterns, jobEnvironmentsPatterns...) - allPatterns = append(allPatterns, forEachPatterns...) - return allPatterns -} - func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) { var err error @@ -111,30 +25,41 @@ func (t *translateContext) applyJobTranslations(v dyn.Value) (dyn.Value, error) } } - for _, rewritePattern := range t.jobRewritePatterns() { - v, err = dyn.MapByPattern(v, rewritePattern.pattern, func(p dyn.Path, v dyn.Value) (dyn.Value, error) { - key := p[2].Key() + return paths.VisitJobPaths(v, func(p dyn.Path, kind paths.PathKind, v dyn.Value) (dyn.Value, error) { + key := p[2].Key() - // Skip path translation if the job is using git source. - if slices.Contains(ignore, key) { - return v, nil - } + // Skip path translation if the job is using git source. + if slices.Contains(ignore, key) { + return v, nil + } - dir, err := v.Location().Directory() - if err != nil { - return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) - } + dir, err := v.Location().Directory() + if err != nil { + return dyn.InvalidValue, fmt.Errorf("unable to determine directory for job %s: %w", key, err) + } - sv := v.MustString() - if rewritePattern.skipRewrite(sv) { - return v, nil - } - return t.rewriteRelativeTo(p, v, rewritePattern.fn, dir, fallback[key]) - }) + rewritePatternFn, err := t.getRewritePatternFn(kind) if err != nil { return dyn.InvalidValue, err } + + return t.rewriteRelativeTo(p, v, rewritePatternFn, dir, fallback[key]) + }) +} + +func (t *translateContext) getRewritePatternFn(kind paths.PathKind) (rewriteFunc, error) { + switch kind { + case paths.PathKindLibrary: + return t.translateNoOp, nil + case paths.PathKindNotebook: + return t.translateNotebookPath, nil + case paths.PathKindWorkspaceFile: + return t.translateFilePath, nil + case paths.PathKindDirectory: + return t.translateDirectoryPath, nil + case paths.PathKindWithPrefix: + return t.translateNoOpWithPrefix, nil } - return v, nil + return nil, fmt.Errorf("unsupported path kind: %d", kind) }