Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add integration test for mlops-stacks initialization #1155

Merged
merged 11 commits into from
Mar 12, 2024
5 changes: 3 additions & 2 deletions bundle/bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"testing"

"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/internal/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand Down Expand Up @@ -86,7 +87,7 @@ func TestBundleMustLoadFailureWithEnv(t *testing.T) {
}

func TestBundleMustLoadFailureIfNotFound(t *testing.T) {
chdir(t, t.TempDir())
testutil.Chdir(t, t.TempDir())
_, err := MustLoad(context.Background())
require.Error(t, err, "unable to find bundle root")
}
Expand All @@ -105,7 +106,7 @@ func TestBundleTryLoadFailureWithEnv(t *testing.T) {
}

func TestBundleTryLoadOkIfNotFound(t *testing.T) {
chdir(t, t.TempDir())
testutil.Chdir(t, t.TempDir())
b, err := TryLoad(context.Background())
assert.NoError(t, err)
assert.Nil(t, b)
Expand Down
35 changes: 8 additions & 27 deletions bundle/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,11 @@ import (

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/env"
"github.com/databricks/cli/internal/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// Changes into specified directory for the duration of the test.
// Returns the current working directory.
func chdir(t *testing.T, dir string) string {
wd, err := os.Getwd()
require.NoError(t, err)

abs, err := filepath.Abs(dir)
require.NoError(t, err)

err = os.Chdir(abs)
require.NoError(t, err)

t.Cleanup(func() {
err := os.Chdir(wd)
require.NoError(t, err)
})

return wd
}

func TestRootFromEnv(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
Expand Down Expand Up @@ -83,7 +64,7 @@ func TestRootLookup(t *testing.T) {
t.Setenv(env.RootVariable, "")
os.Unsetenv(env.RootVariable)

chdir(t, t.TempDir())
testutil.Chdir(t, t.TempDir())

// Create databricks.yml file.
f, err := os.Create(config.FileNames[0])
Expand All @@ -95,7 +76,7 @@ func TestRootLookup(t *testing.T) {
require.NoError(t, err)

// It should find the project root from $PWD.
wd := chdir(t, "./a/b/c")
wd := testutil.Chdir(t, "./a/b/c")
root, err := mustGetRoot(ctx)
require.NoError(t, err)
require.Equal(t, wd, root)
Expand All @@ -109,14 +90,14 @@ func TestRootLookupError(t *testing.T) {
os.Unsetenv(env.RootVariable)

// It can't find a project root from a temporary directory.
_ = chdir(t, t.TempDir())
_ = testutil.Chdir(t, t.TempDir())
_, err := mustGetRoot(ctx)
require.ErrorContains(t, err, "unable to locate bundle root")
}

func TestLoadYamlWhenIncludesEnvPresent(t *testing.T) {
ctx := context.Background()
chdir(t, filepath.Join(".", "tests", "basic"))
testutil.Chdir(t, filepath.Join(".", "tests", "basic"))
t.Setenv(env.IncludesVariable, "test")

bundle, err := MustLoad(ctx)
Expand All @@ -131,7 +112,7 @@ func TestLoadYamlWhenIncludesEnvPresent(t *testing.T) {
func TestLoadDefautlBundleWhenNoYamlAndRootAndIncludesEnvPresent(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
chdir(t, dir)
testutil.Chdir(t, dir)
t.Setenv(env.RootVariable, dir)
t.Setenv(env.IncludesVariable, "test")

Expand All @@ -143,7 +124,7 @@ func TestLoadDefautlBundleWhenNoYamlAndRootAndIncludesEnvPresent(t *testing.T) {
func TestErrorIfNoYamlNoRootEnvAndIncludesEnvPresent(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
chdir(t, dir)
testutil.Chdir(t, dir)
t.Setenv(env.IncludesVariable, "test")

_, err := MustLoad(ctx)
Expand All @@ -153,7 +134,7 @@ func TestErrorIfNoYamlNoRootEnvAndIncludesEnvPresent(t *testing.T) {
func TestErrorIfNoYamlNoIncludesEnvAndRootEnvPresent(t *testing.T) {
ctx := context.Background()
dir := t.TempDir()
chdir(t, dir)
testutil.Chdir(t, dir)
t.Setenv(env.RootVariable, dir)

_, err := MustLoad(ctx)
Expand Down
83 changes: 83 additions & 0 deletions internal/init_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
package internal

import (
"context"
"encoding/json"
"os"
"path/filepath"
"strconv"
"testing"

"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/internal/testutil"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestAccBundleInitErrorOnUnknownFields(t *testing.T) {
Expand All @@ -13,3 +22,77 @@ func TestAccBundleInitErrorOnUnknownFields(t *testing.T) {
_, _, err := RequireErrorRun(t, "bundle", "init", "./testdata/init/field-does-not-exist", "--output-dir", tmpDir)
assert.EqualError(t, err, "failed to compute file content for bar.tmpl. variable \"does_not_exist\" not defined")
}

// This test tests the MLOps Stacks DAB e2e and thus there's a couple of special
// considerations to take note of:
//
// 1. Upstream changes to the MLOps Stacks DAB can cause this test to fail.
// In which case we should do one of:
// (a) Update this test to reflect the changes
// (b) Update the MLOps Stacks DAB to not break this test. Skip this test
// temporarily until the MLOps Stacks DAB is updated
//
// 2. While rare and to be avoided if possible, the CLI reserves the right to
// make changes that can break the MLOps Stacks DAB. In which case we should
// skip this test until the MLOps Stacks DAB is updated to work again.
func TestAccBundleInitOnMlopsStacks(t *testing.T) {
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved
env := GetEnvOrSkipTest(t, "CLOUD_ENV")
if env == "gcp" {
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved
t.Skip("MLOps Stacks is not supported in GCP")
}
tmpDir1 := t.TempDir()
tmpDir2 := t.TempDir()

w, err := databricks.NewWorkspaceClient(&databricks.Config{})
require.NoError(t, err)

// Create a config file with the project name and root dir
initConfig := map[string]string{
shreyas-goenka marked this conversation as resolved.
Show resolved Hide resolved
"input_project_name": "project_name",
"input_root_dir": "repo_name",
"input_include_models_in_unity_catalog": "no",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance we could run UC models as well? If not a separate test case, then prefer making this yes since it's our more important journey

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

UC tests are possible, but the test will require and the equivalent assertions will need make finer grained assumptions. Needing to set up schema, setting more input parameters like the schema name and catalog name and specifying a target deployment explicitly are a couple I noticed.

Given this level of dependencies on the details, I'd prefer to have the UC test in the MLOps repo itself, so that the tests are managed alongside the template changes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Kk makes sense, can we keep a UC initialization test then? We don't have to deploy/run any of the workflows, but good to make sure we can at least initialize the template

"input_cloud": env,
}
b, err := json.Marshal(initConfig)
require.NoError(t, err)
os.WriteFile(filepath.Join(tmpDir1, "config.json"), b, 0644)

// Run bundle init
assert.NoFileExists(t, filepath.Join(tmpDir2, "repo_name", "project_name", "README.md"))
RequireSuccessfulRun(t, "bundle", "init", "mlops-stacks", "--output-dir", tmpDir2, "--config-file", filepath.Join(tmpDir1, "config.json"))

// Assert that the README.md file was created
assert.FileExists(t, filepath.Join(tmpDir2, "repo_name", "project_name", "README.md"))
assertLocalFileContents(t, filepath.Join(tmpDir2, "repo_name", "project_name", "README.md"), "This directory contains python code, notebooks and ML asset configs related to one ML project.")
assertLocalFileContents(t, filepath.Join(tmpDir2, "repo_name", "project_name", "README.md"), "# project_name")

// Validate the stack
testutil.Chdir(t, filepath.Join(tmpDir2, "repo_name", "project_name"))
RequireSuccessfulRun(t, "bundle", "validate")

// Deploy the stack
RequireSuccessfulRun(t, "bundle", "deploy")
t.Cleanup(func() {
// Delete the stack
RequireSuccessfulRun(t, "bundle", "destroy", "--auto-approve")
})

// Get summary of the bundle deployment
stdout, _ := RequireSuccessfulRun(t, "bundle", "summary", "--output", "json")
summary := &config.Root{}
err = json.Unmarshal(stdout.Bytes(), summary)
require.NoError(t, err)

// Assert resource Ids are not empty
assert.NotEmpty(t, summary.Resources.Experiments["experiment"].ID)
assert.NotEmpty(t, summary.Resources.Models["model"].ID)
assert.NotEmpty(t, summary.Resources.Jobs["batch_inference_job"].ID)
assert.NotEmpty(t, summary.Resources.Jobs["model_training_job"].ID)

// Assert the batch inference job actually exists
batchJobId, err := strconv.ParseInt(summary.Resources.Jobs["batch_inference_job"].ID, 10, 64)
require.NoError(t, err)
job, err := w.Jobs.GetByJobId(context.Background(), batchJobId)
assert.NoError(t, err)
assert.Equal(t, "dev-project_name-batch-inference-job", job.Settings.Name)
}
23 changes: 23 additions & 0 deletions internal/testutil/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@ package testutil

import (
"os"
"path/filepath"
"runtime"
"strings"
"testing"

"github.com/stretchr/testify/require"
)

// CleanupEnvironment sets up a pristine environment containing only $PATH and $HOME.
Expand Down Expand Up @@ -44,3 +47,23 @@ func GetEnvOrSkipTest(t *testing.T, name string) string {
}
return value
}

// Changes into specified directory for the duration of the test.
// Returns the current working directory.
func Chdir(t *testing.T, dir string) string {
wd, err := os.Getwd()
require.NoError(t, err)

abs, err := filepath.Abs(dir)
require.NoError(t, err)

err = os.Chdir(abs)
require.NoError(t, err)

t.Cleanup(func() {
err := os.Chdir(wd)
require.NoError(t, err)
})

return wd
}
5 changes: 4 additions & 1 deletion libs/template/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,10 @@ func (c *config) assignValuesFromFile(path string) error {

// Assigns default values from schema to input config map
func (c *config) assignDefaultValues(r *renderer) error {
for name, property := range c.schema.Properties {
for _, p := range c.schema.OrderedProperties() {
name := p.Name
property := p.Schema

// Config already has a value assigned
if _, ok := c.values[name]; ok {
continue
Expand Down
Loading