Skip to content

Commit

Permalink
Support Python wheels larger than 10MB (#879)
Browse files Browse the repository at this point in the history
## Changes
Previously we only supported uploading Python wheels smaller than 10mb
due to using Workspace.Import API and `content ` field
https://docs.databricks.com/api/workspace/workspace/import

By switching to use `WorkspaceFilesClient` we overcome the limit because
it uses POST body for the API instead.

## Tests
`TestAccUploadArtifactFileToCorrectRemotePath` integration test passes

```
=== RUN   TestAccUploadArtifactFileToCorrectRemotePath
    artifacts_test.go:28: gcp
2023/10/17 15:24:04 INFO Using Google Credentials sdk=true
    helpers.go:356: Creating /Users/.../integration-test-wsfs-ekggbkcfdkid
artifacts.Upload(test.whl): Uploading...
2023/10/17 15:24:06 INFO Using Google Credentials mutator=artifacts.Upload(test) sdk=true
artifacts.Upload(test.whl): Upload succeeded
    helpers.go:362: Removing /Users/.../integration-test-wsfs-ekggbkcfdkid
--- PASS: TestAccUploadArtifactFileToCorrectRemotePath (5.66s)
PASS
coverage: 14.9% of statements in ./...
ok      github.com/databricks/cli/internal      6.109s  coverage: 14.9% of statements in ./...
```
  • Loading branch information
andrewnester authored Oct 18, 2023
1 parent 1b992c0 commit 5273d0c
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 146 deletions.
47 changes: 24 additions & 23 deletions bundle/artifacts/artifacts.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package artifacts

import (
"bytes"
"context"
"crypto/sha256"
"encoding/base64"
"errors"
"fmt"
"os"
Expand All @@ -14,7 +14,7 @@ import (
"github.com/databricks/cli/bundle/artifacts/whl"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/databricks/cli/libs/filer"
)

type mutatorFactory = func(name string) bundle.Mutator
Expand Down Expand Up @@ -83,7 +83,7 @@ func BasicUpload(name string) bundle.Mutator {
}

func (m *basicUpload) Name() string {
return fmt.Sprintf("artifacts.Build(%s)", m.name)
return fmt.Sprintf("artifacts.Upload(%s)", m.name)
}

func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
Expand All @@ -96,21 +96,32 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
return fmt.Errorf("artifact source is not configured: %s", m.name)
}

err := uploadArtifact(ctx, artifact, b)
uploadPath, err := getUploadBasePath(b)
if err != nil {
return err
}

client, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), uploadPath)
if err != nil {
return err
}

err = uploadArtifact(ctx, artifact, uploadPath, client)
if err != nil {
return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err)
}

return nil
}

func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error {
func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, client filer.Filer) error {
for i := range a.Files {
f := &a.Files[i]
if f.NeedsUpload() {
filename := filepath.Base(f.Source)
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename))
remotePath, err := uploadArtifactFile(ctx, f.Source, b)

remotePath, err := uploadArtifactFile(ctx, f.Source, uploadPath, client)
if err != nil {
return err
}
Expand All @@ -125,32 +136,22 @@ func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) e
}

// Function to upload artifact file to Workspace
func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) {
func uploadArtifactFile(ctx context.Context, file string, uploadPath string, client filer.Filer) (string, error) {
raw, err := os.ReadFile(file)
if err != nil {
return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err))
}

uploadPath, err := getUploadBasePath(b)
if err != nil {
return "", err
}

fileHash := sha256.Sum256(raw)
remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), filepath.Base(file))
// Make sure target directory exists.
err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath))
relPath := path.Join(fmt.Sprintf("%x", fileHash), filepath.Base(file))
remotePath := path.Join(uploadPath, relPath)

err = client.Mkdir(ctx, path.Dir(relPath))
if err != nil {
return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err)
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
}

// Import to workspace.
err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{
Path: remotePath,
Overwrite: true,
Format: workspace.ImportFormatAuto,
Content: base64.StdEncoding.EncodeToString(raw),
})
err = client.Write(ctx, relPath, bytes.NewReader(raw), filer.OverwriteIfExists, filer.CreateParentDirectories)
if err != nil {
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
}
Expand Down
123 changes: 0 additions & 123 deletions bundle/artifacts/artifacts_test.go

This file was deleted.

69 changes: 69 additions & 0 deletions internal/bundle/artifacts_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package bundle

import (
"context"
"os"
"path"
"path/filepath"
"regexp"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/artifacts"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/internal"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/stretchr/testify/require"
)

func touchEmptyFile(t *testing.T, path string) {
err := os.MkdirAll(filepath.Dir(path), 0700)
require.NoError(t, err)
f, err := os.Create(path)
require.NoError(t, err)
f.Close()
}

func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))

dir := t.TempDir()
whlPath := filepath.Join(dir, "dist", "test.whl")
touchEmptyFile(t, whlPath)

artifact := &config.Artifact{
Type: "whl",
Files: []config.ArtifactFile{
{
Source: whlPath,
Libraries: []*compute.Library{
{Whl: "dist\\test.whl"},
},
},
},
}

w := databricks.Must(databricks.NewWorkspaceClient())
wsDir := internal.TemporaryWorkspaceDir(t, w)

b := &bundle.Bundle{
Config: config.Root{
Path: dir,
Bundle: config.Bundle{
Target: "whatever",
},
Workspace: config.Workspace{
ArtifactsPath: wsDir,
},
Artifacts: config.Artifacts{
"test": artifact,
},
},
}

err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test"))
require.NoError(t, err)
require.Regexp(t, regexp.MustCompile(path.Join(wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].RemotePath)
require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", wsDir, ".internal/[a-z0-9]+/test.whl")), artifact.Files[0].Libraries[0].Whl)
}

0 comments on commit 5273d0c

Please sign in to comment.