Skip to content

Commit

Permalink
Recursively delete empty parent folders (#94)
Browse files Browse the repository at this point in the history
* Recursively delete empty parent folders

* Address race condition and escape for store dir

* Address some PR comments

Co-authored-by: Erin Pentecost <erin.pentecost@lytics.com>
  • Loading branch information
erinpentecost and Erin Pentecost authored Jul 19, 2021
1 parent cb860a0 commit 1c8a12f
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 14 deletions.
116 changes: 116 additions & 0 deletions localfs/emptydir_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
package localfs

import (
"os"
"path"
"testing"

"github.com/stretchr/testify/require"
)

func TestDirectoryCleanup(t *testing.T) {
testDir := t.TempDir()

makeDummyFile := func(t *testing.T, filePath string) string {
fullPath := path.Join(testDir, filePath)
dir := path.Dir(fullPath)
require.NotEmpty(t, dir)
err := os.MkdirAll(dir, 0755)
require.NoError(t, err)
err = os.WriteFile(fullPath, []byte("don't delete this folder"), 0755)
require.NoError(t, err)
return fullPath
}

fileExists := func(t *testing.T, filePath string) bool {
_, err := os.Stat(filePath)
if err == nil {
return true
}
if os.IsNotExist(err) {
return false
}
require.FailNow(t, "failed to get status of file %s", filePath)
return false
}

require.False(t, fileExists(t, "/heythisdoesntexist/overhere"))

// /testDir
// a/
// dummyfile3
// b/
// c/
// dummyfile1
// dummyfile2
// d/
// dummyfile4

d1 := makeDummyFile(t, "a/b/c/dummyfile1")
d2 := makeDummyFile(t, "a/b/c/dummyfile2")
d3 := makeDummyFile(t, "a/dummyfile3")
d4 := makeDummyFile(t, "a/d/dummyfile4")

l := &LocalStore{storepath: testDir}

t.Run("delete-nonempty-dir", func(t *testing.T) {
err := l.deleteParentDirs(path.Join(testDir, "a/d"))
require.NoError(t, err)
require.True(t, fileExists(t, d1))
require.True(t, fileExists(t, d2))
require.True(t, fileExists(t, d3))
require.True(t, fileExists(t, d4))
})

t.Run("delete-nonempty-nested-child-dir", func(t *testing.T) {
err := l.deleteParentDirs(path.Join(testDir, "a/b/c"))
require.NoError(t, err)
require.True(t, fileExists(t, d1))
require.True(t, fileExists(t, d2))
require.True(t, fileExists(t, d3))
require.True(t, fileExists(t, d4))
})

t.Run("delete-nonempty-nested-parent-dir", func(t *testing.T) {
err := l.deleteParentDirs(path.Join(testDir, "a/b"))
require.NoError(t, err)
require.True(t, fileExists(t, d1))
require.True(t, fileExists(t, d2))
require.True(t, fileExists(t, d3))
require.True(t, fileExists(t, d4))
})

require.NoError(t, os.Remove(d4))

t.Run("delete-empty-dir", func(t *testing.T) {
err := l.deleteParentDirs(d4)
require.NoError(t, err)
require.True(t, fileExists(t, d1))
require.True(t, fileExists(t, d2))
require.True(t, fileExists(t, d3))
require.False(t, fileExists(t, d4))
require.False(t, fileExists(t, path.Join(testDir, "a/d")))
})

require.NoError(t, os.Remove(d1))
require.NoError(t, os.Remove(d2))

t.Run("delete-empty-nested-dir", func(t *testing.T) {
err := l.deleteParentDirs(d2)
require.NoError(t, err)
require.False(t, fileExists(t, d1))
require.False(t, fileExists(t, d2))
require.False(t, fileExists(t, path.Join(testDir, "a/b/c")))
require.False(t, fileExists(t, path.Join(testDir, "a/b")))
require.True(t, fileExists(t, d3))
require.False(t, fileExists(t, d4))
require.False(t, fileExists(t, path.Join(testDir, "a/d")))
})

t.Run("delete-missing-dir", func(t *testing.T) {
err := l.deleteParentDirs(path.Join(testDir, "doesntexist/what"))
require.NoError(t, err)
})

require.True(t, fileExists(t, testDir))
}
47 changes: 33 additions & 14 deletions localfs/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@ import (
"errors"
"fmt"
"io"
"io/fs"
"io/ioutil"
"os"
"path"
"path/filepath"
"strings"
"syscall"
"time"

"github.com/araddon/gou"
Expand Down Expand Up @@ -280,31 +282,48 @@ func (l *LocalStore) Get(ctx context.Context, o string) (cloudstorage.Object, er
func (l *LocalStore) Delete(ctx context.Context, obj string) error {
fo := path.Join(l.storepath, obj)
if err := os.Remove(fo); err != nil {
return fmt.Errorf("removing dir=%s: %w", fo, err)
return fmt.Errorf("removing file=%s: %w", fo, err)
}
mf := fo + ".metadata"
if cloudstorage.Exists(mf) {
if err := os.Remove(mf); err != nil {
return fmt.Errorf("removing dir=%s: %w", mf, err)
return fmt.Errorf("removing file=%s: %w", mf, err)
}
}

// When the last item in a folder is deleted, the folder
// should also be deleted. This matches the behavior in GCS.
dir, err := os.Open(l.storepath)
if err != nil {
return fmt.Errorf("failed to open store dir=%s err=%w", l.storepath, err)
}
if _, err = dir.Readdirnames(1); errors.Is(err, io.EOF) {
dir.Close()
// it's empty, so remove it.
if err := os.Remove(l.storepath); err != nil {
return fmt.Errorf("failed to remove store dir=%s err=%w", l.storepath, err)
return l.deleteParentDirs(fo)
}

// deleteParentDirs deletes all the parent dirs of some filepath
// if those dirs are empty.
func (l *LocalStore) deleteParentDirs(filePath string) error {

for dirName := path.Dir(filePath); len(dirName) > 0; dirName = path.Dir(dirName) {
if dirName == l.storepath {
// top level, stop deleting
return nil
}
} else {
dir.Close()
err := os.Remove(dirName)
if errors.Is(err, os.ErrNotExist) {
// it's already deleted; nothing to do.
return nil
}
// There is no equivalent os.ErrNotEmpty in this version of go.
var perr *fs.PathError
if ok := errors.As(err, &perr); ok {
if sysErr, ok := perr.Err.(syscall.Errno); ok && sysErr == syscall.ENOTEMPTY {
// not empty; quit.
return nil
}
}
// unknown error, return it.
if err != nil {
return fmt.Errorf("failed to remove store dir=%s err=%w", dirName, err)
}
// we deleted an empty folder, so continue
}

return nil
}

Expand Down
5 changes: 5 additions & 0 deletions testutils/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ func BasicRW(t TestingT, store cloudstorage.Store) {
assert.Equal(t, cloudstorage.ErrObjectNotFound, err)
assert.Equal(t, nil, obj)

// Store should be empty again
all, err = store.List(context.Background(), cloudstorage.NewQueryAll())
assert.NoError(t, err)
assert.NotNil(t, all)
assert.Empty(t, all.Objects)
}

func createFile(t TestingT, store cloudstorage.Store, name, data string) cloudstorage.Object {
Expand Down

0 comments on commit 1c8a12f

Please sign in to comment.