Skip to content

Commit

Permalink
oci/cas/dir: Backstop flock with pruneExpire in Clean
Browse files Browse the repository at this point in the history
Following Git and defaulting to two weeks [1], as discussed in [2].
This allows the use of other non-flocking consumers (e.g. other CAS
implementations) in the same base directory.

The added robustness comes with a price, though, since we now have to
balance cleanliness vs. robustness for those other consumers.

* The hard-coded two-week cutoff may be insufficient for some use
  cases.  Maybe somebody will need to download a huge layer through a
  very slow pipe, and they'll bump into this limit.  Or maybe they
  have a high-cruft workflow or a small disk and would like to shorten
  this limit.  If that happens or we become concerned that it might,
  we should make pruneExpire configurable.

* Increasing pruneExpire protects more flock-less consumers from
  Clean, but also means that cruft can survive for longer before Clean
  is confident enough to remove it.  Setting an infite pruneExpire
  would be very safe but would make Clean a no-op.  Two weeks seems
  like a safe choice, since well-behaved consumers will clean up after
  themselves when they are closed.  Clean is just handling
  poorly-behaved consumers (or well-behaved consumers that had a hard
  shutdown).  I don't expect cruft to build up quickly enough for the
  two-week default to be an issue for most users.

Consumers who do not wish to rely on pruneExpire (perhaps they have
long-running operations or are locally setting pruneExpire very short)
can continue to flock their temporary files and directories to protect
them from Clean.  Flocking a directory will also protect all content
within that directory.

Removal errors (because of insufficient permissions, etc.) seemed like
they should be non-fatal so we could continue to remove other cruft.
However, I didn't want to silently ignore the failed removal.  In this
commit, I log those errors with a "warning" level.

The order of walk means that an old directory containing an old file
will not be removed in a single pass.  The first Clean will fail to
remove the old directory because it is not empty, but will remove the
old file (assuming it has sufficient permissions, etc.).  A second
Clean pass will remove the now-empty old directory.  With cruft
building slowly and Clean running fairly frequently, the delated
old-directory removal didn't seem like a big enough issue to warrant
an immediate re-clean attempt.

[1]: https://git-scm.com/docs/git-gc
[2]: https://github.com/openSUSE/umoci/issues/17#issuecomment-258686340

Signed-off-by: W. Trevor King <wking@tremily.us>
  • Loading branch information
wking committed Oct 18, 2017
1 parent 07263d2 commit 8aec391
Show file tree
Hide file tree
Showing 2 changed files with 113 additions and 44 deletions.
86 changes: 52 additions & 34 deletions oci/cas/dir/dir.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ import (
"io/ioutil"
"os"
"path/filepath"
"time"

"github.com/apex/log"
"github.com/openSUSE/umoci/oci/cas"
"github.com/opencontainers/go-digest"
imeta "github.com/opencontainers/image-spec/specs-go"
Expand Down Expand Up @@ -70,9 +72,10 @@ func blobPath(digest digest.Digest) (string, error) {
}

type dirEngine struct {
path string
temp string
tempFile *os.File
temp string
path string
tempFile *os.File
pruneExpire time.Duration
}

func (e *dirEngine) ensureTempDir() error {
Expand Down Expand Up @@ -299,44 +302,58 @@ func (e *dirEngine) ListBlobs(ctx context.Context) ([]digest.Digest, error) {
func (e *dirEngine) Clean(ctx context.Context) error {
// Effectively we are going to remove every directory except the standard
// directories, unless they have a lock already.
fh, err := os.Open(e.path)
if err != nil {
return errors.Wrap(err, "open imagedir")
}
defer fh.Close()

children, err := fh.Readdir(-1)
if err != nil {
return errors.Wrap(err, "readdir imagedir")
}

for _, child := range children {
// Skip any children that are expected to exist.
switch child.Name() {
case blobDirectory, indexFile, layoutFile:
continue
return filepath.Walk(e.path, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}

// Try to get a lock on the directory.
path := filepath.Join(e.path, child.Name())
cfh, err := os.Open(path)
rel, err := filepath.Rel(e.path, path)
if err != nil {
// Ignore errors because it might've been deleted underneath us.
continue
return errors.Wrap(err, "relative path")
}
defer cfh.Close()

if err := unix.Flock(int(cfh.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
// If we fail to get a flock(2) then it's probably already locked,
// so we shouldn't touch it.
continue
isDir := info.Mode().IsDir()
if isDir && rel == blobDirectory {
return filepath.SkipDir
}

if info.Mode().IsRegular() && (rel == indexFile || rel == layoutFile) {
return nil
}
defer unix.Flock(int(cfh.Fd()), unix.LOCK_UN)

if err := os.RemoveAll(path); err != nil {
return errors.Wrap(err, "remove garbage path")
if time.Since(info.ModTime()) > e.pruneExpire {
err = e.cleanPath(ctx, path)
if isDir || err == filepath.SkipDir {
return filepath.SkipDir
}
if err != nil {
return err
}
}

return nil
})
}

func (e *dirEngine) cleanPath(ctx context.Context, path string) (err error) {
cfh, err := os.Open(path)
if err != nil && !os.IsNotExist(err) {
return errors.Wrap(err, "open for locking")
}
defer cfh.Close()

if err := unix.Flock(int(cfh.Fd()), unix.LOCK_EX|unix.LOCK_NB); err != nil {
// If we fail to get a flock(2) then it's probably already locked,
// so we shouldn't touch it.
return filepath.SkipDir
}
defer unix.Flock(int(cfh.Fd()), unix.LOCK_UN)

err = os.Remove(path)
if err != nil && !os.IsNotExist(err) {
log.Warnf("failed to remove %s (%s)", path, err)
}
log.Debugf("cleaned %s", path)

return nil
}
Expand All @@ -362,8 +379,9 @@ func (e *dirEngine) Close() error {
// the provided path.
func Open(path string) (cas.Engine, error) {
engine := &dirEngine{
path: path,
temp: "",
path: path,
temp: "",
pruneExpire: 14 * 24 * time.Hour,
}

if err := engine.validate(); err != nil {
Expand Down
71 changes: 61 additions & 10 deletions oci/cas/dir/dir_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"os"
"path/filepath"
"testing"
"time"

"github.com/openSUSE/umoci/oci/cas"
"github.com/pkg/errors"
Expand Down Expand Up @@ -236,8 +237,49 @@ func TestEngineGCLocking(t *testing.T) {
t.Errorf("engine doesn't have a tempdir after putting a blob!")
}

// Create tempdir to make sure things work.
removedDir, err := ioutil.TempDir(image, "testdir")
epoch := time.Unix(0, 0)
err = os.Chtimes(engine.(*dirEngine).temp, epoch, epoch)
if err != nil {
t.Fatal(err)
}

// Create new and old directories and files to make sure things work.
newTestDir, err := ioutil.TempDir(image, "newtestdir")
if err != nil {
t.Fatal(err)
}

newTestFile, err := ioutil.TempFile(newTestDir, "newtestfile")
if err != nil {
t.Fatal(err)
}
newTestFileName := newTestFile.Name()
err = newTestFile.Close()
if err != nil {
t.Fatal(err)
}

oldTestFile, err := ioutil.TempFile(newTestDir, "oldtestfile")
if err != nil {
t.Fatal(err)
}
oldTestFileName := oldTestFile.Name()
err = oldTestFile.Close()
if err != nil {
t.Fatal(err)
}

err = os.Chtimes(oldTestFileName, epoch, epoch)
if err != nil {
t.Fatal(err)
}

oldTestDir, err := ioutil.TempDir(image, "oldtestdir")
if err != nil {
t.Fatal(err)
}

err = os.Chtimes(oldTestDir, epoch, epoch)
if err != nil {
t.Fatal(err)
}
Expand All @@ -253,15 +295,24 @@ func TestEngineGCLocking(t *testing.T) {
t.Fatalf("unexpected error while GCing image: %+v", err)
}

// Make sure that engine.temp is still around.
if _, err := os.Lstat(engine.(*dirEngine).temp); err != nil {
t.Errorf("expected active direngine.temp to still exist after GC: %+v", err)
for _, path := range []string{
engine.(*dirEngine).temp,
newTestDir,
newTestFileName,
} {
if _, err := os.Lstat(path); err != nil {
t.Errorf("expected %s to still exist after GC: %+v", path, err)
}
}

// Make sure that removedDir is still around
if _, err := os.Lstat(removedDir); err == nil {
t.Errorf("expected inactive temporary dir to not exist after GC")
} else if !os.IsNotExist(errors.Cause(err)) {
t.Errorf("expected IsNotExist for temporary dir after GC: %+v", err)
for _, path := range []string{
oldTestDir,
oldTestFileName,
} {
if _, err := os.Lstat(path); err == nil {
t.Errorf("expected %s to not exist after GC", path)
} else if !os.IsNotExist(errors.Cause(err)) {
t.Errorf("expected IsNotExist for %s after GC: %+v", path, err)
}
}
}

0 comments on commit 8aec391

Please sign in to comment.