Skip to content

Commit

Permalink
Initial Prototype
Browse files Browse the repository at this point in the history
This initial prototype supports:

- Matching on specified file patterns
- Flat (single-level) or recursive search
- Keeping a specified number of older or newer matches
- Limiting search to specified list of extensions
- Toggling file removal (read-only by default)
- Go modules (vs classic GOPATH setup)

refs #2, #4, #6
  • Loading branch information
atc0005 committed Sep 17, 2019
1 parent 656fa0c commit a0b86df
Show file tree
Hide file tree
Showing 11 changed files with 905 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
*.test
testing/
*.exe

# When building on non-Windows platform
elbow
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,48 @@
# elbow

Elbow, Elbow grease.

## Purpose

Prune content matching specific patterns, either in a single directory or
recursively through a directory tree. The primary goal is to use this
application from a cron job to perform routine pruning of generated files that
would otherwise completely clog a filesystem.

## Setup test environment

1. `cd /path/to/create/test/files`
1. `touch $(cat /path/to/this/repo/sample_files_list_dev_web_app_server.txt)`
1. Build the application
1. Run the application, passing `/path/to/create/test/files` as the path to process (the `path` flag)

## References

The following unordered list of sites/examples provided guidance while
developing this application. Depending on when consulted, the original code
written based on that guidance may no longer be present in the active version
of this application.

### Configuration object

- <https://github.com/go-sql-driver/mysql/blob/877a9775f06853f611fb2d4e817d92479242d1cd/dsn.go#L67>
- <https://github.com/aws/aws-sdk-go/blob/10878ad0389c5b3069815112ce888b191c8cd325/aws/config.go#L251>
- <https://github.com/aws/aws-sdk-go/blob/master/aws/config.go>
- <https://github.com/aws/aws-sdk-go/blob/10878ad0389c5b3069815112ce888b191c8cd325/awstesting/integration/performance/s3GetObject/config.go#L25>
- <https://github.com/aws/aws-sdk-go/blob/10878ad0389c5b3069815112ce888b191c8cd325/awstesting/integration/performance/s3GetObject/main.go#L25>

### Sorting files

- <https://stackoverflow.com/questions/46746862/list-files-in-a-directory-sorted-by-creation-time>

### Path/File Existence

- <https://gist.github.com/mattes/d13e273314c3b3ade33f>

### Slice management

- <https://yourbasic.org/golang/delete-element-slice/>
- <https://stackoverflow.com/questions/37334119/how-to-delete-an-element-from-a-slice-in-golang>
- <https://github.com/golang/go/wiki/SliceTricks>

###
72 changes: 72 additions & 0 deletions config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package main

import (
"github.com/integrii/flaggy"
)

// Config represents a collection of configuration settings for this
// application. Config is created as early as possible upon application
// startup. Each field is bound to a command-line flag by SetupFlags.
type Config struct {
// FilePattern is the substring that filenames are compared against.
// Wildcards are not supported.
FilePattern string
// FileExtensions limits the search to the listed file extensions. It is
// left as a nil slice when no extensions are requested.
FileExtensions []string
// StartPath is the path to process.
StartPath string
// RecursiveSearch enables searching into subdirectories.
RecursiveSearch bool
// FilesToKeep is the number of matching files to keep.
FilesToKeep int
// KeepOldest keeps the oldest matching files instead of the newer ones.
KeepOldest bool
// Remove enables removal of matched files.
Remove bool
}

// NewConfig builds a Config populated with the intended default settings and
// returns a pointer to it so that further setup calls can be chained directly
// onto the result.
func NewConfig() *Config {
	// Every default is spelled out, even though each one matches the zero
	// value for its type, so that the intended starting state is explicit.
	var cfg Config

	cfg.StartPath = ""
	cfg.FilePattern = ""

	// NOTE: FileExtensions is deliberately not assigned here. Assigning
	// []string{} would create an empty, non-nil slice; leaving the field
	// alone keeps it at its default value of a nil slice.

	cfg.FilesToKeep = 0
	cfg.RecursiveSearch = false
	cfg.KeepOldest = false
	cfg.Remove = false

	return &cfg
}

// SetupFlags applies settings provided by command-line flags. It hands
// appName and appDesc to flaggy, registers one flag per Config field (each
// flag writes directly into the receiver's field), and then parses the
// command line. The receiver is returned so that the call can be chained.
// TODO: Pull out
func (c *Config) SetupFlags(appName string, appDesc string) *Config {

flaggy.SetName(appName)
flaggy.SetDescription(appDesc)

flaggy.DefaultParser.ShowHelpOnUnexpected = true

// Add flags. Each call binds a short name, a long name and a help
// description to the Config field that receives the parsed value.
flaggy.String(&c.StartPath, "p", "path", "Path to process")
flaggy.String(&c.FilePattern, "fp", "pattern", "Substring pattern to compare filenames against. Wildcards are not supported.")
flaggy.StringSlice(&c.FileExtensions, "e", "extension", "Limit search to specified file extension. Specify as needed to match multiple required extensions.")
flaggy.Int(&c.FilesToKeep, "k", "keep", "Keep specified number of matching files")
flaggy.Bool(&c.RecursiveSearch, "r", "recurse", "Perform recursive search into subdirectories")
flaggy.Bool(&c.KeepOldest, "ko", "keep-old", "Keep oldest files instead of newer")
flaggy.Bool(&c.Remove, "rm", "remove", "Remove matched files")

// Parse the flags
flaggy.Parse()

// https://github.com/atc0005/elbow/issues/2#issuecomment-524032239
//
// For flags, you can easily just check the value after calling
// flaggy.Parse(). If the value is set to something other than the
// default, then the caller supplied it. If it was the default value (set
// by you or the language), then it was not used.

return c

}
9 changes: 9 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module github.com/atc0005/elbow

go 1.12

require (
github.com/integrii/flaggy v1.2.2
github.com/r3labs/diff v0.0.0-20190801153147-a71de73c46ad
github.com/stretchr/testify v1.4.0 // indirect
)
15 changes: 15 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/integrii/flaggy v1.2.2 h1:SzL5kyEaW+Cb3RLxGG1ch9FFDLQPB6QuMdYoNu5JIo0=
github.com/integrii/flaggy v1.2.2/go.mod h1:tnTxHeTJbah0gQ6/K0RW0J7fMUBk9MCF5blhm43LNpI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/r3labs/diff v0.0.0-20190801153147-a71de73c46ad h1:j5pg/OewZJyE6i3hIG4v3eQUvUyFdQkC8Nd/mjaEkxE=
github.com/r3labs/diff v0.0.0-20190801153147-a71de73c46ad/go.mod h1:ozniNEFS3j1qCwHKdvraMn1WJOsUxHd7lYfukEIS4cs=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
113 changes: 113 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package main

import (
"fmt"
"log"
"os"

"github.com/integrii/flaggy"
)

// main builds the configuration from command-line flags, gathers the files
// matching that configuration, selects which of them to prune based on the
// requested number of files to keep, and hands that selection to cleanPath.
// It reports which removals succeeded and which failed, and exits
// unsuccessfully if cleanPath returned an error.
func main() {

	// DEBUG
	// TODO: Enable this once leveled logging has been implemented.
	//defaultConfig := NewConfig()
	//fmt.Printf("Default configuration:\t%+v\n", defaultConfig)

	appName := "Elbow"
	appDesc := "Prune content matching specific patterns, either in a single directory or recursively through a directory tree."

	config := NewConfig().SetupFlags(appName, appDesc)

	// DEBUG
	// TODO: Enable this once leveled logging has been implemented.
	//fmt.Printf("Our configuration:\t%+v\n", config)

	// A negative keep count has no sensible meaning. Reject it here rather
	// than letting it reach the slice expressions below, where it would
	// either panic or silently select every match for pruning.
	if config.FilesToKeep < 0 {
		flaggy.ShowHelpAndExit(fmt.Sprintf("Invalid number of files to keep: %d", config.FilesToKeep))
	}

	// DEBUG
	log.Println("Confirm that requested path actually exists")
	if !pathExists(config.StartPath) {
		flaggy.ShowHelpAndExit(fmt.Sprintf("Error processing requested path: %q", config.StartPath))
	}

	// INFO
	log.Println("Processing path:", config.StartPath)

	matches, err := processPath(config)

	// TODO
	// How to handle errors from gathering removal candidates?
	// Add optional flag to allow ignoring errors, fail immediately otherwise?
	if err != nil {
		log.Println("error:", err)
	}

	// NOTE: If this sort order changes, make sure to update pruneRange,
	// which assumes the oldest file is first when it decides which end of
	// the slice to retain.
	matches.sortByModTimeAsc()

	// DEBUG
	log.Printf("Length of matches slice: %d\n", len(matches))

	// DEBUG
	log.Println("Early exit if no matching files were found.")
	if len(matches) <= 0 {

		// INFO
		fmt.Printf("No matches found in path %q for files with substring pattern of %q and with extensions %v\n",
			config.StartPath, config.FilePattern, config.FileExtensions)

		// TODO: Not finding something is a valid outcome, so "normal" exit
		// code?
		os.Exit(0)
	}

	var filesToPrune FileMatches

	// DEBUG
	log.Printf("%d total items in matches", len(matches))
	log.Printf("%d items to keep per config.FilesToKeep", config.FilesToKeep)

	if config.KeepOldest {
		// DEBUG
		log.Println("Keeping older files")
		log.Println("start at specified number to keep, go until end of slice")
	} else {
		// DEBUG
		log.Println("Keeping newer files")
		log.Println("start at beginning, go until specified number to keep")
	}

	// The bounds are clamped so that asking to keep more files than were
	// matched prunes nothing instead of panicking with an out-of-range
	// slice expression.
	start, end := pruneRange(len(matches), config.FilesToKeep, config.KeepOldest)
	filesToPrune = matches[start:end]

	// DEBUG, INFO?
	log.Printf("%d items to prune", len(filesToPrune))

	log.Println("Prune specified files, do NOT ignore errors")
	// TODO: Add support for ignoring errors (though I cannot immediately
	// think of a good reason to do so)
	removalResults, err := cleanPath(filesToPrune, false, config)

	// Show what we WERE able to successfully remove
	// TODO: Refactor this into a function to handle displaying results?
	log.Printf("%d files successfully removed\n", len(removalResults.SuccessfulRemovals))
	log.Println("----------------------------")
	for _, file := range removalResults.SuccessfulRemovals {
		log.Println("*", file.Name())
	}

	log.Printf("%d files failed to remove\n", len(removalResults.FailedRemovals))
	log.Println("----------------------------")
	for _, file := range removalResults.FailedRemovals {
		log.Println("*", file.Name())
	}

	// Determine if we need to display error, exit with unsuccessful error code
	if err != nil {
		log.Fatalf("Errors encountered while processing %s: %s", config.StartPath, err)
	}

	log.Printf("%s successfully completed.", appName)

}

// pruneRange returns the half-open index range [start, end) of the entries to
// prune from a slice of total items sorted oldest-first, given that keep items
// are to be retained. When keepOldest is true the first keep items are
// retained and the remainder is pruned; otherwise the last keep items are
// retained. keep is clamped to [0, total], so the returned range is always
// valid for slicing and is empty when keep is at least total.
func pruneRange(total, keep int, keepOldest bool) (start, end int) {
	if keep < 0 {
		keep = 0
	}
	if keep > total {
		keep = total
	}

	if keepOldest {
		return keep, total
	}
	return 0, total - keep
}
26 changes: 26 additions & 0 deletions main_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package main

import "testing"

// TestMain checks that NewConfig leaves FileExtensions as a nil slice rather
// than an empty, non-nil one. It also logs how an empty slice literal and a
// nil slice compare, for reference when reading the test output.
//
// NOTE(review): the testing package gives special meaning to
// func TestMain(m *testing.M); consider renaming this test so that it is not
// mistaken for that entry point.
func TestMain(t *testing.T) {

	defaultConfig := NewConfig()

	var emptySlice = []string{}
	var nilSlice []string

	t.Logf("%v\n", emptySlice)
	t.Log(len(emptySlice))
	t.Log("emptySlice is nil:", emptySlice == nil)
	t.Log("-------------------------")

	t.Logf("%v\n", nilSlice)
	t.Log(len(nilSlice))
	t.Log("nilSlice is nil:", nilSlice == nil)
	t.Log("-------------------------")

	t.Logf("%v\n", defaultConfig.FileExtensions)
	t.Log(len(defaultConfig.FileExtensions))
	t.Log("defaultConfig.FileExtensions is nil:", defaultConfig.FileExtensions == nil)

	// Logging alone can never fail the test, so assert the property that
	// the default configuration is expected to have.
	if defaultConfig.FileExtensions != nil {
		t.Errorf("NewConfig().FileExtensions = %v, want nil slice", defaultConfig.FileExtensions)
	}

}
94 changes: 94 additions & 0 deletions matches.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package main

import (
"log"
"os"
"path/filepath"
"sort"
"strings"
)

// FileMatch represents a superset of statistics (including os.FileInfo) for a
// file matched by provided search criteria. This allows us to record the
// original full path while also keeping the os.FileInfo details for the file.
type FileMatch struct {
// The embedded os.FileInfo exposes its methods (e.g. Name, ModTime)
// directly on FileMatch.
os.FileInfo
// Path is the path recorded for the matched file.
Path string
}

// FileMatches is a slice of FileMatch objects. The named type exists so that
// methods, such as the sort helpers in this file, can be declared on it.
// TODO: Do I really need to abstract the fact that FileMatches is a slice of
// FileMatch objects? It seems that by hiding this it makes it harder to see
// that we're working with a slice?
type FileMatches []FileMatch

// hasValidExtension reports whether filename passes the extension filter in
// config. When config.FileExtensions is empty, every filename passes.
// Otherwise the value returned by filepath.Ext for filename (which includes
// the leading dot) must exactly equal one of the configured extensions.
func hasValidExtension(filename string, config *Config) bool {

	// NOTE: We do NOT compare extensions insensitively. We can add that
	// functionality in the future if needed.
	fileExt := filepath.Ext(filename)
	allowed := config.FileExtensions

	switch {
	case len(allowed) == 0:
		// DEBUG
		log.Println("No extension limits have been set!")
		log.Printf("Considering %s safe for removal\n", filename)
		return true

	case inFileExtensionsPatterns(fileExt, allowed):
		// DEBUG
		log.Printf("%s has a valid extension for removal\n", filename)
		return true
	}

	// DEBUG
	log.Println("hasValidExtension: returning false for:", filename)
	log.Printf("hasValidExtension: returning false (%q not in %q)",
		fileExt, allowed)
	return false
}

// hasValidFilenamePattern reports whether filename passes the filename
// pattern filter in config. When config.FilePattern is empty or consists only
// of whitespace, every filename passes. Otherwise filename must contain
// config.FilePattern as a substring.
func hasValidFilenamePattern(filename string, config *Config) bool {

	pattern := config.FilePattern

	if strings.TrimSpace(pattern) == "" {
		// DEBUG
		log.Println("No FilePattern has been specified!")
		log.Printf("Considering %s safe for removal\n", filename)
		return true
	}

	// Search for substring
	matched := strings.Contains(filename, pattern)

	if !matched {
		// DEBUG
		log.Println("hasValidFilenamePattern: returning false for:", filename)
		log.Printf("hasValidFilenamePattern: returning false (%q does not contain %q)",
			filename, pattern)
	}

	return matched
}

// inFileExtensionsPatterns reports whether ext is exactly equal to one of the
// entries in exts, much like Python's `if "x" in list:` membership test. A
// nil or empty exts never matches.
func inFileExtensionsPatterns(ext string, exts []string) bool {
	for i := 0; i < len(exts); i++ {
		if exts[i] == ext {
			return true
		}
	}
	return false
}

// TODO: Two methods, or one method with a boolean flag determining behavior?

// sortByModTimeAsc sorts fm in place by modification time so that the entry
// with the earliest ModTime comes first. The sort is performed by sort.Slice,
// which does not guarantee the relative order of entries with equal times.
func (fm FileMatches) sortByModTimeAsc() {
	olderFirst := func(a, b int) bool {
		return fm[b].ModTime().After(fm[a].ModTime())
	}
	sort.Slice(fm, olderFirst)
}

// sortByModTimeDesc sorts fm in place by modification time so that the entry
// with the latest ModTime comes first. The sort is performed by sort.Slice,
// which does not guarantee the relative order of entries with equal times.
func (fm FileMatches) sortByModTimeDesc() {
	newerFirst := func(a, b int) bool {
		return fm[b].ModTime().Before(fm[a].ModTime())
	}
	sort.Slice(fm, newerFirst)
}
Loading

0 comments on commit a0b86df

Please sign in to comment.