-
Notifications
You must be signed in to change notification settings - Fork 0
/
drsz.go
274 lines (227 loc) · 6.98 KB
/
drsz.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
package drsz
import (
"encoding/csv"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"sync"
"text/tabwriter"
"time"
"github.com/dustin/go-humanize"
"github.com/schollz/progressbar/v3"
)
// Dir holds information about a directory.
type Dir struct {
// AbsPath is the absolute path of the directory; it is assigned by SetPath.
AbsPath string
// SizeBytes is the combined size in bytes of the files found by WalkCalc.
SizeBytes int64
// LastModified is the most recent file modification time found by WalkCalc.
// It stays at the zero time when WalkCalc finds no files.
LastModified time.Time
}
// RootDir holds information about the top level directories it contains.
type RootDir struct {
// Dir is embedded, so a RootDir carries the root's own AbsPath, SizeBytes
// and LastModified fields and exposes the Dir methods directly.
Dir
// TopDirs lists the directories found directly inside the root; it is
// populated by FindTops.
TopDirs []*Dir
}
// SizeString formats SizeBytes as a human readable string using
// humanize.Bytes.
func (d Dir) SizeString() string {
	byteCount := uint64(d.SizeBytes)
	return humanize.Bytes(byteCount)
}
// Name returns the last element of the directory's absolute path.
//
// AbsPath is produced by filepath.Abs and therefore uses the operating
// system's path separator, so the base name is taken with filepath.Base
// rather than the slash-only path.Base.
func (d Dir) Name() string {
	return filepath.Base(d.AbsPath)
}
// SetPath converts dirPath to an absolute path, verifies that it can be
// stat'ed and that it refers to a directory, and then stores it in AbsPath.
//
// On any failure the receiver is left unchanged and the error is returned:
// either the error from filepath.Abs / os.Stat, or a "not a directory" error.
func (d *Dir) SetPath(dirPath string) error {
	resolved, err := filepath.Abs(dirPath)
	if err != nil {
		return err
	}

	stat, statErr := os.Stat(resolved)
	switch {
	case statErr != nil:
		// path could not be read at all
		return statErr
	case !stat.IsDir():
		// path is readable but is something other than a directory
		return fmt.Errorf("provided path is not a directory")
	}

	d.AbsPath = resolved
	return nil
}
// WalkCalc walks the tree rooted at d.AbsPath and records the combined size of
// every non-directory entry in SizeBytes and the newest modification time
// among those entries in LastModified.
//
// The walk stops at the first error reported for any entry. In that case the
// receiver is left unmodified and the returned error wraps the underlying
// cause, so callers can inspect it with errors.Is / errors.As.
func (d *Dir) WalkCalc() error {
	var (
		size    int64
		lastMod time.Time
	)

	err := filepath.Walk(d.AbsPath, func(_ string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if info.IsDir() {
			// directories themselves contribute neither size nor mod time
			return nil
		}
		size += info.Size()
		// keep the most recent modification time seen so far
		if mod := info.ModTime(); mod.After(lastMod) {
			lastMod = mod
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("error while searching in %s: %w", d.AbsPath, err)
	}

	// only publish results once the whole walk succeeded
	d.SizeBytes = size
	d.LastModified = lastMod
	return nil
}
// ExportCSV creates (or truncates) the file at csvPath and writes a header row
// followed by one row per entry in r.TopDirs: absolute path, size in bytes and
// last modified time in the local time zone.
//
// csvPath must pass IsCsvPath, otherwise an error is returned before any file
// is created. Errors from creating, writing, flushing or closing the file are
// returned wrapped; the success message is printed to stdout only after the
// data has been flushed and the file closed without error.
func (r RootDir) ExportCSV(csvPath string) error {
	if !IsCsvPath(csvPath) {
		return fmt.Errorf("provided filepath is not a CSV file")
	}

	csvFile, err := os.Create(csvPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %s: %w", csvPath, err)
	}

	// Always close the file, but report a write failure in preference to a
	// close failure since it is the more informative of the two.
	writeErr := r.writeCSVRows(csv.NewWriter(csvFile), csvPath)
	closeErr := csvFile.Close()
	if writeErr != nil {
		return writeErr
	}
	if closeErr != nil {
		return fmt.Errorf("failed to close output file: %s: %w", csvPath, closeErr)
	}

	fmt.Printf("Exported CSV file %s\n", csvPath)
	return nil
}

// writeCSVRows writes the header and one row per top level directory to w and
// flushes it; csvPath is used only for error messages.
func (r RootDir) writeCSVRows(w *csv.Writer, csvPath string) error {
	header := []string{"directory", "bytes", "lastModified"}
	if err := w.Write(header); err != nil {
		return fmt.Errorf("failed to write header to file: %s: %w", csvPath, err)
	}

	for _, dir := range r.TopDirs {
		row := []string{dir.AbsPath, fmt.Sprintf("%d", dir.SizeBytes), dir.LastModified.Local().String()}
		if err := w.Write(row); err != nil {
			return fmt.Errorf("failed to write row %q to file: %s: %w", dir.AbsPath, csvPath, err)
		}
	}

	// csv.Writer buffers its output, so a failure of the underlying file may
	// only become visible here; Flush itself returns nothing, Error reports it.
	w.Flush()
	if err := w.Error(); err != nil {
		return fmt.Errorf("failed to flush rows to file: %s: %w", csvPath, err)
	}
	return nil
}
// FindTops reads the entries directly inside r.AbsPath, builds a Dir for each
// entry that is a directory, stores them in r.TopDirs and prints how many were
// found.
//
// An error from reading the root, or from initialising any one of the
// directories, aborts the scan and is returned; r.TopDirs is then left
// unchanged.
func (r *RootDir) FindTops() error {
	contents, err := os.ReadDir(r.AbsPath)
	if err != nil {
		return err
	}

	var topDirs []*Dir
	for _, item := range contents {
		if !item.IsDir() {
			continue
		}
		// AbsPath is an OS path, so join with filepath (OS separator) rather
		// than the slash-only path package.
		d, err := NewDir(filepath.Join(r.AbsPath, item.Name()))
		if err != nil {
			return err
		}
		topDirs = append(topDirs, d)
	}

	r.TopDirs = topDirs
	fmt.Printf("Found %d top level directories in %s\n", len(r.TopDirs), r.AbsPath)
	return nil
}
// CalcStats runs WalkCalc on every directory in r.TopDirs, at most concLimit
// at a time (a concLimit of zero is treated as one), while advancing a
// progress bar. When all walks have finished it prints a table of name, size
// and last modified time to stdout, followed by a numbered list of any walk
// errors.
//
// Per-directory failures are only reported in the printed output; the method
// itself always returns nil.
func (r *RootDir) CalcStats(concLimit uint8) error {
	// progress bar sized to the number of directories to process
	progress := progressbar.NewOptions64(
		int64(len(r.TopDirs)),
		progressbar.OptionSetDescription("Calculating..."),
		progressbar.OptionSetPredictTime(true),
		progressbar.OptionShowCount(),
	)

	var (
		workers  sync.WaitGroup // tracks outstanding walk goroutines
		failMu   sync.Mutex     // guards failures
		failures []error        // walk errors, in completion order
	)

	// a zero limit would give an unbuffered channel that no worker could ever
	// send on, so fall back to one walk at a time
	if concLimit == 0 {
		concLimit = 1
	}
	// buffered channel used as a counting semaphore
	tokens := make(chan struct{}, concLimit)

	for _, dir := range r.TopDirs {
		workers.Add(1)
		go func(target *Dir) {
			defer workers.Done()

			tokens <- struct{}{} // take a slot before doing the heavy i/o
			if walkErr := target.WalkCalc(); walkErr != nil {
				failMu.Lock()
				failures = append(failures, walkErr)
				failMu.Unlock()
			}
			<-tokens // give the slot back

			progress.Add(1)
		}(dir)
	}
	workers.Wait()

	// render the results as an aligned table
	out := tabwriter.NewWriter(os.Stdout, 0, 0, 5, ' ', 0)
	fmt.Fprintln(out, "") // blank row separating the table from the progress bar
	fmt.Fprintf(out, "Name\tSize\tLast_Modified\n")
	for _, dir := range r.TopDirs {
		fmt.Fprintf(out, "%s\t%s\t%s\n", dir.Name(), dir.SizeString(), dir.LastModified.Local().String())
	}

	// list the directories whose walk failed, if any
	if len(failures) > 0 {
		fmt.Fprintln(out, "")
		fmt.Fprintln(out, "***WARN*** Processing failed on the following directories ***WARN***")
		for idx := range failures {
			fmt.Fprintf(out, "%d: %v\n", idx+1, failures[idx])
		}
	}

	out.Flush()
	return nil
}
// IsCsvPath reports whether the cleaned form of the given path ends in a
// ".csv" extension, compared case-insensitively. Only the path string is
// examined; the file system is not consulted.
func IsCsvPath(filepath string) bool {
	extension := path.Ext(path.Clean(filepath))
	switch strings.ToLower(extension) {
	case ".csv":
		return true
	default:
		return false
	}
}
// NewRootDir allocates a RootDir and points it at dirPath via SetPath.
// It returns nil and the SetPath error when dirPath cannot be used.
func NewRootDir(dirPath string) (*RootDir, error) {
	root := new(RootDir)
	if err := root.SetPath(dirPath); err != nil {
		return nil, err
	}
	return root, nil
}
// NewDir allocates a Dir and points it at dirPath via SetPath.
// It returns nil and the SetPath error when dirPath cannot be used.
func NewDir(dirPath string) (*Dir, error) {
	dir := new(Dir)
	if err := dir.SetPath(dirPath); err != nil {
		return nil, err
	}
	return dir, nil
}
// Run performs a complete drsz pass over rootDir: it initialises the root,
// discovers its top level directories, calculates and prints their stats using
// at most concLimit concurrent walks, and, when createFile is true, exports
// the results as CSV to outputFile. The first error from any step is returned.
func Run(rootDir string, concLimit uint8, createFile bool, outputFile string) error {
	root, err := NewRootDir(rootDir)
	if err != nil {
		return err
	}

	// discover the top-level dirs inside the root
	if err := root.FindTops(); err != nil {
		return err
	}

	// size up each top-level dir and print the table
	if err := root.CalcStats(concLimit); err != nil {
		return err
	}

	// the CSV export is optional
	if !createFile {
		return nil
	}
	return root.ExportCSV(outputFile)
}