From 1e0972c540a207a0462d39ab78f7edb799caac12 Mon Sep 17 00:00:00 2001 From: Aiden Scandella Date: Tue, 16 May 2017 14:40:07 -0700 Subject: [PATCH 1/5] Use fastwalk from goimports --- internal/gps/pkgtree/fastwalk.go | 187 ++++++++++++++++++++ internal/gps/pkgtree/fastwalk_dirent_ino.go | 13 ++ internal/gps/pkgtree/fastwalk_portable.go | 29 +++ internal/gps/pkgtree/fastwalk_unix.go | 122 +++++++++++++ internal/gps/pkgtree/pkgtree.go | 15 +- 5 files changed, 360 insertions(+), 6 deletions(-) create mode 100644 internal/gps/pkgtree/fastwalk.go create mode 100644 internal/gps/pkgtree/fastwalk_dirent_ino.go create mode 100644 internal/gps/pkgtree/fastwalk_portable.go create mode 100644 internal/gps/pkgtree/fastwalk_unix.go diff --git a/internal/gps/pkgtree/fastwalk.go b/internal/gps/pkgtree/fastwalk.go new file mode 100644 index 0000000000..af74c285ab --- /dev/null +++ b/internal/gps/pkgtree/fastwalk.go @@ -0,0 +1,187 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A faster implementation of filepath.Walk. +// +// filepath.Walk's design necessarily calls os.Lstat on each file, +// even if the caller needs less info. And goimports only need to know +// the type of each file. The kernel interface provides the type in +// the Readdir call but the standard library ignored it. +// fastwalk_unix.go contains a fork of the syscall routines. +// +// See golang.org/issue/16399 + +package pkgtree + +import ( + "errors" + "os" + "path/filepath" + "runtime" + "sync" +) + +// traverseLink is a sentinel error for fastWalk, similar to filepath.SkipDir. +var traverseLink = errors.New("traverse symlink, assuming target is a directory") + +// fastWalk walks the file tree rooted at root, calling walkFn for +// each file or directory in the tree, including root. +// +// If fastWalk returns filepath.SkipDir, the directory is skipped. +// +// Unlike filepath.Walk: +// * file stat calls must be done by the user. +// The only provided metadata is the file type, which does not include +// any permission bits. +// * multiple goroutines stat the filesystem concurrently. The provided +// walkFn must be safe for concurrent use. +// * fastWalk can follow symlinks if walkFn returns the traverseLink +// sentinel error. It is the walkFn's responsibility to prevent +// fastWalk from going into symlink cycles. +func fastWalk(root string, walkFn func(path string, typ os.FileMode) error) error { + // TODO(bradfitz): make numWorkers configurable? We used a + // minimum of 4 to give the kernel more info about multiple + // things we want, in hopes its I/O scheduling can take + // advantage of that. Hopefully most are in cache. Maybe 4 is + // even too low of a minimum. Profile more. + numWorkers := 4 + if n := runtime.NumCPU(); n > numWorkers { + numWorkers = n + } + + // Make sure to wait for all workers to finish, otherwise + // walkFn could still be called after returning. This Wait call + // runs after close(e.donec) below. + var wg sync.WaitGroup + defer wg.Wait() + + w := &walker{ + fn: walkFn, + enqueuec: make(chan walkItem, numWorkers), // buffered for performance + workc: make(chan walkItem, numWorkers), // buffered for performance + donec: make(chan struct{}), + + // buffered for correctness & not leaking goroutines: + resc: make(chan error, numWorkers), + } + defer close(w.donec) + + for i := 0; i < numWorkers; i++ { + wg.Add(1) + go w.doWork(&wg) + } + todo := []walkItem{{dir: root}} + out := 0 + for { + workc := w.workc + var workItem walkItem + if len(todo) == 0 { + workc = nil + } else { + workItem = todo[len(todo)-1] + } + select { + case workc <- workItem: + todo = todo[:len(todo)-1] + out++ + case it := <-w.enqueuec: + todo = append(todo, it) + case err := <-w.resc: + out-- + if err != nil { + return err + } + if out == 0 && len(todo) == 0 { + // It's safe to quit here, as long as the buffered + // enqueue channel isn't also readable, which might + // happen if the worker sends both another unit of + // work and its result before the other select was + // scheduled and both w.resc and w.enqueuec were + // readable. + select { + case it := <-w.enqueuec: + todo = append(todo, it) + default: + return nil + } + } + } + } +} + +// doWork reads directories as instructed (via workc) and runs the +// user's callback function. +func (w *walker) doWork(wg *sync.WaitGroup) { + defer wg.Done() + for { + select { + case <-w.donec: + return + case it := <-w.workc: + select { + case <-w.donec: + return + case w.resc <- w.walk(it.dir, !it.callbackDone): + } + } + } +} + +type walker struct { + fn func(path string, typ os.FileMode) error + + donec chan struct{} // closed on fastWalk's return + workc chan walkItem // to workers + enqueuec chan walkItem // from workers + resc chan error // from workers +} + +type walkItem struct { + dir string + callbackDone bool // callback already called; don't do it again +} + +func (w *walker) enqueue(it walkItem) { + select { + case w.enqueuec <- it: + case <-w.donec: + } +} + +func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { + joined := dirName + string(os.PathSeparator) + baseName + if typ == os.ModeDir { + w.enqueue(walkItem{dir: joined}) + return nil + } + + err := w.fn(joined, typ) + if typ == os.ModeSymlink { + if err == traverseLink { + // Set callbackDone so we don't call it twice for both the + // symlink-as-symlink and the symlink-as-directory later: + w.enqueue(walkItem{dir: joined, callbackDone: true}) + return nil + } + if err == filepath.SkipDir { + // Permit SkipDir on symlinks too. + return nil + } + } + return err +} + +func (w *walker) walk(root string, runUserCallback bool) error { + if runUserCallback { + err := w.fn(root, os.ModeDir) + if err == filepath.SkipDir { + return nil + } + if err != nil { + return err + } + } + + return readDir(root, w.onDirEnt) +} diff --git a/internal/gps/pkgtree/fastwalk_dirent_ino.go b/internal/gps/pkgtree/fastwalk_dirent_ino.go new file mode 100644 index 0000000000..b2c775abac --- /dev/null +++ b/internal/gps/pkgtree/fastwalk_dirent_ino.go @@ -0,0 +1,13 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux,!appengine darwin + +package pkgtree + +import "syscall" + +func direntInode(dirent *syscall.Dirent) uint64 { + return uint64(dirent.Ino) +} diff --git a/internal/gps/pkgtree/fastwalk_portable.go b/internal/gps/pkgtree/fastwalk_portable.go new file mode 100644 index 0000000000..7dce608809 --- /dev/null +++ b/internal/gps/pkgtree/fastwalk_portable.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd + +package pkgtree + +import ( + "io/ioutil" + "os" +) + +// readDir calls fn for each directory entry in dirName. +// It does not descend into directories or follow symlinks. +// If fn returns a non-nil error, readDir returns with that error +// immediately. +func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { + fis, err := ioutil.ReadDir(dirName) + if err != nil { + return err + } + for _, fi := range fis { + if err := fn(dirName, fi.Name(), fi.Mode()&os.ModeType); err != nil { + return err + } + } + return nil +} diff --git a/internal/gps/pkgtree/fastwalk_unix.go b/internal/gps/pkgtree/fastwalk_unix.go new file mode 100644 index 0000000000..fee5fea877 --- /dev/null +++ b/internal/gps/pkgtree/fastwalk_unix.go @@ -0,0 +1,122 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux,!appengine darwin freebsd openbsd netbsd + +package pkgtree + +import ( + "bytes" + "fmt" + "os" + "syscall" + "unsafe" +) + +const blockSize = 8 << 10 + +// unknownFileMode is a sentinel (and bogus) os.FileMode +// value used to represent a syscall.DT_UNKNOWN Dirent.Type. +const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice + +func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { + fd, err := syscall.Open(dirName, 0, 0) + if err != nil { + return err + } + defer syscall.Close(fd) + + // The buffer must be at least a block long. + buf := make([]byte, blockSize) // stack-allocated; doesn't escape + bufp := 0 // starting read position in buf + nbuf := 0 // end valid data in buf + for { + if bufp >= nbuf { + bufp = 0 + nbuf, err = syscall.ReadDirent(fd, buf) + if err != nil { + return os.NewSyscallError("readdirent", err) + } + if nbuf <= 0 { + return nil + } + } + consumed, name, typ := parseDirEnt(buf[bufp:nbuf]) + bufp += consumed + if name == "" || name == "." || name == ".." { + continue + } + // Fallback for filesystems (like old XFS) that don't + // support Dirent.Type and have DT_UNKNOWN (0) there + // instead. + if typ == unknownFileMode { + fi, err := os.Lstat(dirName + "/" + name) + if err != nil { + // It got deleted in the meantime. + if os.IsNotExist(err) { + continue + } + return err + } + typ = fi.Mode() & os.ModeType + } + if err := fn(dirName, name, typ); err != nil { + return err + } + } +} + +func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) { + // golang.org/issue/15653 + dirent := (*syscall.Dirent)(unsafe.Pointer(&buf[0])) + if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v { + panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v)) + } + if len(buf) < int(dirent.Reclen) { + panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen)) + } + consumed = int(dirent.Reclen) + if direntInode(dirent) == 0 { // File absent in directory. + return + } + switch dirent.Type { + case syscall.DT_REG: + typ = 0 + case syscall.DT_DIR: + typ = os.ModeDir + case syscall.DT_LNK: + typ = os.ModeSymlink + case syscall.DT_BLK: + typ = os.ModeDevice + case syscall.DT_FIFO: + typ = os.ModeNamedPipe + case syscall.DT_SOCK: + typ = os.ModeSocket + case syscall.DT_UNKNOWN: + typ = unknownFileMode + default: + // Skip weird things. + // It's probably a DT_WHT (http://lwn.net/Articles/325369/) + // or something. Revisit if/when this package is moved outside + // of goimports. goimports only cares about regular files, + // symlinks, and directories. + return + } + + nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) + nameLen := bytes.IndexByte(nameBuf[:], 0) + if nameLen < 0 { + panic("failed to find terminating 0 byte in dirent") + } + + // Special cases for common things: + if nameLen == 1 && nameBuf[0] == '.' { + name = "." + } else if nameLen == 2 && nameBuf[0] == '.' && nameBuf[1] == '.' { + name = ".." + } else { + name = string(nameBuf[:nameLen]) + } + return +} diff --git a/internal/gps/pkgtree/pkgtree.go b/internal/gps/pkgtree/pkgtree.go index f14598c286..c0de3c4641 100644 --- a/internal/gps/pkgtree/pkgtree.go +++ b/internal/gps/pkgtree/pkgtree.go @@ -15,6 +15,7 @@ import ( "sort" "strconv" "strings" + "sync" "unicode" ) @@ -64,6 +65,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { ImportRoot: importRoot, Packages: make(map[string]PackageOrErr), } + mu := &sync.Mutex{} var err error fileRoot, err = filepath.Abs(fileRoot) @@ -71,10 +73,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { return PackageTree{}, err } - err = filepath.Walk(fileRoot, func(wp string, fi os.FileInfo, err error) error { - if err != nil && err != filepath.SkipDir { - return err - } + err = fastWalk(fileRoot, func(wp string, fi os.FileMode) error { if !fi.IsDir() { return nil } @@ -83,7 +82,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { // // We don't skip _*, or testdata dirs because, while it may be poor // form, importing them is not a compilation error. - switch fi.Name() { + switch wp { case "vendor", "Godeps": return filepath.SkipDir } @@ -93,7 +92,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { // Note that there are some pathological edge cases this doesn't cover, // such as a user using Git for version control, but having a package // named "svn" in a directory named ".svn". - if _, ok := vcsRoots[fi.Name()]; ok { + if _, ok := vcsRoots[wp]; ok { return filepath.SkipDir } @@ -143,9 +142,11 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { case gscan.ErrorList, *gscan.Error, *build.NoGoError: // This happens if we encounter malformed or nonexistent Go // source code + mu.Lock() ptree.Packages[ip] = PackageOrErr{ Err: err, } + mu.Unlock() return nil default: return err @@ -168,6 +169,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { } } + mu.Lock() if len(lim) > 0 { ptree.Packages[ip] = PackageOrErr{ Err: &LocalImportsError{ @@ -181,6 +183,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { P: pkg, } } + mu.Unlock() return nil }) From 8c886c50af6229497dd20f6ff8ce2204c5d1a7f9 Mon Sep 17 00:00:00 2001 From: Aiden Scandella Date: Tue, 16 May 2017 16:20:53 -0700 Subject: [PATCH 2/5] Fix race by introducing local err --- internal/gps/pkgtree/pkgtree.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/gps/pkgtree/pkgtree.go b/internal/gps/pkgtree/pkgtree.go index c0de3c4641..2360483027 100644 --- a/internal/gps/pkgtree/pkgtree.go +++ b/internal/gps/pkgtree/pkgtree.go @@ -107,6 +107,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { // then will filepath.Walk have attempted to descend into the directory // and encountered an error. var f *os.File + var err error f, err = os.Open(wp) if err != nil { if os.IsPermission(err) { From 07cd3a2e9dd2ac7e708770d129a42ab58825ce4e Mon Sep 17 00:00:00 2001 From: Aiden Scandella Date: Tue, 16 May 2017 16:25:04 -0700 Subject: [PATCH 3/5] Style updates per dcheney --- internal/gps/pkgtree/pkgtree.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/gps/pkgtree/pkgtree.go b/internal/gps/pkgtree/pkgtree.go index 2360483027..6252c5df66 100644 --- a/internal/gps/pkgtree/pkgtree.go +++ b/internal/gps/pkgtree/pkgtree.go @@ -65,7 +65,7 @@ func ListPackages(fileRoot, importRoot string) (PackageTree, error) { ImportRoot: importRoot, Packages: make(map[string]PackageOrErr), } - mu := &sync.Mutex{} + var mu sync.Mutex var err error fileRoot, err = filepath.Abs(fileRoot) From 8b92f980c0570d7fc62c0893dfc666da7b6ba083 Mon Sep 17 00:00:00 2001 From: Aiden Scandella Date: Tue, 16 May 2017 16:43:38 -0700 Subject: [PATCH 4/5] Add benchmark --- .../gps/pkgtree/pkgtree_benchmark_test.go | 28 +++++++++++++++++++ internal/gps/pkgtree/pkgtree_test.go | 10 +++++-- 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 internal/gps/pkgtree/pkgtree_benchmark_test.go diff --git a/internal/gps/pkgtree/pkgtree_benchmark_test.go b/internal/gps/pkgtree/pkgtree_benchmark_test.go new file mode 100644 index 0000000000..655107d45c --- /dev/null +++ b/internal/gps/pkgtree/pkgtree_benchmark_test.go @@ -0,0 +1,28 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +package pkgtree + +import "testing" + +func BenchmarkListPackages(b *testing.B) { + b.StopTimer() + + j := func(s ...string) string { + return testDir(b, s...) + } + + table := []string{ + "dotgodir", + "buildtag", + "varied", + } + + b.StartTimer() + + for _, name := range table { + for n := 0; n < b.N; n++ { + ListPackages(j(name), name) + } + } +} diff --git a/internal/gps/pkgtree/pkgtree_test.go b/internal/gps/pkgtree/pkgtree_test.go index ab134f02e7..089dccc627 100644 --- a/internal/gps/pkgtree/pkgtree_test.go +++ b/internal/gps/pkgtree/pkgtree_test.go @@ -472,10 +472,14 @@ func TestListPackagesNoDir(t *testing.T) { } } -func TestListPackages(t *testing.T) { +func testDir(t testing.TB, s ...string) string { srcdir := filepath.Join(getTestdataRootDir(t), "src") + return filepath.Join(srcdir, filepath.Join(s...)) +} + +func TestListPackages(t *testing.T) { j := func(s ...string) string { - return filepath.Join(srcdir, filepath.Join(s...)) + return testDir(t, s...) } table := map[string]struct { @@ -1976,7 +1980,7 @@ func TestToReachMapFilterDot(t *testing.T) { } } -func getTestdataRootDir(t *testing.T) string { +func getTestdataRootDir(t testing.TB) string { cwd, err := os.Getwd() if err != nil { t.Fatal(err) From bcf8f9bc363fabefa5c8bbce6c0bfdcce0529fa1 Mon Sep 17 00:00:00 2001 From: Aiden Scandella Date: Tue, 16 May 2017 16:53:05 -0700 Subject: [PATCH 5/5] Update benchmark --- .../gps/pkgtree/pkgtree_benchmark_test.go | 25 ++++++++++--------- internal/gps/pkgtree/pkgtree_test.go | 10 +++----- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/internal/gps/pkgtree/pkgtree_benchmark_test.go b/internal/gps/pkgtree/pkgtree_benchmark_test.go index 655107d45c..eb9d8f8672 100644 --- a/internal/gps/pkgtree/pkgtree_benchmark_test.go +++ b/internal/gps/pkgtree/pkgtree_benchmark_test.go @@ -3,26 +3,27 @@ // license that can be found in the LICENSE file. package pkgtree -import "testing" +import ( + "os" + "path/filepath" + "testing" +) func BenchmarkListPackages(b *testing.B) { b.StopTimer() - j := func(s ...string) string { - return testDir(b, s...) - } - - table := []string{ - "dotgodir", - "buildtag", - "varied", + cwd, err := os.Getwd() + if err != nil { + b.Fatal(err) } + root := filepath.Join(cwd, "..", "..", "..") b.StartTimer() - for _, name := range table { - for n := 0; n < b.N; n++ { - ListPackages(j(name), name) + for n := 0; n < b.N; n++ { + _, err := ListPackages(root, "github.com/golang/dep") + if err != nil { + b.Error(err) } } } diff --git a/internal/gps/pkgtree/pkgtree_test.go b/internal/gps/pkgtree/pkgtree_test.go index 089dccc627..ab134f02e7 100644 --- a/internal/gps/pkgtree/pkgtree_test.go +++ b/internal/gps/pkgtree/pkgtree_test.go @@ -472,14 +472,10 @@ func TestListPackagesNoDir(t *testing.T) { } } -func testDir(t testing.TB, s ...string) string { - srcdir := filepath.Join(getTestdataRootDir(t), "src") - return filepath.Join(srcdir, filepath.Join(s...)) -} - func TestListPackages(t *testing.T) { + srcdir := filepath.Join(getTestdataRootDir(t), "src") j := func(s ...string) string { - return testDir(t, s...) + return filepath.Join(srcdir, filepath.Join(s...)) } table := map[string]struct { @@ -1980,7 +1976,7 @@ func TestToReachMapFilterDot(t *testing.T) { } } -func getTestdataRootDir(t testing.TB) string { +func getTestdataRootDir(t *testing.T) string { cwd, err := os.Getwd() if err != nil { t.Fatal(err)