pax_global_header00006660000000000000000000000064140155144220014510gustar00rootroot0000000000000052 comment=2c746f29c263bb7437b6b4870e577335eb4871e1 walker-0.1.2/000077500000000000000000000000001401551442200127755ustar00rootroot00000000000000walker-0.1.2/LICENSE000066400000000000000000000020541401551442200140030ustar00rootroot00000000000000MIT License Copyright (c) 2019 Arran Walker Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.walker-0.1.2/README.md000066400000000000000000000125471401551442200142650ustar00rootroot00000000000000# walker [![](https://godoc.org/github.com/saracen/walker?status.svg)](http://godoc.org/github.com/saracen/walker) `walker` is a faster, parallel version, of `filepath.Walk`. 
```go // walk function called for every path found walkFn := func(pathname string, fi os.FileInfo) error { fmt.Printf("%s: %d bytes\n", pathname, fi.Size()) return nil } // error function called for every error encountered errorCallbackOption := walker.WithErrorCallback(func(pathname string, err error) error { // ignore permissione errors if os.IsPermission(err) { return nil } // halt traversal on any other error return err }) walker.Walk("/tmp", walkFn, errorCallbackOption) ``` ## Benchmarks - Standard library (`filepath.Walk`) is `FilepathWalk`. - This library is `WalkerWalk` - `FastwalkWalk` is [fastwalk](https://github.com/golang/tools/tree/master/internal/fastwalk). - `GodirwalkWalk` is [godirwalk](https://github.com/karrick/godirwalk). This library and `filepath.Walk` both perform `os.Lstat` calls and provide a full `os.FileInfo` structure to the callback. `BenchmarkFastwalkWalkLstat` and `BenchmarkGodirwalkWalkLstat` include this stat call for better comparison with `BenchmarkFilepathWalk` and `BenchmarkWalkerWalk`. This library and `fastwalk` both require the callback to be safe for concurrent use. `BenchmarkFilepathWalkAppend`, `BenchmarkWalkerWalkAppend`, `BenchmarkFastwalkWalkAppend` and `BenchmarkGodirwalkWalkAppend` append the paths found to a string slice. The callback, for the libraries that require it, use a mutex, for better comparison with the libraries that require no locking. This library will not always be the best/fastest option. In general, if you're on Windows, or performing `lstat` calls, it does a pretty decent job. If you're not, I've found `fastwalk` to perform better on machines with fewer cores. These benchmarks were performed with a warm cache. 
``` goos: linux goarch: amd64 pkg: github.com/saracen/walker BenchmarkFilepathWalk-16 1 1437919955 ns/op 340100304 B/op 775525 allocs/op BenchmarkFilepathWalkAppend-16 1 1226169600 ns/op 351722832 B/op 775556 allocs/op BenchmarkWalkerWalk-16 8 133364860 ns/op 92611308 B/op 734674 allocs/op BenchmarkWalkerWalkAppend-16 7 166917499 ns/op 104231474 B/op 734693 allocs/op BenchmarkFastwalkWalk-16 6 241763690 ns/op 25257176 B/op 309423 allocs/op BenchmarkFastwalkWalkAppend-16 4 285673715 ns/op 36898800 B/op 309456 allocs/op BenchmarkFastwalkWalkLstat-16 6 176641625 ns/op 73769765 B/op 592980 allocs/op BenchmarkGodirwalkWalk-16 2 714625929 ns/op 145340576 B/op 723225 allocs/op BenchmarkGodirwalkWalkAppend-16 2 597653802 ns/op 156963288 B/op 723256 allocs/op BenchmarkGodirwalkWalkLstat-16 1 1186956102 ns/op 193724464 B/op 1006727 allocs/op ``` ``` goos: windows goarch: amd64 pkg: github.com/saracen/walker BenchmarkFilepathWalk-16 1 1268606000 ns/op 101248040 B/op 650718 allocs/op BenchmarkFilepathWalkAppend-16 1 1276617400 ns/op 107079288 B/op 650744 allocs/op BenchmarkWalkerWalk-16 12 98901983 ns/op 52393125 B/op 382836 allocs/op BenchmarkWalkerWalkAppend-16 12 99733117 ns/op 58220869 B/op 382853 allocs/op BenchmarkFastwalkWalk-16 10 109107980 ns/op 53032702 B/op 401320 allocs/op BenchmarkFastwalkWalkAppend-16 10 107512330 ns/op 58853827 B/op 401336 allocs/op BenchmarkFastwalkWalkLstat-16 3 379318333 ns/op 100606232 B/op 653931 allocs/op BenchmarkGodirwalkWalk-16 3 466418533 ns/op 42955197 B/op 579974 allocs/op BenchmarkGodirwalkWalkAppend-16 3 476391833 ns/op 48786530 B/op 580002 allocs/op BenchmarkGodirwalkWalkLstat-16 1 1250652800 ns/op 90536184 B/op 832562 allocs/op ``` Performing benchmarks without having the OS cache the directory information isn't straight forward, but to get a sense of the performance, we can flush the cache and roughly time how long it took to walk a directory: #### filepath.Walk ``` $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test 
-run TestFilepathWalkDir -benchdir $GOPATH ok github.com/saracen/walker 3.846s ``` #### walker ``` $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -run TestWalkerWalkDir -benchdir $GOPATH ok github.com/saracen/walker 0.353s ``` #### fastwalk ``` $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -run TestFastwalkWalkDir -benchdir $GOPATH ok github.com/saracen/walker 0.306s ``` #### fastwalk (lstat) ``` $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -run TestFastwalkWalkLstatDir -benchdir $GOPATH ok github.com/saracen/walker 0.339s ``` #### godirwalk ``` $ sudo su -c 'sync; echo 3 > /proc/sys/vm/drop_caches'; go test -run TestGodirwalkWalkDir -benchdir $GOPATH ok github.com/saracen/walker 3.208s ``` walker-0.1.2/go.mod000066400000000000000000000001501401551442200140770ustar00rootroot00000000000000module github.com/saracen/walker go 1.12 require golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a walker-0.1.2/go.sum000066400000000000000000000003211401551442200141240ustar00rootroot00000000000000golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a h1:WXEvlFVvvGxCJLG6REjsT03iWnKLEWinaScsxF2Vm2o= golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= walker-0.1.2/testdata/000077500000000000000000000000001401551442200146065ustar00rootroot00000000000000walker-0.1.2/testdata/fastwalk/000077500000000000000000000000001401551442200164225ustar00rootroot00000000000000walker-0.1.2/testdata/fastwalk/fastwalk.go000066400000000000000000000125471401551442200205760ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package fastwalk provides a faster version of filepath.Walk for file system // scanning tools. 
package fastwalk import ( "errors" "os" "path/filepath" "runtime" "sync" ) // ErrTraverseLink is used as a return value from WalkFuncs to indicate that the // symlink named in the call may be traversed. var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") // ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the // callback should not be called for any other files in the current directory. // Child directories will still be traversed. var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") // Walk is a faster implementation of filepath.Walk. // // filepath.Walk's design necessarily calls os.Lstat on each file, // even if the caller needs less info. // Many tools need only the type of each file. // On some platforms, this information is provided directly by the readdir // system call, avoiding the need to stat each file individually. // fastwalk_unix.go contains a fork of the syscall routines. // // See golang.org/issue/16399 // // Walk walks the file tree rooted at root, calling walkFn for // each file or directory in the tree, including root. // // If fastWalk returns filepath.SkipDir, the directory is skipped. // // Unlike filepath.Walk: // * file stat calls must be done by the user. // The only provided metadata is the file type, which does not include // any permission bits. // * multiple goroutines stat the filesystem concurrently. The provided // walkFn must be safe for concurrent use. // * fastWalk can follow symlinks if walkFn returns the TraverseLink // sentinel error. It is the walkFn's responsibility to prevent // fastWalk from going into symlink cycles. func Walk(root string, walkFn func(path string, typ os.FileMode) error) error { // TODO(bradfitz): make numWorkers configurable? We used a // minimum of 4 to give the kernel more info about multiple // things we want, in hopes its I/O scheduling can take // advantage of that. Hopefully most are in cache. 
Maybe 4 is // even too low of a minimum. Profile more. numWorkers := 4 if n := runtime.NumCPU(); n > numWorkers { numWorkers = n } // Make sure to wait for all workers to finish, otherwise // walkFn could still be called after returning. This Wait call // runs after close(e.donec) below. var wg sync.WaitGroup defer wg.Wait() w := &walker{ fn: walkFn, enqueuec: make(chan walkItem, numWorkers), // buffered for performance workc: make(chan walkItem, numWorkers), // buffered for performance donec: make(chan struct{}), // buffered for correctness & not leaking goroutines: resc: make(chan error, numWorkers), } defer close(w.donec) for i := 0; i < numWorkers; i++ { wg.Add(1) go w.doWork(&wg) } todo := []walkItem{{dir: root}} out := 0 for { workc := w.workc var workItem walkItem if len(todo) == 0 { workc = nil } else { workItem = todo[len(todo)-1] } select { case workc <- workItem: todo = todo[:len(todo)-1] out++ case it := <-w.enqueuec: todo = append(todo, it) case err := <-w.resc: out-- if err != nil { return err } if out == 0 && len(todo) == 0 { // It's safe to quit here, as long as the buffered // enqueue channel isn't also readable, which might // happen if the worker sends both another unit of // work and its result before the other select was // scheduled and both w.resc and w.enqueuec were // readable. select { case it := <-w.enqueuec: todo = append(todo, it) default: return nil } } } } } // doWork reads directories as instructed (via workc) and runs the // user's callback function. 
func (w *walker) doWork(wg *sync.WaitGroup) { defer wg.Done() for { select { case <-w.donec: return case it := <-w.workc: select { case <-w.donec: return case w.resc <- w.walk(it.dir, !it.callbackDone): } } } } type walker struct { fn func(path string, typ os.FileMode) error donec chan struct{} // closed on fastWalk's return workc chan walkItem // to workers enqueuec chan walkItem // from workers resc chan error // from workers } type walkItem struct { dir string callbackDone bool // callback already called; don't do it again } func (w *walker) enqueue(it walkItem) { select { case w.enqueuec <- it: case <-w.donec: } } func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { joined := dirName + string(os.PathSeparator) + baseName if typ == os.ModeDir { w.enqueue(walkItem{dir: joined}) return nil } err := w.fn(joined, typ) if typ == os.ModeSymlink { if err == ErrTraverseLink { // Set callbackDone so we don't call it twice for both the // symlink-as-symlink and the symlink-as-directory later: w.enqueue(walkItem{dir: joined, callbackDone: true}) return nil } if err == filepath.SkipDir { // Permit SkipDir on symlinks too. return nil } } return err } func (w *walker) walk(root string, runUserCallback bool) error { if runUserCallback { err := w.fn(root, os.ModeDir) if err == filepath.SkipDir { return nil } if err != nil { return err } } return readDir(root, w.onDirEnt) } walker-0.1.2/testdata/fastwalk/fastwalk_dirent_fileno.go000066400000000000000000000005401401551442200234650ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
//go:build freebsd || openbsd || netbsd // +build freebsd openbsd netbsd package fastwalk import "syscall" func direntInode(dirent *syscall.Dirent) uint64 { return uint64(dirent.Fileno) } walker-0.1.2/testdata/fastwalk/fastwalk_dirent_ino.go000066400000000000000000000005531401551442200230020ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build (linux || darwin) && !appengine // +build linux darwin // +build !appengine package fastwalk import "syscall" func direntInode(dirent *syscall.Dirent) uint64 { return uint64(dirent.Ino) } walker-0.1.2/testdata/fastwalk/fastwalk_dirent_namlen_bsd.go000066400000000000000000000005621401551442200243170ustar00rootroot00000000000000// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build darwin || freebsd || openbsd || netbsd // +build darwin freebsd openbsd netbsd package fastwalk import "syscall" func direntNamlen(dirent *syscall.Dirent) uint64 { return uint64(dirent.Namlen) } walker-0.1.2/testdata/fastwalk/fastwalk_dirent_namlen_linux.go000066400000000000000000000013571401551442200247110ustar00rootroot00000000000000// Copyright 2018 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
//go:build linux && !appengine // +build linux,!appengine package fastwalk import ( "bytes" "syscall" "unsafe" ) func direntNamlen(dirent *syscall.Dirent) uint64 { const fixedHdr = uint16(unsafe.Offsetof(syscall.Dirent{}.Name)) nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) const nameBufLen = uint16(len(nameBuf)) limit := dirent.Reclen - fixedHdr if limit > nameBufLen { limit = nameBufLen } nameLen := bytes.IndexByte(nameBuf[:limit], 0) if nameLen < 0 { panic("failed to find terminating 0 byte in dirent") } return uint64(nameLen) } walker-0.1.2/testdata/fastwalk/fastwalk_portable.go000066400000000000000000000017371401551442200224650ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build appengine || (!linux && !darwin && !freebsd && !openbsd && !netbsd) // +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd package fastwalk import ( "io/ioutil" "os" ) // readDir calls fn for each directory entry in dirName. // It does not descend into directories or follow symlinks. // If fn returns a non-nil error, readDir returns with that error // immediately. func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { fis, err := ioutil.ReadDir(dirName) if err != nil { return err } skipFiles := false for _, fi := range fis { if fi.Mode().IsRegular() && skipFiles { continue } if err := fn(dirName, fi.Name(), fi.Mode()&os.ModeType); err != nil { if err == ErrSkipFiles { skipFiles = true continue } return err } } return nil } walker-0.1.2/testdata/fastwalk/fastwalk_test.go000066400000000000000000000147411401551442200216330ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package fastwalk_test import ( "bytes" "flag" "fmt" "io/ioutil" "os" "path/filepath" "reflect" "runtime" "sort" "strings" "sync" "testing" "github.com/saracen/walker/testdata/fastwalk" ) func formatFileModes(m map[string]os.FileMode) string { var keys []string for k := range m { keys = append(keys, k) } sort.Strings(keys) var buf bytes.Buffer for _, k := range keys { fmt.Fprintf(&buf, "%-20s: %v\n", k, m[k]) } return buf.String() } func testFastWalk(t *testing.T, files map[string]string, callback func(path string, typ os.FileMode) error, want map[string]os.FileMode) { tempdir, err := ioutil.TempDir("", "test-fast-walk") if err != nil { t.Fatal(err) } defer os.RemoveAll(tempdir) symlinks := map[string]string{} for path, contents := range files { file := filepath.Join(tempdir, "/src", path) if err := os.MkdirAll(filepath.Dir(file), 0755); err != nil { t.Fatal(err) } var err error if strings.HasPrefix(contents, "LINK:") { symlinks[file] = filepath.FromSlash(strings.TrimPrefix(contents, "LINK:")) } else { err = ioutil.WriteFile(file, []byte(contents), 0644) } if err != nil { t.Fatal(err) } } // Create symlinks after all other files. Otherwise, directory symlinks on // Windows are unusable (see https://golang.org/issue/39183). for file, dst := range symlinks { err = os.Symlink(dst, file) if err != nil { if writeErr := ioutil.WriteFile(file, []byte(dst), 0644); writeErr == nil { // Couldn't create symlink, but could write the file. // Probably this filesystem doesn't support symlinks. // (Perhaps we are on an older Windows and not running as administrator.) 
t.Skipf("skipping because symlinks appear to be unsupported: %v", err) } } } got := map[string]os.FileMode{} var mu sync.Mutex err = fastwalk.Walk(tempdir, func(path string, typ os.FileMode) error { mu.Lock() defer mu.Unlock() if !strings.HasPrefix(path, tempdir) { t.Errorf("bogus prefix on %q, expect %q", path, tempdir) } key := filepath.ToSlash(strings.TrimPrefix(path, tempdir)) if old, dup := got[key]; dup { t.Errorf("callback called twice for key %q: %v -> %v", key, old, typ) } got[key] = typ return callback(path, typ) }) if err != nil { t.Fatalf("callback returned: %v", err) } if !reflect.DeepEqual(got, want) { t.Errorf("walk mismatch.\n got:\n%v\nwant:\n%v", formatFileModes(got), formatFileModes(want)) } } func TestFastWalk_Basic(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "skip/skip.go": "skip", }, func(path string, typ os.FileMode) error { return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/skip": os.ModeDir, "/src/skip/skip.go": 0, }) } func TestFastWalk_LongFileName(t *testing.T) { longFileName := strings.Repeat("x", 255) testFastWalk(t, map[string]string{ longFileName: "one", }, func(path string, typ os.FileMode) error { return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/" + longFileName: 0, }, ) } func TestFastWalk_Symlink(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "LINK:../foo/foo.go", "symdir": "LINK:foo", "broken/broken.go": "LINK:../nonexistent", }, func(path string, typ os.FileMode) error { return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/symdir": os.ModeSymlink, "/src/broken": os.ModeDir, "/src/broken/broken.go": os.ModeSymlink, }) } func TestFastWalk_SkipDir(t *testing.T) { 
testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "skip/skip.go": "skip", }, func(path string, typ os.FileMode) error { if typ == os.ModeDir && strings.HasSuffix(path, "skip") { return filepath.SkipDir } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/skip": os.ModeDir, }) } func TestFastWalk_SkipFiles(t *testing.T) { // Directory iteration order is undefined, so there's no way to know // which file to expect until the walk happens. Rather than mess // with the test infrastructure, just mutate want. var mu sync.Mutex want := map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/zzz": os.ModeDir, "/src/zzz/c.go": 0, } testFastWalk(t, map[string]string{ "a_skipfiles.go": "a", "b_skipfiles.go": "b", "zzz/c.go": "c", }, func(path string, typ os.FileMode) error { if strings.HasSuffix(path, "_skipfiles.go") { mu.Lock() defer mu.Unlock() want["/src/"+filepath.Base(path)] = 0 return fastwalk.ErrSkipFiles } return nil }, want) if len(want) != 5 { t.Errorf("saw too many files: wanted 5, got %v (%v)", len(want), want) } } func TestFastWalk_TraverseSymlink(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "skip/skip.go": "skip", "symdir": "LINK:foo", }, func(path string, typ os.FileMode) error { if typ == os.ModeSymlink { return fastwalk.ErrTraverseLink } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/skip": os.ModeDir, "/src/skip/skip.go": 0, "/src/symdir": os.ModeSymlink, "/src/symdir/foo.go": 0, }) } var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFastWalk") func BenchmarkFastWalk(b *testing.B) { b.ReportAllocs() for i := 0; i < b.N; i++ { err := fastwalk.Walk(*benchDir, func(path string, typ os.FileMode) 
error { return nil }) if err != nil { b.Fatal(err) } } } walker-0.1.2/testdata/fastwalk/fastwalk_unix.go000066400000000000000000000102361401551442200216320ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build (linux || darwin || freebsd || openbsd || netbsd) && !appengine // +build linux darwin freebsd openbsd netbsd // +build !appengine package fastwalk import ( "fmt" "os" "syscall" "unsafe" ) const blockSize = 8 << 10 // unknownFileMode is a sentinel (and bogus) os.FileMode // value used to represent a syscall.DT_UNKNOWN Dirent.Type. const unknownFileMode os.FileMode = os.ModeNamedPipe | os.ModeSocket | os.ModeDevice func readDir(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { fd, err := open(dirName, 0, 0) if err != nil { return &os.PathError{Op: "open", Path: dirName, Err: err} } defer syscall.Close(fd) // The buffer must be at least a block long. buf := make([]byte, blockSize) // stack-allocated; doesn't escape bufp := 0 // starting read position in buf nbuf := 0 // end valid data in buf skipFiles := false for { if bufp >= nbuf { bufp = 0 nbuf, err = readDirent(fd, buf) if err != nil { return os.NewSyscallError("readdirent", err) } if nbuf <= 0 { return nil } } consumed, name, typ := parseDirEnt(buf[bufp:nbuf]) bufp += consumed if name == "" || name == "." || name == ".." { continue } // Fallback for filesystems (like old XFS) that don't // support Dirent.Type and have DT_UNKNOWN (0) there // instead. if typ == unknownFileMode { fi, err := os.Lstat(dirName + "/" + name) if err != nil { // It got deleted in the meantime. 
if os.IsNotExist(err) { continue } return err } typ = fi.Mode() & os.ModeType } if skipFiles && typ.IsRegular() { continue } if err := fn(dirName, name, typ); err != nil { if err == ErrSkipFiles { skipFiles = true continue } return err } } } func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) { // golang.org/issue/37269 dirent := &syscall.Dirent{} copy((*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(dirent))[:], buf) if v := unsafe.Offsetof(dirent.Reclen) + unsafe.Sizeof(dirent.Reclen); uintptr(len(buf)) < v { panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), v)) } if len(buf) < int(dirent.Reclen) { panic(fmt.Sprintf("buf size %d < record length %d", len(buf), dirent.Reclen)) } consumed = int(dirent.Reclen) if direntInode(dirent) == 0 { // File absent in directory. return } switch dirent.Type { case syscall.DT_REG: typ = 0 case syscall.DT_DIR: typ = os.ModeDir case syscall.DT_LNK: typ = os.ModeSymlink case syscall.DT_BLK: typ = os.ModeDevice case syscall.DT_FIFO: typ = os.ModeNamedPipe case syscall.DT_SOCK: typ = os.ModeSocket case syscall.DT_UNKNOWN: typ = unknownFileMode default: // Skip weird things. // It's probably a DT_WHT (http://lwn.net/Articles/325369/) // or something. Revisit if/when this package is moved outside // of goimports. goimports only cares about regular files, // symlinks, and directories. return } nameBuf := (*[unsafe.Sizeof(dirent.Name)]byte)(unsafe.Pointer(&dirent.Name[0])) nameLen := direntNamlen(dirent) // Special cases for common things: if nameLen == 1 && nameBuf[0] == '.' { name = "." } else if nameLen == 2 && nameBuf[0] == '.' && nameBuf[1] == '.' { name = ".." } else { name = string(nameBuf[:nameLen]) } return } // According to https://golang.org/doc/go1.14#runtime // A consequence of the implementation of preemption is that on Unix systems, including Linux and macOS // systems, programs built with Go 1.14 will receive more signals than programs built with earlier releases. 
// // This causes syscall.Open and syscall.ReadDirent sometimes fail with EINTR errors. // We need to retry in this case. func open(path string, mode int, perm uint32) (fd int, err error) { for { fd, err := syscall.Open(path, mode, perm) if err != syscall.EINTR { return fd, err } } } func readDirent(fd int, buf []byte) (n int, err error) { for { nbuf, err := syscall.ReadDirent(fd, buf) if err != syscall.EINTR { return nbuf, err } } } walker-0.1.2/walker.go000066400000000000000000000051151401551442200146130ustar00rootroot00000000000000package walker import ( "context" "os" "path/filepath" "runtime" "sync/atomic" "golang.org/x/sync/errgroup" ) // Walk wraps WalkWithContext using the background context. func Walk(root string, walkFn func(pathname string, fi os.FileInfo) error, opts ...Option) error { return WalkWithContext(context.Background(), root, walkFn, opts...) } // WalkWithContext walks the file tree rooted at root, calling walkFn for each // file or directory in the tree, including root. // // If fastWalk returns filepath.SkipDir, the directory is skipped. // // Multiple goroutines stat the filesystem concurrently. The provided // walkFn must be safe for concurrent use. 
func WalkWithContext(ctx context.Context, root string, walkFn func(pathname string, fi os.FileInfo) error, opts ...Option) error { wg, ctx := errgroup.WithContext(ctx) fi, err := os.Lstat(root) if err != nil { return err } if err = walkFn(root, fi); err == filepath.SkipDir { return nil } if err != nil || !fi.IsDir() { return err } w := walker{ counter: 1, limit: runtime.NumCPU(), ctx: ctx, wg: wg, fn: walkFn, } if w.limit < 4 { w.limit = 4 } for _, o := range opts { err := o(&w.options) if err != nil { return err } } w.wg.Go(func() error { return w.gowalk(root) }) return w.wg.Wait() } type walker struct { counter uint32 limit int ctx context.Context wg *errgroup.Group fn func(pathname string, fi os.FileInfo) error options walkerOptions } func (w *walker) walk(dirname string, fi os.FileInfo) error { pathname := dirname + string(filepath.Separator) + fi.Name() err := w.fn(pathname, fi) if err == filepath.SkipDir { return nil } if err != nil { return err } // don't follow symbolic links if fi.Mode()&os.ModeSymlink != 0 { return nil } if !fi.IsDir() { return nil } if err = w.ctx.Err(); err != nil { return err } current := atomic.LoadUint32(&w.counter) // if we haven't reached our goroutine limit, spawn a new one if current < uint32(w.limit) { if atomic.CompareAndSwapUint32(&w.counter, current, current+1) { w.wg.Go(func() error { return w.gowalk(pathname) }) return nil } } // if we've reached our limit, continue with this goroutine err = w.readdir(pathname) if err != nil && w.options.errorCallback != nil { err = w.options.errorCallback(pathname, err) } return err } func (w *walker) gowalk(pathname string) error { err := w.readdir(pathname) if err != nil && w.options.errorCallback != nil { err = w.options.errorCallback(pathname, err) } atomic.AddUint32(&w.counter, ^uint32(0)) return err } walker-0.1.2/walker_option.go000066400000000000000000000010571401551442200162040ustar00rootroot00000000000000package walker // WalkerOption is an option to configure Walk() behaviour. 
type Option func(*walkerOptions) error

// walkerOptions holds the settings that Option values can modify.
type walkerOptions struct {
	// errorCallback is the callback installed by WithErrorCallback; it is
	// nil when no callback was configured.
	errorCallback func(pathname string, err error) error
}

// WithErrorCallback sets a callback to be used for error handling. Any error
// returned will halt the Walk function and return the error. If the callback
// returns nil Walk will continue.
func WithErrorCallback(callback func(pathname string, err error) error) Option {
	return func(o *walkerOptions) error {
		o.errorCallback = callback
		return nil
	}
}
walker-0.1.2/walker_portable.go000066400000000000000000000005761401551442200165110ustar00rootroot00000000000000
// +build appengine !linux,!darwin,!freebsd,!openbsd,!netbsd

package walker

import "os"

// readdir reads every entry of dirname with os.File.Readdir and passes each
// one to w.walk, stopping at the first error.
func (w *walker) readdir(dirname string) error {
	f, err := os.Open(dirname)
	if err != nil {
		return err
	}

	// The whole listing is read up front so the file can be closed before
	// any entry is walked.
	list, err := f.Readdir(-1)
	f.Close()
	if err != nil {
		return err
	}

	for _, fi := range list {
		if err = w.walk(dirname, fi); err != nil {
			return err
		}
	}
	return nil
}
walker-0.1.2/walker_test.go000066400000000000000000000160611401551442200156540ustar00rootroot00000000000000
package walker_test

import (
	"context"
	"flag"
	"io/ioutil"
	"os"
	"path/filepath"
	"runtime"
	"strings"
	"sync"
	"testing"
	"time"

	// "github.com/karrick/godirwalk"
	"github.com/saracen/walker"
	"github.com/saracen/walker/testdata/fastwalk"
)

// testWalk builds the tree described by files inside a fresh temporary
// directory, walks it with both filepath.Walk and walker.Walk, and fails the
// test if a path seen by filepath.Walk is missing from, or differs in, the
// walker.Walk results.
//
// The mode bits of each entry select what is created: ModeSymlink|ModeDir
// makes a symlink to the entry's parent directory, ModeSymlink alone a
// symlink to "foo/foo.go", ModeDir a directory, and anything else a regular
// file whose content is its own path.
func testWalk(t *testing.T, files map[string]os.FileMode) {
	dir, err := ioutil.TempDir("", "walker-test")
	if err != nil {
		t.Error(err)
		return
	}
	defer os.RemoveAll(dir)

	for path, mode := range files {
		path = filepath.Join(dir, path)
		err := os.MkdirAll(filepath.Dir(path), 0777)
		if err != nil {
			t.Fatal(err)
		}

		switch {
		case mode&os.ModeSymlink != 0 && mode&os.ModeDir != 0:
			err = os.Symlink(filepath.Dir(path), path)

		case mode&os.ModeSymlink != 0:
			err = os.Symlink("foo/foo.go", path)

		case mode&os.ModeDir != 0:
			err = os.Mkdir(path, mode)

		default:
			err = ioutil.WriteFile(path, []byte(path), mode)
		}

		if err != nil {
			t.Fatal(err)
		}
	}

	// Reference traversal with the standard library.
	filepathResults := make(map[string]os.FileInfo)
	err = filepath.Walk(dir, func(pathname string, fi os.FileInfo, err error) error {
		if strings.Contains(pathname, "skip") {
			return filepath.SkipDir
		}

		// Only the "perm-error" entry is expected to report an error, and
		// not on Windows.
		if filepath.Base(pathname) == "perm-error" && runtime.GOOS != "windows" {
			if err == nil {
				t.Errorf("expected permission error for path %v", pathname)
			}
		} else {
			if err != nil {
				t.Errorf("unexpected error for path %v", pathname)
			}
		}

		filepathResults[pathname] = fi
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}

	// Traversal under test. The callback runs concurrently, so the result
	// map is guarded by a mutex.
	var l sync.Mutex
	walkerResults := make(map[string]os.FileInfo)
	err = walker.Walk(dir, func(pathname string, fi os.FileInfo) error {
		if strings.Contains(pathname, "skip") {
			return filepath.SkipDir
		}

		l.Lock()
		walkerResults[pathname] = fi
		l.Unlock()

		return nil
	}, walker.WithErrorCallback(func(pathname string, err error) error {
		if filepath.Base(pathname) == "perm-error" {
			if err == nil {
				t.Errorf("expected permission error for path %v", pathname)
			}
		} else {
			if err != nil {
				t.Errorf("unexpected error for path %v", pathname)
			}
		}
		return nil
	}))

	if err != nil {
		t.Fatal(err)
	}

	// Every path reported by filepath.Walk must also have been reported by
	// walker.Walk, with matching file information.
	for path, info := range filepathResults {
		info2, ok := walkerResults[path]
		if !ok {
			t.Fatalf("walk mismatch, path %q doesn't exist", path)
		}

		if info.IsDir() != info2.IsDir() ||
			info.ModTime() != info2.ModTime() ||
			info.Mode() != info2.Mode() ||
			info.Name() != info2.Name() ||
			info.Size() != info2.Size() {
			t.Fatalf("walk mismatch, got %v, wanted %v", info2, info)
		}
	}
}

// TestWalker runs testWalk over a small tree containing regular files,
// nested directories, a directory whose name triggers SkipDir, both kinds of
// symlink, and a directory created with mode 0000.
func TestWalker(t *testing.T) {
	testWalk(t, map[string]os.FileMode{
		"foo/foo.go":          0644,
		"bar/bar.go":          0777,
		"bar/foo/bar/foo/bar": 0600,
		"skip/file":           0700,
		"bar/symlink":         os.ModeDir | os.ModeSymlink | 0777,
		"bar/symlink.go":      os.ModeSymlink | 0777,
		"perm-error":          os.ModeDir | 0000,
	})
}

// TestWalkerWithContext walks GOROOT with a context that times out after
// 5ns and expects WalkWithContext to return a non-nil error.
func TestWalkerWithContext(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Nanosecond)
	defer cancel()

	err := walker.WalkWithContext(ctx, runtime.GOROOT(), func(pathname string, fi os.FileInfo) error {
		return nil
	})

	if err == nil {
		t.Fatalf("expecting timeout error, got nil")
	}
}

// benchDir is the root directory walked by the helpers below; it defaults to
// GOROOT and can be overridden with -benchdir.
var benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFilepathWalk and BenchmarkWalkerWalk")

// tester is the subset of *testing.T and *testing.B used by the walk helpers,
// so the same helper can back both a test and a benchmark.
type tester interface {
	Fatal(args ...interface{})
}

// filepathWalk walks *benchDir with filepath.Walk and a no-op callback.
func filepathWalk(t tester) {
	err := filepath.Walk(*benchDir, func(pathname string, fi os.FileInfo, err error) error {
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}
}

// filepathWalkAppend walks *benchDir with filepath.Walk and returns every
// path visited.
func filepathWalkAppend(t tester) (paths []string) {
	err := filepath.Walk(*benchDir, func(pathname string, fi os.FileInfo, err error) error {
		paths = append(paths, pathname)
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}

	return
}

func TestFilepathWalkDir(t *testing.T) { filepathWalk(t) }

func BenchmarkFilepathWalk(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		filepathWalk(b)
	}
}

func BenchmarkFilepathWalkAppend(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_ = filepathWalkAppend(b)
	}
}

// walkerWalk walks *benchDir with walker.Walk and a no-op callback.
func walkerWalk(t tester) {
	err := walker.Walk(*benchDir, func(pathname string, fi os.FileInfo) error {
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}
}

// walkerWalkAppend walks *benchDir with walker.Walk and returns every path
// visited; appends are serialised with a mutex.
func walkerWalkAppend(t tester) (paths []string) {
	var l sync.Mutex
	err := walker.Walk(*benchDir, func(pathname string, fi os.FileInfo) error {
		l.Lock()
		paths = append(paths, pathname)
		l.Unlock()
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}

	return
}

func TestWalkerWalkDir(t *testing.T) { walkerWalk(t) }

func BenchmarkWalkerWalk(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		walkerWalk(b)
	}
}

func BenchmarkWalkerWalkAppend(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_ = walkerWalkAppend(b)
	}
}

// fastwalkWalk walks *benchDir with fastwalk.Walk and a no-op callback.
func fastwalkWalk(t tester) {
	err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error {
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}
}

// fastwalkWalkLstat walks *benchDir with fastwalk.Walk and calls os.Lstat on
// every path, returning the Lstat error from the callback.
func fastwalkWalkLstat(t tester) {
	err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error {
		_, err := os.Lstat(pathname)
		return err
	})

	if err != nil {
		t.Fatal(err)
	}
}

// fastwalkWalkAppend walks *benchDir with fastwalk.Walk and returns every
// path visited; appends are serialised with a mutex.
func fastwalkWalkAppend(t tester) (paths []string) {
	var l sync.Mutex
	err := fastwalk.Walk(*benchDir, func(pathname string, mode os.FileMode) error {
		l.Lock()
		paths = append(paths, pathname)
		l.Unlock()
		return nil
	})

	if err != nil {
		t.Fatal(err)
	}

	return
}

func TestFastwalkWalkDir(t *testing.T) { fastwalkWalk(t) }

func TestFastwalkWalkLstatDir(t *testing.T) { fastwalkWalkLstat(t) }

func BenchmarkFastwalkWalk(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		fastwalkWalk(b)
	}
}

func BenchmarkFastwalkWalkAppend(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_ = fastwalkWalkAppend(b)
	}
}

func BenchmarkFastwalkWalkLstat(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		fastwalkWalkLstat(b)
	}
}

/*func godirwalkWalk(t tester) {
	err := godirwalk.Walk(*benchDir, &godirwalk.Options{
		Callback: func(osPathname string, dirent *godirwalk.Dirent) error {
			return nil
		},
		Unsorted: true,
	})

	if err != nil {
		t.Fatal(err)
	}
}

func godirwalkWalkLstat(t tester) (paths []string) {
	err := godirwalk.Walk(*benchDir, &godirwalk.Options{
		Callback: func(osPathname string, dirent *godirwalk.Dirent) error {
			_, err := os.Lstat(osPathname)
			return err
		},
		Unsorted: true,
	})

	if err != nil {
		t.Fatal(err)
	}

	return
}

func godirwalkWalkAppend(t tester) (paths []string) {
	err := godirwalk.Walk(*benchDir, &godirwalk.Options{
		Callback: func(osPathname string, dirent *godirwalk.Dirent) error {
			paths = append(paths, osPathname)
			return nil
		},
		Unsorted: true,
	})

	if err != nil {
		t.Fatal(err)
	}

	return
}

func TestGodirwalkWalkDir(t *testing.T) { godirwalkWalk(t) }

func BenchmarkGodirwalkWalk(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		godirwalkWalk(b)
	}
}

func BenchmarkGodirwalkWalkAppend(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_ = godirwalkWalkAppend(b)
	}
}

func BenchmarkGodirwalkWalkLstat(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		godirwalkWalkLstat(b)
	}
}*/
walker-0.1.2/walker_unix.go000066400000000000000000000031341401551442200156550ustar00rootroot00000000000000
// +build linux darwin freebsd openbsd netbsd
// +build !appengine

package walker

import (
	"os"
	"syscall"
)
func (w *walker) readdir(dirname string) error { fd, err := open(dirname, 0, 0) if err != nil { return &os.PathError{Op: "open", Path: dirname, Err: err} } defer syscall.Close(fd) buf := make([]byte, 8<<10) names := make([]string, 0, 100) nbuf := 0 bufp := 0 for { if bufp >= nbuf { bufp = 0 nbuf, err = readDirent(fd, buf) if err != nil { return err } if nbuf <= 0 { return nil } } consumed, count, names := syscall.ParseDirent(buf[bufp:nbuf], 100, names[0:]) bufp += consumed for _, name := range names[:count] { fi, err := os.Lstat(dirname + "/" + name) if os.IsNotExist(err) { continue } if err != nil { return err } if err = w.walk(dirname, fi); err != nil { return err } } } // never reach } // According to https://golang.org/doc/go1.14#runtime // A consequence of the implementation of preemption is that on Unix systems, including Linux and macOS // systems, programs built with Go 1.14 will receive more signals than programs built with earlier releases. // // This causes syscall.Open and syscall.ReadDirent sometimes fail with EINTR errors. // We need to retry in this case. func open(path string, mode int, perm uint32) (fd int, err error) { for { fd, err := syscall.Open(path, mode, perm) if err != syscall.EINTR { return fd, err } } } func readDirent(fd int, buf []byte) (n int, err error) { for { nbuf, err := syscall.ReadDirent(fd, buf) if err != syscall.EINTR { return nbuf, err } } }