pax_global_header00006660000000000000000000000064147067263220014523gustar00rootroot0000000000000052 comment=17800a2b07b0829938668b367132fff7cdd7e0e7 fastwalk-1.0.9/000077500000000000000000000000001470672632200133465ustar00rootroot00000000000000fastwalk-1.0.9/.github/000077500000000000000000000000001470672632200147065ustar00rootroot00000000000000fastwalk-1.0.9/.github/dependabot.yml000066400000000000000000000004151470672632200175360ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: gomod directory: / labels: - area/dependencies schedule: interval: daily - package-ecosystem: github-actions directory: / labels: - area/dependencies schedule: interval: daily fastwalk-1.0.9/.github/workflows/000077500000000000000000000000001470672632200167435ustar00rootroot00000000000000fastwalk-1.0.9/.github/workflows/linux.yml000066400000000000000000000012031470672632200206210ustar00rootroot00000000000000name: Test fastwalk on Linux on: push: branches: [master] pull_request: branches: [master] jobs: build: runs-on: ubuntu-latest strategy: # Test all supported versions on Linux (since it's fast) matrix: go: ['1.20', '1.21', '1.22', '1.23'] steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - name: Test run: go test ./... - name: Test Race run: go test -race ./... 
- name: Test Builds run: make test_build --jobs=4 fastwalk-1.0.9/.github/workflows/linux_386.yml000066400000000000000000000006721470672632200212320ustar00rootroot00000000000000name: Test fastwalk on Linux 386 (32-bit) on: push: branches: [master] pull_request: branches: [master] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Go uses: actions/setup-go@v5 with: go-version: '>=1.23.x' - name: Test 386 run: | GOARCH=386 go env GOARCH=386 go test fastwalk-1.0.9/.github/workflows/macos.yml000066400000000000000000000007651470672632200206000ustar00rootroot00000000000000name: Test fastwalk on macOS amd64 on: push: branches: [master] pull_request: branches: [master] jobs: build: runs-on: macos-13 strategy: matrix: go: ['1.22', '1.23'] steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - name: Test run: go test ./... - name: Test Race run: go test -race ./... fastwalk-1.0.9/.github/workflows/macos_arm64.yml000066400000000000000000000007711470672632200216060ustar00rootroot00000000000000name: Test fastwalk on macOS arm64 on: push: branches: [master] pull_request: branches: [master] jobs: build: runs-on: macos-latest strategy: matrix: go: ['1.22', '1.23'] steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - name: Test run: go test ./... - name: Test Race run: go test -race ./... fastwalk-1.0.9/.github/workflows/windows.yml000066400000000000000000000024551470672632200211660ustar00rootroot00000000000000name: Test fastwalk on Windows on: push: branches: [master] pull_request: branches: [master] jobs: build: runs-on: windows-latest strategy: matrix: go: ['1.22', '1.23'] steps: - uses: actions/checkout@v4 with: fetch-depth: 1 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - name: Test Race run: go test -race ./... 
# WSL Test: disabled for now since it's very slow (~5 minutes) # # name: Test fastwalk on Windows WSL # # on: # push: # branches: [ master ] # pull_request: # branches: [ master ] # # jobs: # build: # runs-on: windows-latest # strategy: # matrix: # go: [1.22] # steps: # - uses: actions/checkout@v4 # with: # fetch-depth: 1 # - uses: Vampire/setup-wsl@v3 # with: # distribution: Ubuntu-24.04 # - name: Set up Go # uses: actions/setup-go@v5 # with: # go-version: ${{ matrix.go }} # - name: Build Test # run: go test -c -o fastwalk.test.exe # - name: Test WSL # shell: wsl-bash {0} # run: | # cp ./fastwalk.test.exe /tmp/fastwalk.test.exe # cd /tmp # ./fastwalk.test.exe -test.v -test.run TestRunningUnderWSL -test-wsl fastwalk-1.0.9/.gitignore000066400000000000000000000000171470672632200153340ustar00rootroot00000000000000/vendor *.test fastwalk-1.0.9/LICENSE000066400000000000000000000020701470672632200143520ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2022 Charlie Vieth Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. fastwalk-1.0.9/Makefile000066400000000000000000000047321470672632200150140ustar00rootroot00000000000000.PHONY: all all: test test_build .PHONY: test_build_darwin_arm64 test_build_darwin_arm64: GOOS=darwin GOARCH=arm64 go test -c -o /dev/null .PHONY: test_build_darwin_amd64 test_build_darwin_amd64: GOOS=darwin GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_linux_arm64 test_build_linux_arm64: GOOS=linux GOARCH=arm64 go test -c -o /dev/null .PHONY: test_build_linux_amd64 test_build_linux_amd64: GOOS=linux GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_windows_amd64 test_build_windows_amd64: GOOS=windows GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_windows_arm64 test_build_windows_arm64: GOOS=windows GOARCH=arm64 go test -c -o /dev/null .PHONY: test_build_freebsd_amd64 test_build_freebsd_amd64: GOOS=freebsd GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_openbsd_amd64 test_build_openbsd_amd64: GOOS=openbsd GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_netbsd_amd64 test_build_netbsd_amd64: GOOS=netbsd GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_dragonfly_amd64 test_build_dragonfly_amd64: GOOS=dragonfly GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_solaris_amd64 test_build_solaris_amd64: GOOS=solaris GOARCH=amd64 go test -c -o /dev/null .PHONY: test_build_wasip1_wasm test_build_wasip1_wasm: @# Ignore versions before 1.21 go version | grep -qE 'go1\.(20|1[0-9])' || \ GOOS=wasip1 GOARCH=wasm go test -c -o /dev/null .PHONY: test_build_aix_ppc64 test_build_aix_ppc64: GOOS=aix GOARCH=ppc64 go test -c -o /dev/null .PHONY: test_build_js_wasm test_build_js_wasm: GOOS=js GOARCH=wasm go test -c -o /dev/null # TODO: clean this up and add all supported targets 
# # Test that we can build fastwalk on multiple platforms .PHONY: test_build test_build: \ test_build_aix_ppc64 \ test_build_darwin_amd64 \ test_build_darwin_arm64 \ test_build_dragonfly_amd64 \ test_build_freebsd_amd64 \ test_build_js_wasm \ test_build_linux_amd64 \ test_build_linux_arm64 \ test_build_netbsd_amd64 \ test_build_openbsd_amd64 \ test_build_solaris_amd64 \ test_build_wasip1_wasm \ test_build_windows_amd64 \ test_build_windows_arm64 .PHONY: test test: # runs all tests against the package with race detection and coverage percentage @go test -race -cover ./... .PHONY: quick quick: # runs all tests without coverage or the race detector @go test ./... .PHONY: bench bench: go test -run '^$$' -bench . -benchmem ./... .PHONY: bench_comp bench_comp: @go run ./scripts/bench_comp.go .PHONY: clean clean: @go clean fastwalk-1.0.9/README.md000066400000000000000000000151141470672632200146270ustar00rootroot00000000000000[![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg)](https://pkg.go.dev/github.com/charlievieth/fastwalk) [![Test fastwalk on macOS](https://github.com/charlievieth/fastwalk/actions/workflows/macos.yml/badge.svg)](https://github.com/charlievieth/fastwalk/actions/workflows/macos.yml) [![Test fastwalk on Linux](https://github.com/charlievieth/fastwalk/actions/workflows/linux.yml/badge.svg)](https://github.com/charlievieth/fastwalk/actions/workflows/linux.yml) [![Test fastwalk on Windows](https://github.com/charlievieth/fastwalk/actions/workflows/windows.yml/badge.svg)](https://github.com/charlievieth/fastwalk/actions/workflows/windows.yml) # fastwalk Fast parallel directory traversal for Golang. Package fastwalk provides a fast parallel version of [`filepath.WalkDir`](https://pkg.go.dev/io/fs#WalkDirFunc) that is \~2.5x faster on macOS, \~4x faster on Linux, \~6x faster on Windows, allocates 50% less memory, and requires 25% fewer memory allocations. 
Additionally, it is \~4-5x faster than [godirwalk](https://github.com/karrick/godirwalk) across OSes. Inspired by and based off of [golang.org/x/tools/internal/fastwalk](https://pkg.go.dev/golang.org/x/tools@v0.1.9/internal/fastwalk). ## Features * Fast: multiple goroutines stat the filesystem and call the [`filepath.WalkDirFunc`](https://pkg.go.dev/io/fs#WalkDirFunc) callback concurrently * Safe symbolic link traversal ([`Config.Follow`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Config)) * Same behavior and callback signature as [`filepath.WalkDir`](https://pkg.go.dev/path/filepath#WalkDir) * Wrapper functions are provided to ignore duplicate files and directories: [`IgnoreDuplicateFiles()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#IgnoreDuplicateFiles) and [`IgnoreDuplicateDirs()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#IgnoreDuplicateDirs) * Extensively tested on macOS, Linux, and Windows ## Usage Usage is the same as [`filepath.WalkDir`](https://pkg.go.dev/io/fs#WalkDirFunc), but the [`walkFn`](https://pkg.go.dev/path/filepath@go1.17.7#WalkFunc) argument to [`fastwalk.Walk`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk) must be safe for concurrent use. Examples can be found in the [examples](./examples) directory. The below example is a very simple version of the POSIX [find](https://pubs.opengroup.org/onlinepubs/007904975/utilities/find.html) utility: ```go // fwfind is a an example program that is similar to POSIX find, // but faster and worse (it's an example). 
package main import ( "flag" "fmt" "io/fs" "os" "path/filepath" "github.com/charlievieth/fastwalk" ) const usageMsg = `Usage: %[1]s [-L] [-name] [PATH...]: %[1]s is a poor replacement for the POSIX find utility ` func main() { flag.Usage = func() { fmt.Fprintf(os.Stdout, usageMsg, filepath.Base(os.Args[0])) flag.PrintDefaults() } pattern := flag.String("name", "", "Pattern to match file names against.") followLinks := flag.Bool("L", false, "Follow symbolic links") flag.Parse() // If no paths are provided default to the current directory: "." args := flag.Args() if len(args) == 0 { args = append(args, ".") } // Follow links if the "-L" flag is provided conf := fastwalk.Config{ Follow: *followLinks, } walkFn := func(path string, d fs.DirEntry, err error) error { if err != nil { fmt.Fprintf(os.Stderr, "%s: %v\n", path, err) return nil // returning the error stops iteration } if *pattern != "" { if ok, err := filepath.Match(*pattern, d.Name()); !ok { // invalid pattern (err != nil) or name does not match return err } } _, err = fmt.Println(path) return err } for _, root := range args { if err := fastwalk.Walk(&conf, root, walkFn); err != nil { fmt.Fprintf(os.Stderr, "%s: %v\n", root, err) os.Exit(1) } } } ``` ## Benchmarks Benchmarks were created using `go1.17.6` and can be generated with the `bench_comp` make target: ```sh $ make bench_comp ``` ### Darwin **Hardware:** ``` goos: darwin goarch: arm64 cpu: Apple M1 Max ``` #### [`filepath.WalkDir`](https://pkg.go.dev/path/filepath@go1.17.7#WalkDir) vs. [`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` filepath fastwalk delta time/op 27.9ms ± 1% 13.0ms ± 1% -53.33% alloc/op 4.33MB ± 0% 2.14MB ± 0% -50.55% allocs/op 50.9k ± 0% 37.7k ± 0% -26.01% ``` #### [`godirwalk.Walk()`](https://pkg.go.dev/github.com/karrick/godirwalk@v1.16.1#Walk) vs. 
[`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` godirwalk fastwalk delta time/op 58.5ms ± 3% 18.0ms ± 2% -69.30% alloc/op 25.3MB ± 0% 2.1MB ± 0% -91.55% allocs/op 57.6k ± 0% 37.7k ± 0% -34.59% ``` ### Linux **Hardware:** ``` goos: linux goarch: amd64 cpu: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz drive: Samsung SSD 970 PRO 1TB ``` #### [`filepath.WalkDir`](https://pkg.go.dev/path/filepath@go1.17.7#WalkDir) vs. [`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` filepath fastwalk delta time/op 10.1ms ± 2% 2.8ms ± 2% -72.83% alloc/op 2.44MB ± 0% 1.70MB ± 0% -30.46% allocs/op 47.2k ± 0% 36.9k ± 0% -21.80% ``` #### [`godirwalk.Walk()`](https://pkg.go.dev/github.com/karrick/godirwalk@v1.16.1#Walk) vs. [`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` filepath fastwalk delta time/op 13.7ms ±16% 2.8ms ± 2% -79.88% alloc/op 7.48MB ± 0% 1.70MB ± 0% -77.34% allocs/op 53.8k ± 0% 36.9k ± 0% -31.38% ``` ### Windows **Hardware:** ``` goos: windows goarch: amd64 pkg: github.com/charlievieth/fastwalk cpu: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz ``` #### [`filepath.WalkDir`](https://pkg.go.dev/path/filepath@go1.17.7#WalkDir) vs. [`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` filepath fastwalk delta time/op 88.0ms ± 1% 14.6ms ± 1% -83.47% alloc/op 5.68MB ± 0% 6.76MB ± 0% +19.01% allocs/op 69.6k ± 0% 90.4k ± 0% +29.87% ``` #### [`godirwalk.Walk()`](https://pkg.go.dev/github.com/karrick/godirwalk@v1.16.1#Walk) vs. 
[`fastwalk.Walk()`](https://pkg.go.dev/github.com/charlievieth/fastwalk#Walk): ``` filepath fastwalk delta time/op 87.4ms ± 1% 14.6ms ± 1% -83.34% alloc/op 6.14MB ± 0% 6.76MB ± 0% +10.24% allocs/op 100k ± 0% 90k ± 0% -9.59% ``` fastwalk-1.0.9/adapters.go000066400000000000000000000060231470672632200155010ustar00rootroot00000000000000package fastwalk import ( "io/fs" "os" "path/filepath" ) func isDir(path string, d fs.DirEntry) bool { if d.IsDir() { return true } if d.Type()&os.ModeSymlink != 0 { if fi, err := StatDirEntry(path, d); err == nil { return fi.IsDir() } } return false } // IgnoreDuplicateDirs wraps [fs.WalkDirFunc] walkFn to make it follow symbolic // links and ignore duplicate directories (if a symlink points to a directory // that has already been traversed it is skipped). The walkFn is called for // for skipped directories, but the directory is not traversed (this is // required for error handling). // // The Follow [Config] setting has no effect on the behavior of Walk when // this wrapper is used. // // In most use cases, the returned [fs.WalkDirFunc] should not be reused. // If it is reused, any previously visited file will be skipped. // // NOTE: The order of traversal is undefined. Given an "example" directory // like the one below where "dir" is a directory and "smydir1" and "smydir2" // are links to it, only one of "dir", "smydir1", or "smydir2" will be // traversed, but which one is undefined. // // example // ├── dir // ├── smydir1 -> dir // └── smydir2 -> dir func IgnoreDuplicateDirs(walkFn fs.WalkDirFunc) fs.WalkDirFunc { filter := NewEntryFilter() return func(path string, d fs.DirEntry, err error) error { // Call walkFn before checking the entry filter so that we // don't record directories that are skipped with SkipDir. 
err = walkFn(path, d, err) if err != nil { if err != filepath.SkipDir && isDir(path, d) { filter.Entry(path, d) } return err } if isDir(path, d) { if filter.Entry(path, d) { return filepath.SkipDir } if d.Type() == os.ModeSymlink { return ErrTraverseLink } } return nil } } // IgnoreDuplicateFiles wraps walkFn so that symlinks are followed and duplicate // files are ignored. If a symlink resolves to a file that has already been // visited it will be skipped. // // In most use cases, the returned [fs.WalkDirFunc] should not be reused. // If it is reused, any previously visited file will be skipped. // // This can significantly slow Walk as os.Stat() is called for each path // (on Windows, os.Stat() is only needed for symlinks). func IgnoreDuplicateFiles(walkFn fs.WalkDirFunc) fs.WalkDirFunc { filter := NewEntryFilter() return func(path string, d fs.DirEntry, err error) error { // Skip all duplicate files, directories, and links if filter.Entry(path, d) { if isDir(path, d) { return filepath.SkipDir } return nil } err = walkFn(path, d, err) if err == nil && d.Type() == os.ModeSymlink && isDir(path, d) { err = ErrTraverseLink } return err } } // IgnorePermissionErrors wraps walkFn so that [fs.ErrPermission] permission // errors are ignored. The returned [fs.WalkDirFunc] may be reused. 
func IgnorePermissionErrors(walkFn fs.WalkDirFunc) fs.WalkDirFunc { return func(path string, d fs.DirEntry, err error) error { if err != nil && os.IsPermission(err) { return nil } return walkFn(path, d, err) } } fastwalk-1.0.9/adapters_test.go000066400000000000000000000136421470672632200165450ustar00rootroot00000000000000package fastwalk_test import ( "errors" "io/fs" "os" "path/filepath" "reflect" "runtime" "sort" "strings" "sync" "sync/atomic" "testing" "github.com/charlievieth/fastwalk" ) func TestIgnoreDuplicateDirs(t *testing.T) { tempdir, err := os.MkdirTemp("", "test-fast-walk") if err != nil { t.Fatal(err) } // on macOS the tempdir is a symlink tempdir, err = filepath.EvalSymlinks(tempdir) if err != nil { t.Fatal(err) } defer cleanupOrLogTempDir(t, tempdir) files := map[string]string{ "bar/bar.go": "one", "foo/foo.go": "two", "skip/baz.go": "three", // we skip "skip", but visit "baz.go" via "symdir" "symdir": "LINK:skip", "bar/symdir": "LINK:../foo/", "bar/loop": "LINK:../bar/", // symlink loop } testCreateFiles(t, tempdir, files) want := map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/bar/symdir": os.ModeSymlink, "/src/bar/loop": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/symdir": os.ModeSymlink, "/src/symdir/baz.go": 0, "/src/skip": os.ModeDir, } runTest := func(t *testing.T, conf *fastwalk.Config) { var mu sync.Mutex got := make(map[string]os.FileMode) walkFn := fastwalk.IgnoreDuplicateDirs(func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if err != nil { return err } // Resolve links for regular files since we don't know which directory // or link we traversed to visit them. Exclude "baz.go" because we want // to test that we visited it through it's link. 
if de.Type().IsRegular() && de.Name() != "baz.go" { realpath, err := filepath.EvalSymlinks(path) if err != nil { t.Error(err) return err } path = realpath } if !strings.HasPrefix(path, tempdir) { t.Errorf("Path %q not a child of TMPDIR %q", path, tempdir) return errors.New("abort") } key := filepath.ToSlash(strings.TrimPrefix(path, tempdir)) mu.Lock() defer mu.Unlock() got[key] = de.Type().Type() if de.Name() == "skip" { return filepath.SkipDir } return nil }) if err := fastwalk.Walk(conf, tempdir, walkFn); err != nil { t.Error("fastwalk:", err) } if !reflect.DeepEqual(want, got) { t.Errorf("walk mismatch.\n got:\n%v\nwant:\n%v", formatFileModes(got), formatFileModes(want)) diffFileModes(t, got, want) } } t.Run("NoFollow", func(t *testing.T) { runTest(t, &fastwalk.Config{Follow: false}) }) // Test that setting Follow to true has no impact on the behavior t.Run("Follow", func(t *testing.T) { runTest(t, &fastwalk.Config{Follow: true}) }) t.Run("Error", func(t *testing.T) { tempdir := t.TempDir() if err := os.WriteFile(tempdir+"/error_test", []byte("error"), 0644); err != nil { t.Fatal(err) } want := errors.New("my error") var callCount int32 walkFn := fastwalk.IgnoreDuplicateDirs(func(path string, de fs.DirEntry, err error) error { atomic.AddInt32(&callCount, 1) return want }) err := fastwalk.Walk(nil, tempdir, walkFn) if !errors.Is(err, want) { t.Errorf("Error: want: %v got: %v", want, err) } }) } func TestIgnoreDuplicateFiles(t *testing.T) { tempdir := t.TempDir() files := map[string]string{ "foo/foo.go": "one", "bar/bar.go": "LINK:../foo/foo.go", "bar/baz.go": "two", "broken/broken.go": "LINK:../nonexistent", "bar/loop": "LINK:../bar/", // symlink loop "file.go": "three", // Use multiple symdirs to increase the chance that one // of these and not "foo" is followed first. 
"symdir1": "LINK:foo", "symdir2": "LINK:foo", "symdir3": "LINK:foo", "symdir4": "LINK:foo", } if runtime.GOOS == "windows" { delete(files, "broken/broken.go") } testCreateFiles(t, tempdir, files) var expectedContents []string for _, contents := range files { if !strings.HasPrefix(contents, "LINK:") { expectedContents = append(expectedContents, contents) } } sort.Strings(expectedContents) var ( mu sync.Mutex seen []os.FileInfo contents []string ) walkFn := fastwalk.IgnoreDuplicateFiles(func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) fi1, err := fastwalk.StatDirEntry(path, de) if err != nil { t.Error(err) return err } mu.Lock() defer mu.Unlock() for _, fi2 := range seen { if os.SameFile(fi1, fi2) { t.Errorf("Visited file twice: %q (%s) and %q (%s)", path, fi1.Mode(), fi2.Name(), fi2.Mode()) } } seen = append(seen, fi1) if fi1.Mode().IsRegular() { data, err := os.ReadFile(path) if err != nil { return err } contents = append(contents, string(data)) } return nil }) if err := fastwalk.Walk(nil, tempdir, walkFn); err != nil { t.Fatal(err) } sort.Strings(contents) if !reflect.DeepEqual(expectedContents, contents) { t.Errorf("File contents want: %q got: %q", expectedContents, contents) } } func TestIgnorePermissionErrors(t *testing.T) { var called bool fn := fastwalk.IgnorePermissionErrors(func(path string, _ fs.DirEntry, err error) error { called = true if err != nil { t.Fatal(err) } return nil }) t.Run("PermissionError", func(t *testing.T) { err := fn("", nil, &os.PathError{Op: "open", Path: "foo.go", Err: os.ErrPermission}) if err != nil { t.Fatal(err) } if called { t.Fatal("walkFn should not have been called with os.ErrPermission") } }) t.Run("NilError", func(t *testing.T) { called = false if err := fn("", nil, nil); err != nil { t.Fatal(err) } if !called { t.Fatal("walkFn should have been called with nil error") } }) t.Run("OtherError", func(t *testing.T) { fn := fastwalk.IgnorePermissionErrors(func(path string, _ fs.DirEntry, err error) 
error { return err }) want := &os.PathError{Op: "open", Path: "foo.go", Err: os.ErrExist} if got := fn("", nil, want); got != want { t.Fatalf("want error: %v got: %v", want, got) } }) } fastwalk-1.0.9/dirent.go000066400000000000000000000026231470672632200151650ustar00rootroot00000000000000package fastwalk import ( "io/fs" "os" "sync" "sync/atomic" "syscall" "unsafe" ) type fileInfo struct { once sync.Once fs.FileInfo err error } func loadFileInfo(pinfo **fileInfo) *fileInfo { ptr := (*unsafe.Pointer)(unsafe.Pointer(pinfo)) fi := (*fileInfo)(atomic.LoadPointer(ptr)) if fi == nil { fi = &fileInfo{} if !atomic.CompareAndSwapPointer( (*unsafe.Pointer)(unsafe.Pointer(pinfo)), // adrr nil, // old unsafe.Pointer(fi), // new ) { fi = (*fileInfo)(atomic.LoadPointer(ptr)) } } return fi } // StatDirEntry returns a [fs.FileInfo] describing the named file ([os.Stat]). // If de is a [fastwalk.DirEntry] its Stat method is used and the returned // FileInfo may be cached from a prior call to Stat. If a cached result is not // desired, users should just call [os.Stat] directly. // // This is a helper function for calling Stat on the DirEntry passed to the // walkFn argument to [Walk]. // // The path argument is only used if de is not of type [fastwalk.DirEntry]. // Therefore, de should be the DirEntry describing path. 
func StatDirEntry(path string, de fs.DirEntry) (fs.FileInfo, error) { if de == nil { return nil, &os.PathError{Op: "stat", Path: path, Err: syscall.EINVAL} } if de.Type()&os.ModeSymlink == 0 { return de.Info() } if d, ok := de.(DirEntry); ok { return d.Stat() } return os.Stat(path) } fastwalk-1.0.9/dirent_export_test.go000066400000000000000000000031511470672632200176220ustar00rootroot00000000000000package fastwalk import ( "fmt" "io/fs" "os" "runtime" "testing" "time" ) // Export funcs for testing (because I'm too lazy to move the // symlink() and writeFile() funcs) func FormatFileInfo(fi fs.FileInfo) string { return fmt.Sprintf("%+v", struct { Name string Size int64 Mode os.FileMode ModTime time.Time IsDir bool Sys string }{ Name: fi.Name(), Size: fi.Size(), Mode: fi.Mode(), ModTime: fi.ModTime(), IsDir: fi.IsDir(), Sys: fmt.Sprintf("%+v", fi.Sys()), }) } // NB: this test lives here and not in fastwalk_test.go since we need to // access the internal cleanRootPath function. func TestCleanRootPath(t *testing.T) { test := func(t *testing.T, tests map[string]string) { t.Helper() for in, want := range tests { got := cleanRootPath(in) if got != want { t.Errorf("cleanRootPath(%q) = %q; want: %q", in, got, want) } } } // NB: The name here isn't exactly correct since we run this for // any non-Windows OS. 
t.Run("Unix", func(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("test not supported on Windows") } test(t, map[string]string{ "": "", ".": ".", "/": "/", "//": "/", "/foo": "/foo", "/foo/": "/foo", "a": "a", }) }) // Test that cleanRootPath is a no-op on Windows t.Run("Windows", func(t *testing.T) { if runtime.GOOS != "windows" { t.Skip("test only supported on Windows") } test(t, map[string]string{ `C:/`: `C:/`, `C://`: `C://`, `\\?\GLOBALROOT`: `\\?\GLOBALROOT`, `\\?\GLOBALROOT\\`: `\\?\GLOBALROOT\\`, }) }) } fastwalk-1.0.9/dirent_portable.go000066400000000000000000000051601470672632200170540ustar00rootroot00000000000000//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris) // TODO: add a "portable_dirent" build tag so that we can test this // on non-Windows platforms package fastwalk import ( "io/fs" "os" "sort" "sync" "github.com/charlievieth/fastwalk/internal/fmtdirent" ) var _ DirEntry = (*portableDirent)(nil) type portableDirent struct { fs.DirEntry parent string stat *fileInfo } func (d *portableDirent) String() string { return fmtdirent.FormatDirEntry(d) } func (d *portableDirent) Stat() (fs.FileInfo, error) { if d.DirEntry.Type()&os.ModeSymlink == 0 { return d.DirEntry.Info() } stat := loadFileInfo(&d.stat) stat.once.Do(func() { stat.FileInfo, stat.err = os.Stat(d.parent + string(os.PathSeparator) + d.Name()) }) return stat.FileInfo, stat.err } func newDirEntry(dirName string, info fs.DirEntry) DirEntry { return &portableDirent{ DirEntry: info, parent: dirName, } } func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry { return newDirEntry(dirname, fs.FileInfoToDirEntry(fi)) } var direntSlicePool = sync.Pool{ New: func() any { a := make([]DirEntry, 0, 32) return &a }, } func putDirentSlice(p *[]DirEntry) { // max is half as many as Unix because twice the size if p != nil && cap(*p) <= 16*1024 { a := *p for i := range a { a[i] = nil } *p = a[:0] direntSlicePool.Put(p) } } func 
sortDirents(mode SortMode, dents []DirEntry) { if len(dents) <= 1 { return } switch mode { case SortLexical: sort.Slice(dents, func(i, j int) bool { return dents[i].Name() < dents[j].Name() }) case SortFilesFirst: sort.Slice(dents, func(i, j int) bool { d1 := dents[i] d2 := dents[j] r1 := d1.Type().IsRegular() r2 := d2.Type().IsRegular() switch { case r1 && !r2: return true case !r1 && r2: return false case !r1 && !r2: // Both are not regular files: sort directories last dd1 := d1.Type().IsDir() dd2 := d2.Type().IsDir() switch { case !dd1 && dd2: return true case dd1 && !dd2: return false } } return d1.Name() < d2.Name() }) case SortDirsFirst: sort.Slice(dents, func(i, j int) bool { d1 := dents[i] d2 := dents[j] dd1 := d1.Type().IsDir() dd2 := d2.Type().IsDir() switch { case dd1 && !dd2: return true case !dd1 && dd2: return false case !dd1 && !dd2: // Both are not directories: sort regular files first r1 := d1.Type().IsRegular() r2 := d2.Type().IsRegular() switch { case r1 && !r2: return true case !r1 && r2: return false } } return d1.Name() < d2.Name() }) } } fastwalk-1.0.9/dirent_portable_test.go000066400000000000000000000070351470672632200201160ustar00rootroot00000000000000//go:build !darwin && !(aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris) package fastwalk import ( "io/fs" "math/rand" "reflect" "testing" "time" "github.com/charlievieth/fastwalk/internal/fmtdirent" ) var _ DirEntry = dirEntry{} // Minimal DirEntry for testing type dirEntry struct { name string typ fs.FileMode } func (de dirEntry) Name() string { return de.name } func (de dirEntry) IsDir() bool { return de.typ.IsDir() } func (de dirEntry) Type() fs.FileMode { return de.typ.Type() } func (de dirEntry) Info() (fs.FileInfo, error) { panic("not implemented") } func (de dirEntry) Stat() (fs.FileInfo, error) { panic("not implemented") } func (de dirEntry) String() string { return fmtdirent.FormatDirEntry(de) } // NB: this must be kept in sync with the // 
TestSortDirents in dirent_unix_test.go func TestSortDirents(t *testing.T) { direntNames := func(dents []DirEntry) []string { names := make([]string, len(dents)) for i, d := range dents { names[i] = d.Name() } return names } t.Run("None", func(t *testing.T) { dents := []DirEntry{ dirEntry{name: "b"}, dirEntry{name: "a"}, dirEntry{name: "d"}, dirEntry{name: "c"}, } want := direntNames(dents) sortDirents(SortNone, dents) got := direntNames(dents) if !reflect.DeepEqual(got, want) { t.Errorf("got: %q want: %q", got, want) } }) rr := rand.New(rand.NewSource(time.Now().UnixNano())) shuffleDirents := func(dents []DirEntry) []DirEntry { rr.Shuffle(len(dents), func(i, j int) { dents[i], dents[j] = dents[j], dents[i] }) return dents } // dents needs to be in the expected order test := func(t *testing.T, dents []DirEntry, mode SortMode) { want := direntNames(dents) // Run multiple times with different shuffles for i := 0; i < 10; i++ { t.Run("", func(t *testing.T) { sortDirents(mode, shuffleDirents(dents)) got := direntNames(dents) if !reflect.DeepEqual(got, want) { t.Errorf("got: %q want: %q", got, want) } }) } } t.Run("Lexical", func(t *testing.T) { dents := []DirEntry{ dirEntry{name: "a"}, dirEntry{name: "b"}, dirEntry{name: "c"}, dirEntry{name: "d"}, } test(t, dents, SortLexical) }) t.Run("FilesFirst", func(t *testing.T) { dents := []DirEntry{ // Files lexically dirEntry{name: "f1", typ: 0}, dirEntry{name: "f2", typ: 0}, dirEntry{name: "f3", typ: 0}, // Non-dirs lexically dirEntry{name: "a1", typ: fs.ModeSymlink}, dirEntry{name: "a2", typ: fs.ModeSymlink}, dirEntry{name: "a3", typ: fs.ModeSymlink}, dirEntry{name: "s1", typ: fs.ModeSocket}, dirEntry{name: "s2", typ: fs.ModeSocket}, dirEntry{name: "s3", typ: fs.ModeSocket}, // Dirs lexically dirEntry{name: "d1", typ: fs.ModeDir}, dirEntry{name: "d2", typ: fs.ModeDir}, dirEntry{name: "d3", typ: fs.ModeDir}, } test(t, dents, SortFilesFirst) }) t.Run("DirsFirst", func(t *testing.T) { dents := []DirEntry{ // Dirs lexically 
dirEntry{name: "d1", typ: fs.ModeDir}, dirEntry{name: "d2", typ: fs.ModeDir}, dirEntry{name: "d3", typ: fs.ModeDir}, // Files lexically dirEntry{name: "f1", typ: 0}, dirEntry{name: "f2", typ: 0}, dirEntry{name: "f3", typ: 0}, // Non-dirs lexically dirEntry{name: "a1", typ: fs.ModeSymlink}, dirEntry{name: "a2", typ: fs.ModeSymlink}, dirEntry{name: "a3", typ: fs.ModeSymlink}, dirEntry{name: "s1", typ: fs.ModeSocket}, dirEntry{name: "s2", typ: fs.ModeSocket}, dirEntry{name: "s3", typ: fs.ModeSocket}, } test(t, dents, SortDirsFirst) }) } fastwalk-1.0.9/dirent_test.go000066400000000000000000000074121470672632200162250ustar00rootroot00000000000000//go:build darwin || aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package fastwalk_test import ( "io/fs" "os" "path/filepath" "runtime" "sync" "testing" "github.com/charlievieth/fastwalk" ) func TestDirent(t *testing.T) { tempdir := t.TempDir() fileName := filepath.Join(tempdir, "file.txt") if err := writeFile(fileName, "file.txt", 0644); err != nil { t.Fatal(err) } linkName := filepath.Join(tempdir, "link.link") if err := symlink(t, filepath.Base(fileName), linkName); err != nil { t.Fatal(err) } // Use fastwalk.Walk to create the dir entries getDirEnts := func(t *testing.T) (linkEnt, fileEnt fs.DirEntry) { err := fastwalk.Walk(nil, tempdir, func(path string, d fs.DirEntry, err error) error { switch path { case linkName: linkEnt = d case fileName: fileEnt = d } return nil }) if err != nil { t.Fatal(err) } if fileEnt == nil || linkEnt == nil { t.Fatal("error walking directory") } return linkEnt, fileEnt } t.Run("Lstat", func(t *testing.T) { linkEnt, _ := getDirEnts(t) want, err := os.Lstat(linkName) if err != nil { t.Fatal(err) } got, err := linkEnt.Info() if err != nil { t.Fatal(err) } if !os.SameFile(want, got) { t.Errorf("lstat mismatch\n got:\n%s\nwant:\n%s", fastwalk.FormatFileInfo(got), fastwalk.FormatFileInfo(want)) } }) t.Run("Stat", func(t *testing.T) { _, fileEnt := getDirEnts(t) 
want, err := os.Stat(fileName) if err != nil { t.Fatal(err) } got, err := fastwalk.StatDirEntry(linkName, fileEnt) if err != nil { t.Fatal(err) } if !os.SameFile(want, got) { t.Errorf("lstat mismatch\n got:\n%s\nwant:\n%s", fastwalk.FormatFileInfo(got), fastwalk.FormatFileInfo(want)) } fi, err := fileEnt.Info() if err != nil { t.Fatal(err) } if fi != got { t.Error("failed to return or cache FileInfo") } de := fileEnt.(fastwalk.DirEntry) fi, err = de.Stat() if err != nil { t.Fatal(err) } if fi != got { t.Error("failed to use cached Info result for non-symlink") } }) t.Run("Parallel", func(t *testing.T) { testParallel := func(t *testing.T, de fs.DirEntry, fn func() (fs.FileInfo, error)) { numCPU := runtime.NumCPU() infos := make([][]fs.FileInfo, numCPU) for i := range infos { infos[i] = make([]fs.FileInfo, 100) } // Start all the goroutines at the same time to // maximise the chance of a race start := make(chan struct{}) var wg, ready sync.WaitGroup ready.Add(numCPU) wg.Add(numCPU) for i := 0; i < numCPU; i++ { go func(fis []fs.FileInfo, de fs.DirEntry) { defer wg.Done() ready.Done() <-start for i := range fis { fis[i], _ = de.Info() } }(infos[i], de) } ready.Wait() close(start) // start all goroutines at once wg.Wait() first := infos[0][0] if first == nil { t.Fatal("failed to stat file:", de.Name()) } for _, fis := range infos { for _, fi := range fis { if fi != first { t.Errorf("Expected the same fs.FileInfo to always "+ "be returned got: %#v want: %#v", fi, first) } } } } t.Run("File", func(t *testing.T) { t.Run("Stat", func(t *testing.T) { _, fileEnt := getDirEnts(t) de := fileEnt.(fastwalk.DirEntry) testParallel(t, de, de.Stat) }) t.Run("Lstat", func(t *testing.T) { _, fileEnt := getDirEnts(t) de := fileEnt.(fastwalk.DirEntry) testParallel(t, de, de.Info) }) }) t.Run("Link", func(t *testing.T) { t.Run("Stat", func(t *testing.T) { linkEnt, _ := getDirEnts(t) de := linkEnt.(fastwalk.DirEntry) testParallel(t, de, de.Stat) }) t.Run("Lstat", func(t *testing.T) { 
linkEnt, _ := getDirEnts(t) de := linkEnt.(fastwalk.DirEntry) testParallel(t, de, de.Info) }) }) }) } fastwalk-1.0.9/dirent_unix.go000066400000000000000000000056231470672632200162330ustar00rootroot00000000000000//go:build darwin || aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "io/fs" "os" "sort" "sync" "github.com/charlievieth/fastwalk/internal/fmtdirent" ) type unixDirent struct { parent string name string typ fs.FileMode info *fileInfo stat *fileInfo } func (d *unixDirent) Name() string { return d.name } func (d *unixDirent) IsDir() bool { return d.typ.IsDir() } func (d *unixDirent) Type() fs.FileMode { return d.typ } func (d *unixDirent) String() string { return fmtdirent.FormatDirEntry(d) } func (d *unixDirent) Info() (fs.FileInfo, error) { info := loadFileInfo(&d.info) info.once.Do(func() { info.FileInfo, info.err = os.Lstat(d.parent + "/" + d.name) }) return info.FileInfo, info.err } func (d *unixDirent) Stat() (fs.FileInfo, error) { if d.typ&os.ModeSymlink == 0 { return d.Info() } stat := loadFileInfo(&d.stat) stat.once.Do(func() { stat.FileInfo, stat.err = os.Stat(d.parent + "/" + d.name) }) return stat.FileInfo, stat.err } func newUnixDirent(parent, name string, typ fs.FileMode) *unixDirent { return &unixDirent{ parent: parent, name: name, typ: typ, } } func fileInfoToDirEntry(dirname string, fi fs.FileInfo) DirEntry { info := &fileInfo{ FileInfo: fi, } info.once.Do(func() {}) return &unixDirent{ parent: dirname, name: fi.Name(), typ: fi.Mode().Type(), info: info, } } var direntSlicePool = sync.Pool{ New: func() any { a := make([]*unixDirent, 0, 32) return &a }, } func putDirentSlice(p *[]*unixDirent) { if p != nil && cap(*p) <= 32*1024 /* 256Kb */ { a := *p for i := range a { a[i] = nil } *p = a[:0] direntSlicePool.Put(p) } } func sortDirents(mode SortMode, dents []*unixDirent) { if len(dents) <= 1 { return } switch mode { case SortLexical: sort.Slice(dents, func(i, j int) bool { return 
dents[i].name < dents[j].name }) case SortFilesFirst: sort.Slice(dents, func(i, j int) bool { d1 := dents[i] d2 := dents[j] r1 := d1.typ.IsRegular() r2 := d2.typ.IsRegular() switch { case r1 && !r2: return true case !r1 && r2: return false case !r1 && !r2: // Both are not regular files: sort directories last dd1 := d1.typ.IsDir() dd2 := d2.typ.IsDir() switch { case !dd1 && dd2: return true case dd1 && !dd2: return false } } return d1.name < d2.name }) case SortDirsFirst: sort.Slice(dents, func(i, j int) bool { d1 := dents[i] d2 := dents[j] dd1 := d1.typ.IsDir() dd2 := d2.typ.IsDir() switch { case dd1 && !dd2: return true case !dd1 && dd2: return false case !dd1 && !dd2: // Both are not directories: sort regular files first r1 := d1.typ.IsRegular() r2 := d2.typ.IsRegular() switch { case r1 && !r2: return true case !r1 && r2: return false } } return d1.name < d2.name }) } } fastwalk-1.0.9/dirent_unix_test.go000066400000000000000000000150031470672632200172630ustar00rootroot00000000000000//go:build darwin || aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "io/fs" "math/rand" "os" "path/filepath" "reflect" "runtime" "sync" "sync/atomic" "testing" "time" "unsafe" ) func testUnixDirentParallel(t *testing.T, ent *unixDirent, want fs.FileInfo, fn func(*unixDirent) (fs.FileInfo, error)) { sameFile := func(fi1, fi2 fs.FileInfo) bool { return fi1.Name() == fi2.Name() && fi1.Size() == fi2.Size() && fi1.Mode() == fi2.Mode() && fi1.ModTime() == fi2.ModTime() && fi1.IsDir() == fi2.IsDir() && os.SameFile(fi1, fi2) } numCPU := runtime.NumCPU() if numCPU < 4 { numCPU = 4 } if numCPU > 16 { numCPU = 16 } var wg sync.WaitGroup start := make(chan struct{}) var mu sync.Mutex infos := make(map[*fileInfo]int) stats := make(map[*fileInfo]int) for i := 0; i < numCPU; i++ { wg.Add(1) go func() { defer wg.Done() <-start for i := 0; i < 16; i++ { fi, err := fn(ent) if err != nil { t.Error(err) return } info := 
(*fileInfo)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&ent.info)))) stat := (*fileInfo)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&ent.stat)))) mu.Lock() infos[info]++ stats[stat]++ mu.Unlock() if !sameFile(fi, want) { t.Errorf("FileInfo not equal:\nwant: %s\ngot: %s\n", FormatFileInfo(want), FormatFileInfo(fi)) return } } }() } close(start) wg.Wait() t.Logf("Infos: %d Stats: %d\n", len(infos), len(stats)) } func TestUnixDirent(t *testing.T) { tempdir := t.TempDir() fileName := filepath.Join(tempdir, "file.txt") if err := os.WriteFile(fileName, []byte("file.txt"), 0644); err != nil { t.Fatal(err) } t.Run("File", func(t *testing.T) { fileInfo, err := os.Lstat(fileName) if err != nil { t.Fatal(err) } t.Run("Stat", func(t *testing.T) { ent := newUnixDirent(tempdir, filepath.Base(fileName), fileInfo.Mode().Type()) testUnixDirentParallel(t, ent, fileInfo, (*unixDirent).Stat) }) t.Run("Info", func(t *testing.T) { ent := newUnixDirent(tempdir, filepath.Base(fileName), fileInfo.Mode().Type()) testUnixDirentParallel(t, ent, fileInfo, (*unixDirent).Info) }) }) t.Run("Link", func(t *testing.T) { linkName := filepath.Join(tempdir, "link.link") if err := os.Symlink(filepath.Base(fileName), linkName); err != nil { t.Fatal(err) } fileInfo, err := os.Lstat(linkName) if err != nil { t.Fatal(err) } t.Run("Stat", func(t *testing.T) { want, err := os.Stat(linkName) if err != nil { t.Fatal(err) } ent := newUnixDirent(tempdir, filepath.Base(linkName), fileInfo.Mode().Type()) testUnixDirentParallel(t, ent, want, (*unixDirent).Stat) }) t.Run("Info", func(t *testing.T) { ent := newUnixDirent(tempdir, filepath.Base(linkName), fileInfo.Mode().Type()) testUnixDirentParallel(t, ent, fileInfo, (*unixDirent).Info) }) }) } // NB: this must be kept in sync with the // TestSortDirents in dirent_portable_test.go func TestSortDirents(t *testing.T) { direntNames := func(dents []*unixDirent) []string { names := make([]string, len(dents)) for i, d := range dents { names[i] = d.Name() 
} return names } t.Run("None", func(t *testing.T) { dents := []*unixDirent{ {name: "b"}, {name: "a"}, {name: "d"}, {name: "c"}, } want := direntNames(dents) sortDirents(SortNone, dents) got := direntNames(dents) if !reflect.DeepEqual(got, want) { t.Errorf("got: %q want: %q", got, want) } }) rr := rand.New(rand.NewSource(time.Now().UnixNano())) shuffleDirents := func(dents []*unixDirent) []*unixDirent { rr.Shuffle(len(dents), func(i, j int) { dents[i], dents[j] = dents[j], dents[i] }) return dents } // dents needs to be in the expected order test := func(t *testing.T, dents []*unixDirent, mode SortMode) { want := direntNames(dents) // Run multiple times with different shuffles for i := 0; i < 10; i++ { t.Run("", func(t *testing.T) { sortDirents(mode, shuffleDirents(dents)) got := direntNames(dents) if !reflect.DeepEqual(got, want) { t.Errorf("got: %q want: %q", got, want) } }) } } t.Run("Lexical", func(t *testing.T) { dents := []*unixDirent{ {name: "a"}, {name: "b"}, {name: "c"}, {name: "d"}, } test(t, dents, SortLexical) }) t.Run("FilesFirst", func(t *testing.T) { dents := []*unixDirent{ // Files lexically {name: "f1", typ: 0}, {name: "f2", typ: 0}, {name: "f3", typ: 0}, // Non-dirs lexically {name: "a1", typ: fs.ModeSymlink}, {name: "a2", typ: fs.ModeSymlink}, {name: "a3", typ: fs.ModeSymlink}, {name: "s1", typ: fs.ModeSocket}, {name: "s2", typ: fs.ModeSocket}, {name: "s3", typ: fs.ModeSocket}, // Dirs lexically {name: "d1", typ: fs.ModeDir}, {name: "d2", typ: fs.ModeDir}, {name: "d3", typ: fs.ModeDir}, } test(t, dents, SortFilesFirst) }) t.Run("DirsFirst", func(t *testing.T) { dents := []*unixDirent{ // Dirs lexically {name: "d1", typ: fs.ModeDir}, {name: "d2", typ: fs.ModeDir}, {name: "d3", typ: fs.ModeDir}, // Files lexically {name: "f1", typ: 0}, {name: "f2", typ: 0}, {name: "f3", typ: 0}, // Non-dirs lexically {name: "a1", typ: fs.ModeSymlink}, {name: "a2", typ: fs.ModeSymlink}, {name: "a3", typ: fs.ModeSymlink}, {name: "s1", typ: fs.ModeSocket}, {name: "s2", 
// An EntryFilter keeps track of visited directory entries and can be used to
// detect and avoid symlink loops or processing the same file twice.
//
// This portable implementation identifies a file by the path returned from
// [filepath.EvalSymlinks]. The zero value is ready to use.
type EntryFilter struct {
	// we assume most files have not been seen so
	// no need for a RWMutex
	mu   sync.Mutex
	seen map[string]struct{}
}

// Entry returns if path has been seen before and records it if it has not.
// The [fs.DirEntry] argument is unused by this implementation.
//
// If path cannot be resolved Entry returns true. Errors are treated as
// duplicate files, which matches the Unix implementation and keeps callers
// from descending into entries that cannot be identified.
func (e *EntryFilter) Entry(path string, _ fs.DirEntry) bool {
	name, err := filepath.EvalSymlinks(path)
	if err != nil {
		return true // treat errors as duplicate files
	}
	e.mu.Lock()
	if e.seen == nil {
		// Lazily allocate so the zero value works.
		e.seen = make(map[string]struct{}, 128)
	}
	_, ok := e.seen[name]
	if !ok {
		e.seen[name] = struct{}{}
	}
	e.mu.Unlock()
	return ok
}

// NewEntryFilter returns a new EntryFilter
func NewEntryFilter() *EntryFilter {
	return &EntryFilter{seen: make(map[string]struct{}, 128)}
}
fastwalk-1.0.9/entry_filter_test.go000066400000000000000000000066241470672632200174520ustar00rootroot00000000000000package fastwalk_test import ( "fmt" "io/fs" "math/rand" "os" "path/filepath" "runtime" "sync" "testing" "time" "github.com/charlievieth/fastwalk" ) func TestEntryFilter(t *testing.T) { tempdir := t.TempDir() files := map[string]string{ "foo/foo.go": "one", "bar/bar.go": "LINK:../foo/foo.go", "bar/baz.go": "two", "bar/loop": "LINK:../bar/", // symlink loop "file.go": "three", // Use multiple symdirs to increase the chance that one // of these and not "foo" is followed first. "symdir1": "LINK:foo", "symdir2": "LINK:foo", "symdir3": "LINK:foo", "symdir4": "LINK:foo", } testCreateFiles(t, tempdir, files) var mu sync.Mutex var seen []os.FileInfo filter := fastwalk.NewEntryFilter() walkFn := fastwalk.IgnoreDuplicateFiles(func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) fi1, err := fastwalk.StatDirEntry(path, de) if err != nil { t.Error(err) return err } mu.Lock() defer mu.Unlock() if !filter.Entry(path, de) { for _, fi2 := range seen { if os.SameFile(fi1, fi2) { t.Errorf("Visited file twice: %q (%s) and %q (%s)", path, fi1.Mode(), fi2.Name(), fi2.Mode()) } } } seen = append(seen, fi1) return nil }) if err := fastwalk.Walk(nil, tempdir, walkFn); err != nil { t.Fatal(err) } // Test that true is returned for a non-existent directory // On Windows the Info field of the returned DirEntry // is already populated so this will succeed. 
if runtime.GOOS != "windows" { path := filepath.Join(tempdir, "src", "foo/foo.go") fi, err := os.Lstat(path) if err != nil { t.Fatal(err) } if err := os.Remove(path); err != nil { t.Fatal(err) } if !filter.Entry(path, fs.FileInfoToDirEntry(fi)) { t.Error("EntryFilter should return true when the file does not exist") } } } func BenchmarkEntryFilter(b *testing.B) { tempdir := b.TempDir() names := make([]string, 0, 2048) for i := 0; i < 1024; i++ { name := filepath.Join(tempdir, fmt.Sprintf("dir_%04d", i)) if err := os.Mkdir(name, 0755); err != nil { b.Fatal(err) } names = append(names, name) } for i := 0; i < 1024; i++ { name := filepath.Join(tempdir, fmt.Sprintf("file_%04d", i)) if err := writeFile(name, filepath.Base(name), 0644); err != nil { b.Fatal(err) } names = append(names, name) } rr := rand.New(rand.NewSource(time.Now().UnixNano())) rr.Shuffle(len(names), func(i, j int) { names[i], names[j] = names[j], names[i] }) type fileInfo struct { Name string Info fs.DirEntry } infos := make([]fileInfo, len(names)) for i, name := range names { fi, err := os.Lstat(name) if err != nil { b.Fatal(err) } infos[i] = fileInfo{name, fs.FileInfoToDirEntry(fi)} } b.ResetTimer() b.Run("MostlyHits", func(b *testing.B) { filter := fastwalk.NewEntryFilter() for i := 0; i < b.N; i++ { x := infos[i%len(infos)] filter.Entry(x.Name, x.Info) } }) b.Run("MostlyHitsParallel", func(b *testing.B) { filter := fastwalk.NewEntryFilter() b.RunParallel(func(pb *testing.PB) { i := 0 for pb.Next() { x := infos[i%len(infos)] filter.Entry(x.Name, x.Info) i++ } }) }) b.Run("HalfMisses", func(b *testing.B) { filter := fastwalk.NewEntryFilter() n := len(infos) for i := 0; i < b.N; i++ { x := infos[i%len(infos)] filter.Entry(x.Name, x.Info) if i != 0 && i%n == 0 { b.StopTimer() filter = fastwalk.NewEntryFilter() b.StartTimer() } } }) } fastwalk-1.0.9/entry_filter_unix.go000066400000000000000000000027711470672632200174550ustar00rootroot00000000000000//go:build darwin || aix || dragonfly || freebsd || (js 
&& wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "io/fs" "sync" "syscall" ) type fileKey struct { Dev uint64 Ino uint64 } type entryMap struct { mu sync.Mutex keys map[fileKey]struct{} } // An EntryFilter keeps track of visited directory entries and can be used to // detect and avoid symlink loops or processing the same file twice. type EntryFilter struct { // Use an array of 8 to reduce lock contention. The entryMap is // picked via the inode number. We don't take the device number // into account because: we don't expect to see many of them and // uniformly spreading the load isn't terribly beneficial here. ents [8]entryMap } // NewEntryFilter returns a new EntryFilter func NewEntryFilter() *EntryFilter { return new(EntryFilter) } func (e *EntryFilter) seen(dev, ino uint64) (seen bool) { m := &e.ents[ino%uint64(len(e.ents))] m.mu.Lock() if _, seen = m.keys[fileKey{dev, ino}]; !seen { if m.keys == nil { m.keys = make(map[fileKey]struct{}) } m.keys[fileKey{dev, ino}] = struct{}{} } m.mu.Unlock() return seen } // TODO: this name is confusing and should be fixed // Entry returns if path and [fs.DirEntry] have been seen before. 
func (e *EntryFilter) Entry(path string, de fs.DirEntry) (seen bool) { fi, err := StatDirEntry(path, de) if err != nil { return true // treat errors as duplicate files } stat := fi.Sys().(*syscall.Stat_t) return e.seen(uint64(stat.Dev), uint64(stat.Ino)) } fastwalk-1.0.9/entry_filter_unix_test.go000066400000000000000000000052511470672632200205100ustar00rootroot00000000000000//go:build darwin || aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "math/rand" "runtime" "sync" "testing" "time" ) type devIno struct { Dev, Ino uint64 } func generateDevIno(rr *rand.Rand, ndev, size int) []devIno { devs := make([]uint64, ndev) for i := range devs { devs[i] = rr.Uint64() } pairs := make([]devIno, size) seen := make(map[devIno]struct{}, len(pairs)) for i := range pairs { for { di := devIno{ Dev: devs[rr.Intn(len(devs))], Ino: rr.Uint64(), } if _, ok := seen[di]; !ok { pairs[i] = di seen[di] = struct{}{} break } } } rr.Shuffle(len(pairs), func(i, j int) { pairs[i], pairs[j] = pairs[j], pairs[i] }) return pairs } func TestEntryFilter_Unix(t *testing.T) { rr := rand.New(rand.NewSource(1)) pairs := generateDevIno(rr, 2, 100) x := NewEntryFilter() for _, p := range pairs { if x.seen(p.Dev, p.Ino) { t.Errorf("duplicate: Dev: %d Ino: %d", p.Dev, p.Ino) } } for _, p := range pairs { if !x.seen(p.Dev, p.Ino) { t.Errorf("wat: Dev: %d Ino: %d", p.Dev, p.Ino) } } } func TestEntryFilter_Unix_Parallel(t *testing.T) { if testing.Short() { t.Skip("Short test") } wg := new(sync.WaitGroup) ready := new(sync.WaitGroup) start := make(chan struct{}) x := NewEntryFilter() numWorkers := runtime.NumCPU() * 2 if numWorkers < 2 { numWorkers = 2 } if numWorkers > 8 { numWorkers = 8 } rr := rand.New(rand.NewSource(time.Now().UnixNano())) pairs := generateDevIno(rr, 2, numWorkers*8192) for i := 0; i < numWorkers; i++ { wg.Add(1) ready.Add(1) go func(i int, pairs []devIno) { defer wg.Done() ready.Done() <-start for _, p := range pairs { if 
x.seen(p.Dev, p.Ino) { t.Errorf("%d: unseen dev/ino: Dev: %d Ino: %d", i, p.Dev, p.Ino) return } } for _, p := range pairs { if !x.seen(p.Dev, p.Ino) { t.Errorf("%d: missed seen dev/ino: Dev: %d Ino: %d", i, p.Dev, p.Ino) return } } }(i, pairs[i*numWorkers:(i+1)*numWorkers]) } ready.Wait() close(start) wg.Wait() } func BenchmarkEntryFilter_Unix(b *testing.B) { if testing.Short() { b.Skip("Skipping: short test") } rr := rand.New(rand.NewSource(1)) pairs := generateDevIno(rr, 2, 8192) x := NewEntryFilter() for _, p := range pairs { x.seen(p.Dev, p.Ino) } if len(pairs) != 8192 { panic("nope!") } b.ResetTimer() b.Run("Sequential", func(b *testing.B) { for i := 0; i < b.N; i++ { p := pairs[i%8192] x.seen(p.Dev, p.Ino) } }) b.Run("Parallel", func(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for i := 0; pb.Next(); i++ { p := pairs[i%8192] x.seen(p.Dev, p.Ino) } }) }) } fastwalk-1.0.9/entry_filter_windows.go000066400000000000000000000077521470672632200201700ustar00rootroot00000000000000//go:build windows package fastwalk import ( "io/fs" "os" "path/filepath" "sync" "syscall" ) type fileKey struct { VolumeSerialNumber uint32 FileIndexHigh uint32 FileIndexLow uint32 } type EntryFilter struct { mu sync.Mutex seen map[fileKey]struct{} } func NewEntryFilter() *EntryFilter { return &EntryFilter{seen: make(map[fileKey]struct{}, 128)} } func (e *EntryFilter) Entry(path string, _ fs.DirEntry) bool { namep, err := syscall.UTF16PtrFromString(fixLongPath(path)) if err != nil { return false } h, err := syscall.CreateFile(namep, 0, 0, nil, syscall.OPEN_EXISTING, syscall.FILE_FLAG_BACKUP_SEMANTICS, 0) if err != nil { return false } var d syscall.ByHandleFileInformation err = syscall.GetFileInformationByHandle(h, &d) syscall.CloseHandle(h) if err != nil { return false } key := fileKey{ VolumeSerialNumber: d.VolumeSerialNumber, FileIndexHigh: d.FileIndexHigh, FileIndexLow: d.FileIndexLow, } e.mu.Lock() if e.seen == nil { e.seen = make(map[fileKey]struct{}) } _, ok := e.seen[key] if 
// isAbs reports whether path consists of a volume name (as returned by
// [filepath.VolumeName]) immediately followed by a path separator.
// Paths without a volume name, and paths that are only a volume name,
// are not considered absolute.
func isAbs(path string) (b bool) {
	v := filepath.VolumeName(path)
	if v == "" {
		return false
	}
	path = path[len(v):]
	if path == "" {
		return false
	}
	return os.IsPathSeparator(path[0])
}

// fixLongPath returns the extended-length (\\?\-prefixed) form of
// path when needed, in order to avoid the default 260 character file
// path limit imposed by Windows. If path is not easily converted to
// the extended-length form (for example, if path is a relative path
// or contains .. elements), or is short enough, fixLongPath returns
// path unmodified.
//
// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
func fixLongPath(path string) string {
	// Do nothing (and don't allocate) if the path is "short".
	// Empirically (at least on the Windows Server 2013 builder),
	// the kernel is arbitrarily okay with < 248 bytes. That
	// matches what the docs above say:
	// "When using an API to create a directory, the specified
	// path cannot be so long that you cannot append an 8.3 file
	// name (that is, the directory name cannot exceed MAX_PATH
	// minus 12)." Since MAX_PATH is 260, 260 - 12 = 248.
	//
	// The MSDN docs appear to say that a normal path that is 248 bytes long
	// will work; empirically the path must be less than 248 bytes long.
	if len(path) < 248 {
		// Don't fix. (This is how Go 1.7 and earlier worked,
		// not automatically generating the \\?\ form)
		return path
	}

	// The extended form begins with \\?\, as in
	// \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt.
	// The extended form disables evaluation of . and .. path
	// elements and disables the interpretation of / as equivalent
	// to \. The conversion here rewrites / to \ and elides
	// . elements as well as trailing or duplicate separators. For
	// simplicity it avoids the conversion entirely for relative
	// paths or paths containing .. elements. For now,
	// \\server\share paths are not converted to
	// \\?\UNC\server\share paths because the rules for doing so
	// are less well-specified.
	if len(path) >= 2 && path[:2] == `\\` {
		// Don't canonicalize UNC paths.
		return path
	}
	if !isAbs(path) {
		// Relative path
		return path
	}

	// The prefix deliberately omits the final backslash of `\\?\`: the
	// loop below writes a backslash in front of every path element.
	const prefix = `\\?`

	// Sized for the prefix, the whole path and one extra byte for the
	// trailing backslash a drive root needs.
	pathbuf := make([]byte, len(prefix)+len(path)+len(`\`))
	copy(pathbuf, prefix)
	n := len(path)
	// r is the read index into path, w the write index into pathbuf.
	r, w := 0, len(prefix)
	for r < n {
		switch {
		case os.IsPathSeparator(path[r]):
			// empty block
			r++
		case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])):
			// /./
			r++
		case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
			// /../ is currently unhandled
			return path
		default:
			// Copy one path element, preceded by a single backslash.
			pathbuf[w] = '\\'
			w++
			for ; r < n && !os.IsPathSeparator(path[r]); r++ {
				pathbuf[w] = path[r]
				w++
			}
		}
	}
	// A drive's root directory needs a trailing \
	if w == len(`\\?\c:`) {
		pathbuf[w] = '\\'
		w++
	}
	return string(pathbuf[:w])
}
Except in the // cases below where it doesn't. {`C:\long\foo.txt`, `\\?\C:\long\foo.txt`}, {`C:/long/foo.txt`, `\\?\C:\long\foo.txt`}, {`C:\long\foo\\bar\.\baz\\`, `\\?\C:\long\foo\bar\baz`}, {`\\unc\path`, `\\unc\path`}, {`long.txt`, `long.txt`}, {`C:long.txt`, `C:long.txt`}, {`c:\long\..\bar\baz`, `c:\long\..\bar\baz`}, {`\\?\c:\long\foo.txt`, `\\?\c:\long\foo.txt`}, {`\\?\c:\long/foo.txt`, `\\?\c:\long/foo.txt`}, } { in := strings.ReplaceAll(test.in, "long", veryLong) want := strings.ReplaceAll(test.want, "long", veryLong) if got := fixLongPath(in); got != want { got = strings.ReplaceAll(got, veryLong, "long") t.Errorf("fixLongPath(%q) = %q; want %q", test.in, got, test.want) } } } func TestEntryFilterLongPath(t *testing.T) { tempdir := t.TempDir() veryLong := "l" + strings.Repeat("o", 248) + "ng" var files []string for i := 0; i <= 9; i++ { dir := filepath.Join(tempdir, strconv.Itoa(i)) if err := os.Mkdir(dir, 0755); err != nil { t.Fatal(err) } name := filepath.Join(dir, veryLong) if err := os.WriteFile(name, []byte(strconv.Itoa(i)), 0644); err != nil { t.Fatal(err) } files = append(files, dir, name) } filter := NewEntryFilter() for _, name := range files { fi, err := os.Lstat(name) if err != nil { t.Fatal(err) } for _, want := range []bool{false, true} { got := filter.Entry(name, fs.FileInfoToDirEntry(fi)) if got != want { t.Errorf("filepath.Entry(%q) = %t want: %t", name, got, want) } } } } fastwalk-1.0.9/examples/000077500000000000000000000000001470672632200151645ustar00rootroot00000000000000fastwalk-1.0.9/examples/fwfind/000077500000000000000000000000001470672632200164415ustar00rootroot00000000000000fastwalk-1.0.9/examples/fwfind/.gitignore000066400000000000000000000000161470672632200204260ustar00rootroot00000000000000/fwfind *.exe fastwalk-1.0.9/examples/fwfind/main.go000066400000000000000000000026021470672632200177140ustar00rootroot00000000000000// fwfind is a an example program that is similar to POSIX find, // but faster and worse (it's an example). 
package main import ( "flag" "fmt" "io/fs" "os" "path/filepath" "github.com/charlievieth/fastwalk" ) const usageMsg = `Usage: %[1]s [-L] [-name] [PATH...]: %[1]s is a poor replacement for the POSIX find utility ` func main() { flag.Usage = func() { fmt.Fprintf(os.Stdout, usageMsg, filepath.Base(os.Args[0])) flag.PrintDefaults() } pattern := flag.String("name", "", "Pattern to match file names against.") followLinks := flag.Bool("L", false, "Follow symbolic links") flag.Parse() // If no paths are provided default to the current directory: "." args := flag.Args() if len(args) == 0 { args = append(args, ".") } // Follow links if the "-L" flag is provided conf := fastwalk.Config{ Follow: *followLinks, } walkFn := func(path string, d fs.DirEntry, err error) error { if err != nil { fmt.Fprintf(os.Stderr, "%s: %v\n", path, err) return nil // returning the error stops iteration } if *pattern != "" { if ok, err := filepath.Match(*pattern, d.Name()); !ok { // invalid pattern (err != nil) or name does not match return err } } _, err = fmt.Println(path) return err } for _, root := range args { if err := fastwalk.Walk(&conf, root, walkFn); err != nil { fmt.Fprintf(os.Stderr, "%s: %v\n", root, err) os.Exit(1) } } } fastwalk-1.0.9/examples/fwwc/000077500000000000000000000000001470672632200161325ustar00rootroot00000000000000fastwalk-1.0.9/examples/fwwc/.gitignore000066400000000000000000000000141470672632200201150ustar00rootroot00000000000000/fwwc *.exe fastwalk-1.0.9/examples/fwwc/main.go000066400000000000000000000064061470672632200174130ustar00rootroot00000000000000// fwwc is a an example program that recursively walks directories and // prints the number of lines in each file it encounters. package main import ( "bytes" "flag" "fmt" "io" "io/fs" "os" "path/filepath" "github.com/charlievieth/fastwalk" ) var newLine = []byte{'\n'} // countLinesInFile returns the number of newlines ('\n') in file name. 
func countLinesInFile(name string) (int64, error) { f, err := os.Open(name) if err != nil { return 0, err } defer f.Close() buf := make([]byte, 16*1024) var lines int64 for { n, e := f.Read(buf) if n > 0 { lines += int64(bytes.Count(buf[:n], newLine)) } if e != nil { if e != io.EOF { err = e } break } } return lines, err } func LineCount(root string, followLinks bool) error { countLinesWalkFn := func(path string, d fs.DirEntry, err error) error { // We wrap this with fastwalk.IgnorePermissionErrors so we know the // error is not a permission error (common when walking outside a users // home directory) and is likely something worse so we should return it // and abort the walk. // // A common error here is "too many open files", which can occur if the // walkFn opens, but does not close, files. if err != nil { return err } // If the entry is a symbolic link get the type of file that // it references. typ := d.Type() if typ&fs.ModeSymlink != 0 { if fi, err := fastwalk.StatDirEntry(path, d); err == nil { typ = fi.Mode().Type() } } // Skip dot (".") files (but allow "." / PWD as the path) if path != "." && typ.IsDir() { name := d.Name() if name == "" || name[0] == '.' || name[0] == '_' { return fastwalk.SkipDir } return nil } if typ.IsRegular() { lines, err := countLinesInFile(path) if err == nil { fmt.Printf("%8d %s\n", lines, path) } else { // Print but do not return the error. fmt.Fprintf(os.Stderr, "%s: %s\n", path, err) } } return nil } // Ignore permission errors traversing directories. // // Note: this only ignores permission errors when traversing directories. // Permission errors may still be encountered when accessing files. walkFn := fastwalk.IgnorePermissionErrors(countLinesWalkFn) conf := fastwalk.Config{ // Safely follow symbolic links. This can also be achieved by // wrapping walkFn with fastwalk.FollowSymlinks(). Follow: followLinks, // If NumWorkers is ≤ 0 the default is used, which is sufficient // for most use cases. 
} // Note: Walk can also be called with a nil Config, in which case // fastwalk.DefaultConfig is used. if err := fastwalk.Walk(&conf, root, walkFn); err != nil { return fmt.Errorf("walking directory %s: %w", root, err) } return nil } const UsageMsg = `Usage: %[1]s [-L] [PATH...]: %[1]s prints the number of lines in each file it finds, ignoring directories that start with '.' or '_'. ` func main() { flag.Usage = func() { fmt.Fprintf(os.Stdout, UsageMsg, filepath.Base(os.Args[0])) flag.PrintDefaults() } followLinks := flag.Bool("L", false, "Follow symbolic links") flag.Parse() args := flag.Args() if len(args) == 0 { args = append(args, ".") } for _, root := range args { // fmt.Println("ROOT:", root) if err := LineCount(root, *followLinks); err != nil { fmt.Fprintln(os.Stderr, "error:", err) os.Exit(1) } } } fastwalk-1.0.9/fastwalk.go000066400000000000000000000541631470672632200155220ustar00rootroot00000000000000// Package fastwalk provides a faster version of [filepath.WalkDir] for file // system scanning tools. package fastwalk /* * This code borrows heavily from golang.org/x/tools/internal/fastwalk * and as such the Go license can be found in the go.LICENSE file and * is reproduced below: * * Copyright (c) 2009 The Go Authors. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following disclaimer * in the documentation and/or other materials provided with the * distribution. * * Neither the name of Google Inc. nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ import ( "errors" "io/fs" "os" "path/filepath" "runtime" "sync" ) // ErrTraverseLink is used as a return value from WalkDirFuncs to indicate that // the symlink named in the call may be traversed. This error is ignored if // the Follow [Config] option is true. var ErrTraverseLink = errors.New("fastwalk: traverse symlink, assuming target is a directory") // ErrSkipFiles is a used as a return value from WalkFuncs to indicate that the // callback should not be called for any other files in the current directory. // Child directories will still be traversed. var ErrSkipFiles = errors.New("fastwalk: skip remaining files in directory") // SkipDir is used as a return value from WalkDirFuncs to indicate that // the directory named in the call is to be skipped. It is not returned // as an error by any function. var SkipDir = fs.SkipDir // TODO(charlie): Look into implementing the fs.SkipAll behavior of // filepath.Walk and filepath.WalkDir. This may not be possible without taking // a performance hit. 
// DefaultNumWorkers reports how many worker goroutines [Walk] uses when the
// caller does not choose a number. The value is [runtime.GOMAXPROCS](-1)
// clamped to the range 4 to 32. Darwin is the exception: there the result is
// 4 when GOMAXPROCS is 8 or less and 6 otherwise, because Walk / IO
// performance on Darwin degrades with more concurrency.
//
// The best value for a particular workload may be lower or higher; the
// BenchmarkFastWalkNumWorkers benchmark may help when tuning it.
func DefaultNumWorkers() int {
	const (
		minWorkers = 4
		maxWorkers = 32
	)
	procs := runtime.GOMAXPROCS(-1)
	onDarwin := runtime.GOOS == "darwin"
	switch {
	case procs < minWorkers:
		return minWorkers
	// Darwin IO performance on APFS slows with more workers.
	// Stat performance is best around 2-4 and file IO is best
	// around 4-6. More workers only benefit CPU intensive tasks.
	case onDarwin && procs <= 8:
		return 4
	case onDarwin:
		return 6
	case procs > maxWorkers:
		return maxWorkers
	default:
		return procs
	}
}

// DefaultToSlash returns true if this is a Go program compiled for Windows
// running in an environment ([MSYS/MSYS2] or [Git for Windows]) that uses
// forward slashes as the path separator instead of the native backslash.
//
// On non-Windows OSes this is a no-op and always returns false.
//
// To detect if we're running in [MSYS/MSYS2] we check if the "MSYSTEM"
// environment variable exists.
//
// DefaultToSlash does not detect if this is a Windows executable running in [WSL].
// Instead, users should (ideally) use programs compiled for Linux in WSL.
//
// See: [github.com/junegunn/fzf/issues/3859]
//
// NOTE: The reason that we do not check if we're running in WSL is that the
// test was inconsistent since it depended on the working directory (it seems
// that "/proc" cannot be accessed when programs are ran from a mounted Windows
// directory) and what environment variables are shared between WSL and Win32
// (this requires explicit [configuration]).
//
// [MSYS/MSYS2]: https://www.msys2.org/
// [WSL]: https://learn.microsoft.com/en-us/windows/wsl/about
// [Git for Windows]: https://gitforwindows.org/
// [github.com/junegunn/fzf/issues/3859]: https://github.com/junegunn/fzf/issues/3859
// [configuration]: https://devblogs.microsoft.com/commandline/share-environment-vars-between-wsl-and-windows/
func DefaultToSlash() bool {
	// Only a Windows build can report true, and the only signal consulted is
	// whether the "MSYSTEM" environment variable is set (to any value).
	//
	// An earlier version of this function also tried to recognise a Windows
	// exe running in WSL. That check was:
	//
	//   * the file /proc/sys/fs/binfmt_misc/WSLInterop exists
	//   * the env var "WSL_DISTRO_NAME" exists
	//   * /proc/version contains "Microsoft" or "microsoft"
	//
	// The original author's notes on why it was flaky and was removed:
	//
	// The check appeared to fail when run from WSL while the current working
	// directory was a mounted Windows directory ("/mnt/c/...") since "/proc"
	// was not accessible; it worked when run from a directory that was not
	// mounted. Additionally, the "WSL_DISTRO_NAME" environment variable was
	// not set when run from WSL.
	//
	// The cause was not determined. The author's guess was that a Go exe
	// started from a mounted Windows directory uses the native Windows path
	// syscalls (for example os.Getwd reported the canonical Windows path in
	// that case, but the WSL path when started from outside a mounted Windows
	// directory).
	//
	// The recommended solution is to use programs compiled for Linux when
	// running in WSL.
	if runtime.GOOS == "windows" {
		_, isMSYS := os.LookupEnv("MSYSTEM")
		return isMSYS
	}
	return false
}

// SortMode determines the order that a directory's entries are visited by
// [Walk]. Sorting applies only at the directory level and since we process
// directories in parallel the order in which all files are visited is still
// non-deterministic.
//
// Sorting is mostly useful for programs that print the output of Walk since
// it makes it slightly more ordered compared to the default directory order.
// Sorting may also help some programs that wish to change the order in which // a directory is processed by either processing all files first or enqueuing // all directories before processing files. // // All lexical sorting is case-sensitive. // // The overhead of sorting is minimal compared to the syscalls needed to // walk directories. The impact on performance due to changing the order // in which directory entries are processed will be dependent on the workload // and the structure of the file tree being visited (it might also have no // impact). type SortMode uint32 const ( // Perform no sorting. Files will be visited in directory order. // This is the default. SortNone SortMode = iota // Directory entries are sorted by name before being visited. SortLexical // Sort the directory entries so that regular files and non-directories // (e.g. symbolic links) are visited before directories. Within each // group (regular files, other files, directories) the entries are sorted // by name. // // This is likely the mode that programs that print the output of Walk // want to use. Since by processing all files before enqueuing // sub-directories the output is slightly more grouped. // // Example order: // - file: "a.txt" // - file: "b.txt" // - link: "a.link" // - link: "b.link" // - dir: "d1/" // - dir: "d2/" // SortFilesFirst // Sort the directory entries so that directories are visited first, then // regular files are visited, and finally everything else is visited // (e.g. symbolic links). Within each group (directories, regular files, // other files) the entries are sorted by name. // // This mode is might be useful at preventing other walk goroutines from // stalling due to lack of work since it immediately enqueues all of a // directory's sub-directories for processing. 
The impact on performance // will be dependent on the workload and the structure of the file tree // being visited - it might also have no (or even a negative) impact on // performance so testing/benchmarking is recommend. // // An example workload that might cause this is: processing one directory // takes a long time, that directory has sub-directories we want to walk, // while processing that directory all other Walk goroutines have finished // processing their directories, those goroutines are now stalled waiting // for more work (waiting on the one running goroutine to enqueue its // sub-directories for processing). // // This might also be beneficial if processing files is expensive. // // Example order: // - dir: "d1/" // - dir: "d2/" // - file: "a.txt" // - file: "b.txt" // - link: "a.link" // - link: "b.link" // SortDirsFirst ) var sortModeStrs = [...]string{ SortNone: "None", SortLexical: "Lexical", SortDirsFirst: "DirsFirst", SortFilesFirst: "FilesFirst", } func (s SortMode) String() string { if 0 <= int(s) && int(s) < len(sortModeStrs) { return sortModeStrs[s] } return "SortMode(" + itoa(uint64(s)) + ")" } // DefaultConfig is the default [Config] used when none is supplied. var DefaultConfig = Config{ Follow: false, ToSlash: DefaultToSlash(), NumWorkers: DefaultNumWorkers(), Sort: SortNone, } // A Config controls the behavior of [Walk]. type Config struct { // TODO: do we want to pass a sentinel error to WalkFunc if // a symlink loop is detected? // Follow symbolic links ignoring directories that would lead // to infinite loops; that is, entering a previously visited // directory that is an ancestor of the last file encountered. // // The sentinel error ErrTraverseLink is ignored when Follow // is true (this to prevent users from defeating the loop // detection logic), but SkipDir and ErrSkipFiles are still // respected. 
Follow bool // Join all paths using a forward slash "/" instead of the system // default (the root path will be converted with filepath.ToSlash). // This option exists for users on Windows Subsystem for Linux (WSL) // that are running a Windows executable (like FZF) in WSL and need // forward slashes for compatibility (since binary was compiled for // Windows the path separator will be "\" which can cause issues in // in a Unix shell). // // This option has no effect when the OS path separator is a // forward slash "/". // // See FZF issue: https://github.com/junegunn/fzf/issues/3859 ToSlash bool // Sort a directory's entries by SortMode before visiting them. // The order that files are visited is deterministic only at the directory // level, but not generally deterministic because we process directories // in parallel. The performance impact of sorting entries is generally // negligible compared to the syscalls required to read directories. // // This option mostly exists for programs that print the output of Walk // (like FZF) since it provides some order and thus makes the output much // nicer compared to the default directory order, which is basically random. Sort SortMode // Number of parallel workers to use. If NumWorkers if ≤ 0 then // DefaultNumWorkers is used. NumWorkers int } // Copy returns a copy of c. If c is nil an empty [Config] is returned. func (c *Config) Copy() *Config { dupe := new(Config) if c != nil { *dupe = *c } return dupe } // A DirEntry extends the [fs.DirEntry] interface to add a Stat() method // that returns the result of calling [os.Stat] on the underlying file. // The results of Info() and Stat() are cached. // // The [fs.DirEntry] argument passed to the [fs.WalkDirFunc] by [Walk] is // always a DirEntry. type DirEntry interface { fs.DirEntry // Stat returns the fs.FileInfo for the file or subdirectory described // by the entry. 
The returned FileInfo may be from the time of the // original directory read or from the time of the call to os.Stat. // If the entry denotes a symbolic link, Stat reports the information // about the target itself, not the link. Stat() (fs.FileInfo, error) } // Walk is a faster implementation of [filepath.WalkDir] that walks the file // tree rooted at root in parallel, calling walkFn for each file or directory // in the tree, including root. // // All errors that arise visiting files and directories are filtered by walkFn // see the [fs.WalkDirFunc] documentation for details. // The [IgnorePermissionErrors] adapter is provided to handle to common case of // ignoring [fs.ErrPermission] errors. // // By default files are walked in directory order, which makes the output // non-deterministic. The Sort [Config] option can be used to control the order // in which directory entries are visited, but since we walk the file tree in // parallel the output is still non-deterministic (it's just slightly more // sorted). // // When a symbolic link is encountered, by default Walk will not follow it // unless walkFn returns [ErrTraverseLink] or the Follow [Config] setting is // true. See below for a more detailed explanation. // // Walk calls walkFn with paths that use the separator character appropriate // for the operating system unless the ToSlash [Config] setting is true which // will cause all paths to be joined with a forward slash. // // If walkFn returns the [SkipDir] sentinel error, the directory is skipped. // If walkFn returns the [ErrSkipFiles] sentinel error, the callback will not // be called for any other files in the current directory. Unlike, // [filepath.Walk] and [filepath.WalkDir] the [fs.SkipAll] sentinel error is // not respected. // // Unlike [filepath.WalkDir]: // // - Multiple goroutines stat the filesystem concurrently. The provided // walkFn must be safe for concurrent use. // // - The order that directories are visited is non-deterministic. 
//
//   - File stat calls must be done by the user and should be done via
//     the [DirEntry] argument to walkFn. The [DirEntry] caches the result
//     of both Info() and Stat(). The Stat() method is a fastwalk specific
//     extension and can be called by type-asserting the [fs.DirEntry] to a
//     [fastwalk.DirEntry] or via the [StatDirEntry] helper. The [fs.DirEntry]
//     argument to walkFn will always be convertible to a [fastwalk.DirEntry].
//
//   - The [fs.DirEntry] argument is always a [fastwalk.DirEntry], which has
//     a Stat() method that returns the result of calling [os.Stat] on the
//     file. The results of Stat() and Info() are cached. The [StatDirEntry]
//     helper can be used to call Stat() on the returned [fastwalk.DirEntry].
//
//   - Walk can follow symlinks in two ways: the first, and simplest, is to
//     set the Follow [Config] option to true - this will cause Walk to follow
//     symlinks and detect/ignore any symlink loops; the second is for walkFn
//     to return the sentinel [ErrTraverseLink] error.
//     When using [ErrTraverseLink] to follow symlinks it is walkFn's
//     responsibility to prevent Walk from going into symlink cycles.
//     By default Walk does not follow symbolic links.
//
//   - When walking a directory, walkFn will be called for each non-directory
//     entry and directories will be enqueued and visited at a later time or
//     by another goroutine.
//
//   - The [fs.SkipAll] sentinel error is not respected and not ignored. If the
//     WalkDirFunc returns SkipAll then Walk will exit with the error SkipAll.
func Walk(conf *Config, root string, walkFn fs.WalkDirFunc) error {
	// Stat the root up front: if it cannot be stat'ed the error is returned
	// before any worker goroutine is started.
	fi, err := os.Stat(root)
	if err != nil {
		return err
	}
	if conf == nil {
		// Work on a private copy so the package-level DefaultConfig is
		// never aliased by conf.
		dupe := DefaultConfig
		conf = &dupe
	}
	if conf.ToSlash {
		root = filepath.ToSlash(root)
	}

	// Make sure to wait for all workers to finish, otherwise
	// walkFn could still be called after returning. This Wait call
	// runs after close(w.donec) below (deferred calls run in LIFO order).
	var wg sync.WaitGroup
	defer wg.Wait()

	numWorkers := conf.NumWorkers
	if numWorkers <= 0 {
		numWorkers = DefaultNumWorkers()
	}

	w := &walker{
		fn: walkFn,
		// TODO: Increase the size of enqueuec so that we don't stall
		// while processing a directory. Increasing the size of workc
		// doesn't help as much (needs more testing).
		enqueuec: make(chan walkItem, numWorkers), // buffered for performance
		workc:    make(chan walkItem, numWorkers), // buffered for performance
		donec:    make(chan struct{}),

		// buffered for correctness & not leaking goroutines:
		resc: make(chan error, numWorkers),

		// TODO: we should just pass the Config
		follow:   conf.Follow,
		toSlash:  conf.ToSlash,
		sortMode: conf.Sort,
	}
	if w.follow {
		// Record the root's FileInfo in ignoredDirs, the list that
		// shouldSkipDir compares candidate directories against.
		w.ignoredDirs = append(w.ignoredDirs, fi)
	}

	// Closing donec on return tells every worker to exit; it runs before
	// the deferred wg.Wait above.
	defer close(w.donec)

	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go w.doWork(&wg)
	}

	root = cleanRootPath(root)
	// todo is used as a stack: the last element is the next item offered
	// to the workers.
	//
	// NOTE: in BenchmarkFastWalk the size of todo averages around
	// 170 and can be in the ~250 range at max.
	todo := []walkItem{{dir: root, info: fileInfoToDirEntry(filepath.Dir(root), fi)}}
	// out counts the items handed to workers whose result has not yet been
	// received on resc.
	out := 0
	for {
		workc := w.workc
		var workItem walkItem
		if len(todo) == 0 {
			// A nil channel is never ready in a select, which disables
			// the send case while there is nothing to hand out.
			workc = nil
		} else {
			workItem = todo[len(todo)-1]
		}
		select {
		case workc <- workItem:
			todo = todo[:len(todo)-1]
			out++
		case it := <-w.enqueuec:
			// TODO: consider appending to todo directly and using a
			// mutex; this might help with contention around select
			todo = append(todo, it)
		case err := <-w.resc:
			out--
			if err != nil {
				// The first error reported by a worker ends the walk.
				return err
			}
			if out == 0 && len(todo) == 0 {
				// It's safe to quit here, as long as the buffered
				// enqueue channel isn't also readable, which might
				// happen if the worker sends both another unit of
				// work and its result before the other select was
				// scheduled and both w.resc and w.enqueuec were
				// readable.
				select {
				case it := <-w.enqueuec:
					todo = append(todo, it)
				default:
					return nil
				}
			}
		}
	}
}

// doWork reads directories as instructed (via workc) and runs the
// user's callback function.
func (w *walker) doWork(wg *sync.WaitGroup) { defer wg.Done() for { select { case <-w.donec: return case it := <-w.workc: select { case <-w.donec: return case w.resc <- w.walk(it.dir, it.info, !it.callbackDone): } } } } type walker struct { fn fs.WalkDirFunc donec chan struct{} // closed on fastWalk's return workc chan walkItem // to workers enqueuec chan walkItem // from workers resc chan error // from workers ignoredDirs []fs.FileInfo follow bool toSlash bool sortMode SortMode } type walkItem struct { dir string info DirEntry callbackDone bool // callback already called; don't do it again } func (w *walker) enqueue(it walkItem) { select { case w.enqueuec <- it: case <-w.donec: } } func (w *walker) shouldSkipDir(fi fs.FileInfo) bool { for _, ignored := range w.ignoredDirs { if os.SameFile(ignored, fi) { return true } } return false } func (w *walker) shouldTraverse(path string, de DirEntry) bool { ts, err := de.Stat() if err != nil { return false } if !ts.IsDir() { return false } if w.shouldSkipDir(ts) { return false } for { parent := filepath.Dir(path) if parent == path { return true } parentInfo, err := os.Stat(parent) if err != nil { return false } if os.SameFile(ts, parentInfo) { return false } path = parent } } func (w *walker) joinPaths(dir, base string) string { // Handle the case where the root path argument to Walk is "/" // without this the returned path is prefixed with "//". 
// cleanRootPath returns root with any run of trailing path separators
// removed. A non-empty root made up only of separators ("//") is reduced to
// its first byte, and the empty string is returned unchanged.
//
// root is returned untouched when running on Windows or when it has a volume
// name, because such paths are too complicated to clean safely.
func cleanRootPath(root string) string {
	if runtime.GOOS == "windows" || filepath.VolumeName(root) != "" {
		// Windows paths or any path with a volume name (which AFAIK should
		// only be Windows) are a bit too complicated to clean.
		return root
	}
	// Scan backwards for the last byte that is not a separator and cut
	// right after it.
	for i := len(root) - 1; i >= 0; i-- {
		if !os.IsPathSeparator(root[i]) {
			return root[:i+1]
		}
	}
	if root != "" {
		return root[:1] // root is all path separators ("//")
	}
	return root
}
// dtToType maps a directory entry type code (one of the syscall.DT_*
// constants) to the matching os.FileMode type bits. Regular files map to 0.
// Any code that is not handled below yields a FileMode with every bit set.
func dtToType(typ uint8) os.FileMode {
	mode := ^os.FileMode(0) // result for unrecognised type codes
	switch typ {
	case syscall.DT_REG:
		mode = 0
	case syscall.DT_DIR:
		mode = os.ModeDir
	case syscall.DT_LNK:
		mode = os.ModeSymlink
	case syscall.DT_FIFO:
		mode = os.ModeNamedPipe
	case syscall.DT_SOCK:
		mode = os.ModeSocket
	case syscall.DT_CHR:
		mode = os.ModeDevice | os.ModeCharDevice
	case syscall.DT_BLK:
		mode = os.ModeDevice
	}
	return mode
}
// writeFile creates (or truncates) filename with permissions perm, creating
// any missing parent directories with mode 0755 first, and writes data to it.
//
// data must be a string, a []byte or an io.Reader. Any other type results in
// an *os.PathError with Op "WriteFile" (the file has already been created at
// that point). A write error takes precedence over an error from closing the
// file.
func writeFile(filename string, data interface{}, perm os.FileMode) error {
	parent := filepath.Dir(filename)
	if mkErr := os.MkdirAll(parent, 0755); mkErr != nil {
		return mkErr
	}

	const flags = os.O_WRONLY | os.O_CREATE | os.O_TRUNC
	f, openErr := os.OpenFile(filename, flags, perm)
	if openErr != nil {
		return openErr
	}

	var writeErr error
	switch src := data.(type) {
	case string:
		_, writeErr = f.WriteString(src)
	case []byte:
		_, writeErr = f.Write(src)
	case io.Reader:
		_, writeErr = io.Copy(f, src)
	default:
		f.Close()
		return &os.PathError{Op: "WriteFile", Path: filename, Err: fmt.Errorf("invalid data type: %T", data)}
	}

	closeErr := f.Close()
	if writeErr != nil {
		return writeErr
	}
	return closeErr
}
// maxFileNameLength probes the file system behind t.TempDir() and returns the
// longest file name (in bytes, made of the letter 'a') that can be created
// there. Lengths below 8192 are searched. The test is failed via t.Fatal if
// no usable length is found.
func maxFileNameLength(t testing.TB) int {
	const limit = 8192
	dir := t.TempDir()
	filler := strings.Repeat("a", limit)

	// tooLong reports whether a file whose name is size bytes long cannot
	// be created. A file that was created is removed again.
	tooLong := func(size int) bool {
		name := filepath.Join(dir, filler[:size])
		if err := os.WriteFile(name, []byte("1"), 0644); err != nil {
			return true
		}
		os.Remove(name)
		return false
	}

	// Binary search for the first length that is rejected; the answer is
	// one less than that.
	firstBad := sort.Search(limit, tooLong)
	if firstBad <= 1 {
		t.Fatal("Failed to find the max filename length:", firstBad)
	}
	longest := firstBad - 1
	if tooLong(longest) {
		t.Fatal("Failed to find the max filename length:", firstBad)
	}
	return longest
}
// maxPathLength finds, by trial, the longest directory path (in bytes) that
// os.MkdirAll can create beneath a fresh temporary directory, and then
// creates sibling directory trees that contain the same long relative path.
//
// It returns root, the parent of the (possibly padded) temporary directory,
// and pathMax, the longest path length that was accepted. The test is failed
// via t.Fatal if the search does not produce a usable length or any
// directory cannot be created.
func maxPathLength(t testing.TB) (root string, pathMax int) {
	tmp, err := filepath.EvalSymlinks(t.TempDir())
	if err != nil {
		t.Fatal(err)
	}
	// Pad tmp with an extra path element depending on len(tmp) modulo 4.
	// NOTE(review): cases 0 and 2 leave a length that is a multiple of 4,
	// while cases 1 and 3 leave a length of 2 modulo 4. If a multiple of 4
	// is the goal then the bodies of cases 1 and 3 look swapped - confirm.
	switch len(tmp) % 4 {
	case 0:
	case 1:
		// Can't just add 1 "/" so add 5 ("/aaaa")
		tmp = filepath.Join(tmp, "/aaaa")
	case 2:
		tmp = filepath.Join(tmp, "/a")
	case 3:
		tmp = filepath.Join(tmp, "/aa")
	}
	base := tmp

	// invalidPathLength reports whether a directory path that is exactly n
	// bytes long cannot be created. Lengths that do not exceed len(tmp) are
	// reported as valid without touching the file system. The most recent
	// path that was created successfully is stored in longestPath.
	var longestPath string
	invalidPathLength := func(n int) bool {
		m := n - len(tmp)
		if m <= 0 {
			return false
		}
		var w strings.Builder
		w.Grow(n + 1)
		w.WriteString(base)

		// Append 128-byte elements while more than len(elem) bytes are
		// still missing, then fill what is left with 'b' bytes, which
		// extend the last path element.
		elem := "/" + strings.Repeat("a", 127) // path element
		for w.Len() < n-len(elem) {
			w.WriteString(elem)
		}
		for w.Len() < n {
			w.WriteByte('b')
		}

		path := w.String()
		if len(path) != n {
			t.Fatalf("invalid PATH length: %d want: %d", len(path), n)
		}
		err := os.MkdirAll(path, 0755)
		if err == nil {
			// Don't remove directories on success since it's slow
			// and we'll use them again as the path length increases.
			longestPath = path
		}
		return err != nil
	}

	// Use a binary search to find the max path length (+1)
	n := sort.Search(16*1024, invalidPathLength)
	if n <= 1 {
		t.Fatal("Failed to find the max path length:", n)
	}
	pathMax = n - 1
	// Re-check the chosen length; on success this also leaves longestPath
	// set to the path of length pathMax.
	if invalidPathLength(pathMax) {
		t.Fatal("Failed to find the max path length:", n)
	}

	// Make sure longestPath exists
	if _, err := os.Stat(longestPath); err != nil {
		t.Fatalf("Invalid longest path (%q): %v", longestPath, err)
	}

	// Create directories under the tmp/root dir: /{TMP}/{b..z}/{LONGEST_PATH}
	root = filepath.Dir(tmp)
	name := filepath.Base(tmp)
	long := strings.TrimPrefix(longestPath, tmp)

	// Fewer sibling trees are created in -short mode ('b' through 'e').
	end := 'z'
	if testing.Short() {
		end = 'e'
	}
	for r := 'b'; r <= end; r++ {
		// Each sibling's name has the same length as name, so the full
		// sibling path is as long as longestPath.
		newBase := strings.Repeat(string(r), len(name))
		if err := os.MkdirAll(filepath.Join(root, newBase, long), 0755); err != nil {
			t.Fatal(err)
		}
	}
	return root, pathMax
}
tempdir, err := os.MkdirTemp("", "fastwalk-test-*") if err != nil { t.Error(err) } if err := writeFile(tempdir+"/want.txt", strings.Join(want, "\n"), 0666); err != nil { t.Error(err) } if err := writeFile(tempdir+"/got.txt", strings.Join(got, "\n"), 0666); err != nil { t.Error(err) } t.Fatalf("Output does not match: see the files in: %q", tempdir) } } func TestFastWalk_WindowsRootPaths(t *testing.T) { if runtime.GOOS != "windows" { t.Skip("test only supported on Windows") } sameFile := func(t *testing.T, name1, name2 string) bool { fi1, err := os.Stat(name1) if err != nil { t.Fatal(err) } fi2, err := os.Stat(name2) if err != nil { t.Fatal(err) } return os.SameFile(fi1, fi2) } walk := func(t *testing.T, root string) map[string]fs.DirEntry { var mu sync.Mutex seen := make(map[string]fs.DirEntry) errStop := errors.New("errStop") fn := func(path string, de fs.DirEntry, err error) error { if err != nil { return err } mu.Lock() seen[path] = de mu.Unlock() if path != root && de.IsDir() { return fs.SkipDir } return nil } err := fastwalk.Walk(nil, root, fastwalk.IgnorePermissionErrors(fn)) if err != nil && err != errStop { t.Fatal(err) } if len(seen) <= 1 { // If we are a child of the root directory we should have visited at // least two entries: the root itself and a directory that leads to, // or is, our current working directory. t.Fatalf("empty directory: %s", root) } return seen } pwd, err := filepath.Abs(".") if err != nil { t.Fatal(err) } vol := filepath.VolumeName(pwd) if !regexp.MustCompile(`^[A-Za-z]:$`).MatchString(vol) { // Ignore UNC names and other weird Windows paths to keep this simple. t.Skipf("unsupported volume name: %s for path: %s", vol, pwd) } if !sameFile(t, pwd, vol) { t.Skipf("skipping %s and %s should be considered the same file", pwd, vol) } // Test that walking the disk root ("C:\") actually walks the disk root. 
// Previously, there was a bug where the path "C:\" was transformed to "C:" // before walking which caused fastwalk to walk the current directory. // // https://github.com/charlievieth/fastwalk/issues/37 t.Run("FullyQualified", func(t *testing.T) { root := vol + `\` if sameFile(t, pwd, root) { t.Skipf("the current working directory (%s) is the disk root: %s", pwd, root) } seen := walk(t, root) // Make sure we don't append an extraneous slash to the root ("C:\" => "C:\\a"). for path := range seen { rest := strings.TrimPrefix(path, vol) if strings.Contains(rest, `\\`) { t.Errorf(`path contains multiple consecutive slashes after volume (%s): "%s"`, vol, path) } if s := filepath.Clean(path); s != path { t.Errorf(`filepath.Clean("%s") == "%s"`, path, s) } } // Make sure we didn't walk the current directory. This will happen if // the root argument to Walk is a drive letter ("C:\") but we strip off // the trailing slash ("C:\" => "C:") since this makes the path relative // to the current directory on drive "C". // // See: https://github.com/charlievieth/fastwalk/issues/37 // // Docs: https://learn.microsoft.com/en-us/windows/win32/fileio/naming-a-file#fully-qualified-vs-relative-paths for path, de := range seen { if path == root { // Ignore root since filepath.Base("C:\") == "\" and "C:\" and "\" // are equivalent. continue } fi1, err := de.Info() if err != nil { if os.IsNotExist(err) || os.IsPermission(err) { continue } t.Fatal(err) } name := filepath.Base(path) fi2, err := os.Lstat(name) if err != nil { continue } if os.SameFile(fi1, fi2) { t.Errorf("Walking root (%s) returned entries for the current working "+ "directory (%s): file %s is the same as %s", root, pwd, path, name) } } // Add file base name mappings for _, de := range seen { seen[de.Name()] = de } // Make sure we read some files from the disk root. 
des, err := os.ReadDir(root) if err != nil { t.Fatal(err) } if len(des) == 0 { t.Fatalf("Disk root %s contains no files!", root) } same := 0 for _, d2 := range des { d1 := seen[d2.Name()] if d1 == nil { continue } fi1, err := d1.Info() if err != nil { t.Log(err) continue } fi2, err := d2.Info() if err != nil { t.Log(err) continue } if os.SameFile(fi1, fi2) { same++ } } // TODO: Expect to see N% of files and use // a more descriptive error message if same == 0 { t.Fatalf(`Error failed to walk dist root: "%s"`, root) } }) // Test that paths like "C:" are treated as a relative path. t.Run("Relative", func(t *testing.T) { seen := walk(t, vol) // Make sure we don't append an extraneous slash to the root ("C:\" => "C:\\a"). for path := range seen { rest := strings.TrimPrefix(path, vol) if strings.Contains(rest, `\\`) { t.Errorf(`path contains multiple consecutive slashes after volume (%s): "%s"`, vol, path) } if path == vol { continue // Clean("C:") => "C:." } if s := filepath.Clean(path); s != path { t.Errorf(`filepath.Clean("%s") == "%s"`, path, s) } } // Make sure we walk the current directory. for path, de := range seen { if path == vol { // Ignore the volume since filepath.Base("C:") == "\" and "C:" and "\" // are not equivalent. continue } fi1, err := de.Info() if err != nil { t.Fatal(err) } name := filepath.Base(path) fi2, err := os.Lstat(name) if err != nil { // NB: This test will fail if this file is removed while it's // running. There are workarounds for this, but for now it's // simpler to just error if that happens. 
t.Fatal(err) } if !os.SameFile(fi1, fi2) { t.Errorf("Expected files (%s) and (%s) to be the same", path, name) } } }) } func TestFastWalk_Symlink(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "LINK:../foo/foo.go", "symdir": "LINK:foo", "broken/broken.go": "LINK:../nonexistent", }, func(path string, typ fs.DirEntry, err error) error { requireNoError(t, err) return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/symdir": os.ModeSymlink, "/src/broken": os.ModeDir, "/src/broken/broken.go": os.ModeSymlink, }) } // Test that the fs.DirEntry passed to WalkFunc is always a fastwalk.DirEntry. func TestFastWalk_DirEntryType(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "LINK:../foo/foo.go", "symdir": "LINK:foo", "broken/broken.go": "LINK:../nonexistent", }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if _, ok := de.(fastwalk.DirEntry); !ok { t.Errorf("%q: not a fastwalk.DirEntry: %T", path, de) } if de.Type() != de.Type().Type() { t.Errorf("%s: type mismatch got: %q want: %q", path, de.Type(), de.Type().Type()) } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/symdir": os.ModeSymlink, "/src/broken": os.ModeDir, "/src/broken/broken.go": os.ModeSymlink, }) } func TestFastWalk_SkipDir(t *testing.T) { test := func(t *testing.T, mode fastwalk.SortMode) { conf := fastwalk.DefaultConfig.Copy() conf.Sort = mode testFastWalkConf(t, conf, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "skip/skip.go": "skip", }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) typ := de.Type().Type() if typ == os.ModeDir && strings.HasSuffix(path, "skip") { return filepath.SkipDir } return 
nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/skip": os.ModeDir, }) } // Test that sorting respects fastwalk.ErrSkipFiles for _, mode := range []fastwalk.SortMode{ fastwalk.SortNone, fastwalk.SortLexical, fastwalk.SortDirsFirst, fastwalk.SortFilesFirst, } { t.Run(mode.String(), func(t *testing.T) { test(t, mode) }) } } func TestFastWalk_SkipFiles(t *testing.T) { mapKeys := func(m map[string]os.FileMode) []string { a := make([]string, 0, len(m)) for k := range m { a = append(a, k) } return a } test := func(t *testing.T, mode fastwalk.SortMode) { // Directory iteration order is undefined, so there's no way to know // which file to expect until the walk happens. Rather than mess // with the test infrastructure, just mutate want. want := map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/zzz": os.ModeDir, "/src/zzz/c.go": 0, } conf := fastwalk.DefaultConfig.Copy() conf.Sort = mode var mu sync.Mutex testFastWalkConf(t, conf, map[string]string{ "a_skipfiles.go": "a", "b_skipfiles.go": "b", "zzz/c.go": "c", }, func(path string, _ fs.DirEntry, err error) error { requireNoError(t, err) if strings.HasSuffix(path, "_skipfiles.go") { mu.Lock() defer mu.Unlock() want["/src/"+filepath.Base(path)] = 0 return fastwalk.ErrSkipFiles } return nil }, want) if len(want) != 5 { t.Errorf("invalid number of files visited: wanted 5, got %v (%q)", len(want), mapKeys(want)) } } // Test that sorting respects fastwalk.ErrSkipFiles for _, mode := range []fastwalk.SortMode{ fastwalk.SortNone, fastwalk.SortLexical, fastwalk.SortDirsFirst, fastwalk.SortFilesFirst, } { t.Run(mode.String(), func(t *testing.T) { test(t, mode) }) } } func TestFastWalk_TraverseSymlink(t *testing.T) { testFastWalk(t, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "symdir": "LINK:foo", }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) typ := 
de.Type().Type() if typ == os.ModeSymlink { return fastwalk.ErrTraverseLink } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/symdir": os.ModeSymlink, "/src/symdir/foo.go": 0, }) } func TestFastWalk_Follow(t *testing.T) { subTests := []struct { Name string OnLink func(path string, d fs.DirEntry) error }{ // Test that the walk func does *not* need to return // ErrTraverseLink for links to be followed. { Name: "Default", OnLink: func(path string, d fs.DirEntry) error { return nil }, }, // Test that returning ErrTraverseLink does not interfere // with the Follow logic. { Name: "ErrTraverseLink", OnLink: func(path string, d fs.DirEntry) error { if d.Type()&os.ModeSymlink != 0 { if fi, err := fastwalk.StatDirEntry(path, d); err == nil && fi.IsDir() { return fastwalk.ErrTraverseLink } } return nil }, }, } for _, x := range subTests { t.Run(x.Name, func(t *testing.T) { conf := fastwalk.Config{ Follow: true, } testFastWalkConf(t, &conf, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "foo/symlink": "LINK:foo.go", "bar/symdir": "LINK:../foo/", "bar/link1": "LINK:../foo/", }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if err != nil { return err } if de.Type()&os.ModeSymlink != 0 { return x.OnLink(path, de) } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/bar/link1": os.ModeSymlink, "/src/bar/link1/foo.go": 0, "/src/bar/link1/symlink": os.ModeSymlink, "/src/bar/symdir": os.ModeSymlink, "/src/bar/symdir/foo.go": 0, "/src/bar/symdir/symlink": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/foo/symlink": os.ModeSymlink, }) }) } } func TestFastWalk_Follow_SkipDir(t *testing.T) { conf := fastwalk.Config{ Follow: true, } testFastWalkConf(t, &conf, map[string]string{ ".dot/baz.go": "one", "bar/bar.go": "three", 
"bar/dot": "LINK:../.dot/", "bar/symdir": "LINK:../foo/", "foo/foo.go": "two", "foo/symlink": "LINK:foo.go", }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if err != nil { return err } if strings.HasPrefix(de.Name(), ".") { return filepath.SkipDir } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/.dot": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/bar/dot": os.ModeSymlink, "/src/bar/dot/baz.go": 0, "/src/bar/symdir": os.ModeSymlink, "/src/bar/symdir/foo.go": 0, "/src/bar/symdir/symlink": os.ModeSymlink, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, "/src/foo/symlink": os.ModeSymlink, }) } func TestFastWalk_Follow_SymlinkLoop(t *testing.T) { tempdir, err := os.MkdirTemp("", "fastwalk-test-*") if err != nil { t.Fatal(err) } defer cleanupOrLogTempDir(t, tempdir) if err := writeFile(tempdir+"/src/foo.go", "hello", 0644); err != nil { t.Fatal(err) } if err := symlink(t, "../src", tempdir+"/src/loop"); err != nil { t.Fatal(err) } conf := fastwalk.Config{ Follow: true, } var walked int32 err = fastwalk.Walk(&conf, tempdir, func(path string, de fs.DirEntry, err error) error { if err != nil { return err } if n := atomic.AddInt32(&walked, 1); n > 20 { return fmt.Errorf("symlink loop: %d", n) } return nil }) if err != nil { t.Fatal(err) } } // Test that ErrTraverseLink is ignored when following symlinks // if it would cause a symlink loop. 
func TestFastWalk_Follow_ErrTraverseLink(t *testing.T) { conf := fastwalk.Config{ Follow: true, } testFastWalkConf(t, &conf, map[string]string{ "foo/foo.go": "one", "bar/bar.go": "two", "bar/symdir": "LINK:../foo/", "bar/loop": "LINK:../bar/", // symlink loop }, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if err != nil { return err } if de.Type()&os.ModeSymlink != 0 { if fi, err := fastwalk.StatDirEntry(path, de); err == nil && fi.IsDir() { return fastwalk.ErrTraverseLink } } return nil }, map[string]os.FileMode{ "": os.ModeDir, "/src": os.ModeDir, "/src/bar": os.ModeDir, "/src/bar/bar.go": 0, "/src/bar/loop": os.ModeSymlink, "/src/bar/symdir": os.ModeSymlink, "/src/bar/symdir/foo.go": 0, "/src/foo": os.ModeDir, "/src/foo/foo.go": 0, }) } func TestFastWalk_Error(t *testing.T) { tmp := t.TempDir() for _, child := range []string{ "foo/foo.go", "bar/bar.go", "skip/skip.go", } { if err := writeFile(filepath.Join(tmp, child), child, 0644); err != nil { t.Fatal(err) } } exp := errors.New("expected") err := fastwalk.Walk(nil, tmp, func(_ string, _ fs.DirEntry, err error) error { requireNoError(t, err) return exp }) if !errors.Is(err, exp) { t.Errorf("want error: %#v got: %#v", exp, err) } } func TestFastWalk_ErrNotExist(t *testing.T) { tmp := t.TempDir() if err := os.Remove(tmp); err != nil { t.Fatal(err) } err := fastwalk.Walk(nil, tmp, func(_ string, _ fs.DirEntry, err error) error { return err }) if !os.IsNotExist(err) { t.Fatalf("os.IsNotExist(%+v) = false want: true", err) } } func TestFastWalk_ErrPermission(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("test not supported for Windows") } tempdir := t.TempDir() want := map[string]os.FileMode{ "": os.ModeDir, "/bad": os.ModeDir, } for i := 0; i < runtime.NumCPU()*4; i++ { dir := fmt.Sprintf("/d%03d", i) name := fmt.Sprintf("%s/f%03d.txt", dir, i) if err := writeFile(filepath.Join(tempdir, name), "data", 0644); err != nil { t.Fatal(err) } want[name] = 0 want[filepath.Dir(name)] = 
os.ModeDir } filename := filepath.Join(tempdir, "/bad/bad.txt") if err := writeFile(filename, "data", 0644); err != nil { t.Fatal(err) } // Make the directory unreadable dirname := filepath.Dir(filename) if err := os.Chmod(dirname, 0355); err != nil { t.Fatal(err) } t.Cleanup(func() { if err := os.Remove(filename); err != nil { t.Error(err) } if err := os.Chmod(dirname, 0755); err != nil { t.Log(err) } if err := os.Remove(dirname); err != nil { t.Error(err) } }) got := map[string]os.FileMode{} var mu sync.Mutex err := fastwalk.Walk(nil, tempdir, func(path string, de fs.DirEntry, err error) error { if err != nil && os.IsPermission(err) { return nil } mu.Lock() defer mu.Unlock() if !strings.HasPrefix(path, tempdir) { t.Errorf("bogus prefix on %q, expect %q", path, tempdir) } key := filepath.ToSlash(strings.TrimPrefix(path, tempdir)) if old, dup := got[key]; dup { t.Errorf("callback called twice for key %q: %v -> %v", key, old, de.Type()) } got[key] = de.Type() return nil }) if err != nil { t.Error("Walk:", err) } if !reflect.DeepEqual(got, want) { t.Errorf("walk mismatch.\n got:\n%v\nwant:\n%v", formatFileModes(got), formatFileModes(want)) diffFileModes(t, got, want) } } func TestFastWalk_ToSlash(t *testing.T) { if runtime.GOOS != "windows" { t.Skip("test only supported on Windows") } abs, err := filepath.Abs(".") if err != nil { t.Fatal(err) } root := filepath.ToSlash(abs) conf := fastwalk.Config{ ToSlash: true, } var count atomic.Int32 err = fastwalk.Walk(&conf, root, func(path string, de fs.DirEntry, err error) error { requireNoError(t, err) if strings.Contains(path, `\`) { t.Errorf("found non-forward slash separator in path: %q", path) } if _, err := de.Info(); err != nil { t.Fatal(err) } if _, err := de.(fastwalk.DirEntry).Stat(); err != nil { t.Fatal(err) } count.Add(1) return nil }) if err != nil { t.Fatal(err) } if count.Load() == 0 { t.Fatal("did not walk any files") } } func TestFastWalk_SortMode(t *testing.T) { // Can only assert on files since the order 
that directories are // traversed is non-deterministic. tmp, err := os.MkdirTemp("", "test-fast-walk") if err != nil { t.Fatal(err) } defer cleanupOrLogTempDir(t, tmp) want := []string{ "a.txt", "b.txt", "c.txt", "d.txt", "e.txt", "f.txt", "a.lnk", "b.lnk", "c.lnk", "d.lnk", "e.lnk", "f.lnk", } for _, name := range want { path := filepath.Join(tmp, name) if strings.HasSuffix(name, ".txt") { if err := writeFile(path, "data", 0666); err != nil { t.Fatal(err) } } else { if err := symlink(t, path, path); err != nil { t.Fatal(err) } } } for _, mode := range []fastwalk.SortMode{ fastwalk.SortLexical, fastwalk.SortFilesFirst, // We don't actually have any dirs because the order // they're visited is non-deterministic. fastwalk.SortDirsFirst, } { t.Run(mode.String(), func(t *testing.T) { want := append([]string(nil), want...) if mode == fastwalk.SortLexical { sort.Strings(want) } conf := fastwalk.Config{ Sort: mode, } // We technically don't need a mutex since we're visiting // only one directory, but use it for correctness. 
var mu sync.Mutex var got []string err := fastwalk.Walk(&conf, tmp, func(path string, d fs.DirEntry, err error) error { if err != nil { return err } // Ignore the parent directory if !d.IsDir() { mu.Lock() got = append(got, d.Name()) mu.Unlock() } return nil }) if err != nil { t.Fatal(err) } if !reflect.DeepEqual(got, want) { t.Errorf("Invalid output\ngot: %q\nwant: %q", got, want) } }) } } func TestSortModeString(t *testing.T) { tests := []struct { mode fastwalk.SortMode want string }{ {fastwalk.SortNone, "None"}, {fastwalk.SortLexical, "Lexical"}, {fastwalk.SortDirsFirst, "DirsFirst"}, {fastwalk.SortFilesFirst, "FilesFirst"}, {100, "SortMode(100)"}, {math.MaxUint32, fmt.Sprintf("SortMode(%d)", uint32(math.MaxUint32))}, } for _, test := range tests { got := test.mode.String() if got != test.want { t.Errorf("%d: got: %s want: %s", test.mode, got, test.want) } } } func TestConfigCopy(t *testing.T) { t.Run("Nil", func(t *testing.T) { c := (*fastwalk.Config)(nil).Copy() if c == nil { t.Fatal("failed to copy nil config") } if *c != (fastwalk.Config{}) { t.Fatal("copy of nil config should be empty") } }) t.Run("Copy", func(t *testing.T) { a := fastwalk.DefaultConfig c := a.Copy() c.NumWorkers *= 2 if a.NumWorkers == c.NumWorkers { t.Fatal("failed to copy config") } }) } func TestFastWalkJoinPaths(t *testing.T) { if runtime.GOOS == "windows" { t.Skip("not supported on Windows") } if abs, err := filepath.Abs("/"); err != nil || abs != "/" { t.Skipf(`skipping filepath.Abs("/") = %q, %v; want: "/", nil`, abs, err) } sentinel := errors.New("halt now") var root string var once sync.Once err := fastwalk.Walk(nil, "///", func(path string, d fs.DirEntry, err error) error { if err != nil { return err } once.Do(func() { root = path }) return sentinel }) if err != nil && err != sentinel { t.Fatal(err) } if root != "/" { t.Fatalf(`failed to convert root "///" to "/" got: %q`, root) } } func TestSkipAll(t *testing.T) { err := fastwalk.Walk(nil, ".", func(path string, info 
fs.DirEntry, err error) error { return fs.SkipAll }) if err != fs.SkipAll { t.Error("Expected fs.SkipAll to be returned got:", err) } } func BenchmarkSortModeString(b *testing.B) { var s string for i := 0; i < b.N; i++ { s = fastwalk.SortMode(10).String() } if b.Failed() { b.Log(s) } } func diffFileModes(t *testing.T, got, want map[string]os.FileMode) { type Mode struct { Name string Mode os.FileMode } var extra []Mode for k, v := range got { if _, ok := want[k]; !ok { extra = append(extra, Mode{k, v}) } } var missing []Mode for k, v := range want { if _, ok := got[k]; !ok { missing = append(missing, Mode{k, v}) } } var delta []Mode for k, v := range got { if vv, ok := want[k]; ok && vv != v { delta = append(delta, Mode{k, v}, Mode{k, vv}) } } w := new(strings.Builder) printMode := func(name string, modes []Mode) { if len(modes) == 0 { return } sort.Slice(modes, func(i, j int) bool { return modes[i].Name < modes[j].Name }) if w.Len() == 0 { w.WriteString("\n") } fmt.Fprintf(w, "%s:\n", name) for _, m := range modes { fmt.Fprintf(w, " %-20s: %s\n", m.Name, m.Mode.String()) } } printMode("Extra", extra) printMode("Missing", missing) printMode("Delta", delta) if w.Len() != 0 { t.Error(w.String()) } } // Directory to use for benchmarks, GOROOT is used by default var benchDir *string // Make sure we don't register the "benchdir" twice. 
func init() { ff := flag.Lookup("benchdir") if ff != nil { value := ff.DefValue if ff.Value != nil { value = ff.Value.String() } benchDir = &value } else { benchDir = flag.String("benchdir", runtime.GOROOT(), "The directory to scan for BenchmarkFastWalk") } } func noopWalkFunc(_ string, _ fs.DirEntry, _ error) error { return nil } func benchmarkFastWalk(b *testing.B, conf *fastwalk.Config, adapter func(fs.WalkDirFunc) fs.WalkDirFunc) { b.ReportAllocs() if adapter != nil { walkFn := noopWalkFunc for i := 0; i < b.N; i++ { err := fastwalk.Walk(conf, *benchDir, adapter(walkFn)) if err != nil { b.Fatal(err) } } } else { for i := 0; i < b.N; i++ { err := fastwalk.Walk(conf, *benchDir, noopWalkFunc) if err != nil { b.Fatal(err) } } } } func BenchmarkFastWalk(b *testing.B) { benchmarkFastWalk(b, nil, nil) } func BenchmarkFastWalkSort(b *testing.B) { for _, mode := range []fastwalk.SortMode{ fastwalk.SortNone, fastwalk.SortLexical, fastwalk.SortDirsFirst, fastwalk.SortFilesFirst, } { b.Run(mode.String(), func(b *testing.B) { conf := fastwalk.DefaultConfig.Copy() conf.Sort = mode benchmarkFastWalk(b, conf, func(x fs.WalkDirFunc) fs.WalkDirFunc { return noopWalkFunc }) }) } } func BenchmarkFastWalkFollow(b *testing.B) { benchmarkFastWalk(b, &fastwalk.Config{Follow: true}, nil) } func BenchmarkFastWalkAdapters(b *testing.B) { if testing.Short() { b.Skip("Skipping: short test") } b.Run("IgnoreDuplicateDirs", func(b *testing.B) { benchmarkFastWalk(b, nil, fastwalk.IgnoreDuplicateDirs) }) b.Run("IgnoreDuplicateFiles", func(b *testing.B) { benchmarkFastWalk(b, nil, fastwalk.IgnoreDuplicateFiles) }) } // Benchmark various tasks with different worker counts. 
// // Observations: // - Linux (Intel i9-9900K / Samsung Pro NVMe): consistently benefits from // more workers // - Darwin (m1): IO heavy tasks (Readfile and Stat) and Traversal perform // best with 4 workers, and only CPU bound tasks benefit from more workers func BenchmarkFastWalkNumWorkers(b *testing.B) { if testing.Short() { b.Skip("Skipping: short test") } runBench := func(b *testing.B, walkFn fs.WalkDirFunc) { maxWorkers := runtime.NumCPU() for i := 2; i <= maxWorkers; i += 2 { b.Run(fmt.Sprint(i), func(b *testing.B) { conf := fastwalk.Config{ NumWorkers: i, } for i := 0; i < b.N; i++ { if err := fastwalk.Walk(&conf, *benchDir, walkFn); err != nil { b.Fatal(err) } } }) } } // Bench pure traversal speed b.Run("NoOp", func(b *testing.B) { runBench(b, func(path string, d fs.DirEntry, err error) error { return err }) }) // No IO and light CPU b.Run("NoIO", func(b *testing.B) { runBench(b, func(path string, d fs.DirEntry, err error) error { if err == nil { fmt.Fprintf(io.Discard, "%s: %q\n", d.Type(), path) } return err }) }) // Stat each regular file b.Run("Stat", func(b *testing.B) { runBench(b, func(path string, d fs.DirEntry, err error) error { if err == nil && d.Type().IsRegular() { _, _ = d.Info() } return err }) }) // IO heavy task b.Run("ReadFile", func(b *testing.B) { runBench(b, func(path string, d fs.DirEntry, err error) error { if err != nil || !d.Type().IsRegular() { return err } f, err := os.Open(path) if err != nil { if os.IsNotExist(err) || os.IsPermission(err) { return nil } return err } defer f.Close() _, err = io.Copy(io.Discard, f) return err }) }) // CPU and IO heavy task b.Run("Hash", func(b *testing.B) { bufPool := &sync.Pool{ New: func() interface{} { b := make([]byte, 96*1024) return &b }, } runBench(b, func(path string, d fs.DirEntry, err error) error { if err != nil || !d.Type().IsRegular() { return err } f, err := os.Open(path) if err != nil { if os.IsNotExist(err) || os.IsPermission(err) { return nil } return err } defer f.Close() p := 
bufPool.Get().(*[]byte) h := md5.New() _, err = io.CopyBuffer(h, f, *p) bufPool.Put(p) _ = h.Sum(nil) return err }) }) } var benchWalkFunc = flag.String("walkfunc", "fastwalk", "The function to use for BenchmarkWalkComparison") // BenchmarkWalkComparison generates benchmarks using different walk functions // so that the results can be easily compared with `benchcmp` and `benchstat`. func BenchmarkWalkComparison(b *testing.B) { if testing.Short() { b.Skip("Skipping: short test") } switch *benchWalkFunc { case "fastwalk": benchmarkFastWalk(b, nil, nil) case "godirwalk": b.Fatal("comparisons with godirwalk are no longer supported") case "filepath": for i := 0; i < b.N; i++ { err := filepath.WalkDir(*benchDir, func(_ string, _ fs.DirEntry, _ error) error { return nil }) if err != nil { b.Fatal(err) } } default: b.Fatalf("invalid walkfunc: %q", *benchWalkFunc) } } fastwalk-1.0.9/fastwalk_unix.go000066400000000000000000000062661470672632200165660ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. //go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package fastwalk import ( "os" "syscall" "github.com/charlievieth/fastwalk/internal/dirent" ) // More than 5760 to work around https://golang.org/issue/24015. const blockSize = 8192 // unknownFileMode is a sentinel (and bogus) os.FileMode // value used to represent a syscall.DT_UNKNOWN Dirent.Type. const unknownFileMode os.FileMode = ^os.FileMode(0) func (w *walker) readDir(dirName string) error { fd, err := open(dirName, 0, 0) if err != nil { return &os.PathError{Op: "open", Path: dirName, Err: err} } defer syscall.Close(fd) var p *[]*unixDirent if w.sortMode != SortNone { p = direntSlicePool.Get().(*[]*unixDirent) } defer putDirentSlice(p) // The buffer must be at least a block long. 
buf := make([]byte, blockSize) // stack-allocated; doesn't escape bufp := 0 // starting read position in buf nbuf := 0 // end valid data in buf skipFiles := false for { if bufp >= nbuf { bufp = 0 nbuf, err = readDirent(fd, buf) if err != nil { return os.NewSyscallError("readdirent", err) } if nbuf <= 0 { break // exit loop } } consumed, name, typ := dirent.Parse(buf[bufp:nbuf]) bufp += consumed if name == "" || name == "." || name == ".." { continue } // Fallback for filesystems (like old XFS) that don't // support Dirent.Type and have DT_UNKNOWN (0) there // instead. if typ == unknownFileMode { fi, err := os.Lstat(dirName + "/" + name) if err != nil { // It got deleted in the meantime. if os.IsNotExist(err) { continue } return err } typ = fi.Mode() & os.ModeType } if skipFiles && typ.IsRegular() { continue } de := newUnixDirent(dirName, name, typ) if w.sortMode == SortNone { if err := w.onDirEnt(dirName, name, de); err != nil { if err == ErrSkipFiles { skipFiles = true continue } return err } } else { *p = append(*p, de) } } if w.sortMode == SortNone { return nil } dents := *p sortDirents(w.sortMode, dents) for _, d := range dents { d := d if skipFiles && d.typ.IsRegular() { continue } if err := w.onDirEnt(dirName, d.Name(), d); err != nil { if err != ErrSkipFiles { return err } skipFiles = true } } return nil } // According to https://golang.org/doc/go1.14#runtime // A consequence of the implementation of preemption is that on Unix systems, including Linux and macOS // systems, programs built with Go 1.14 will receive more signals than programs built with earlier releases. // // This causes syscall.Open and syscall.ReadDirent sometimes fail with EINTR errors. // We need to retry in this case. 
func open(path string, mode int, perm uint32) (fd int, err error) { for { fd, err := syscall.Open(path, mode, perm) if err != syscall.EINTR { return fd, err } } } func readDirent(fd int, buf []byte) (n int, err error) { for { nbuf, err := syscall.ReadDirent(fd, buf) if err != syscall.EINTR { return nbuf, err } } } fastwalk-1.0.9/go.LICENSE000066400000000000000000000027071470672632200147650ustar00rootroot00000000000000Copyright (c) 2009 The Go Authors. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fastwalk-1.0.9/go.mod000066400000000000000000000001341470672632200144520ustar00rootroot00000000000000module github.com/charlievieth/fastwalk go 1.20 retract v1.0.7 // Build broken on Go 1.20 fastwalk-1.0.9/internal/000077500000000000000000000000001470672632200151625ustar00rootroot00000000000000fastwalk-1.0.9/internal/dirent/000077500000000000000000000000001470672632200164475ustar00rootroot00000000000000fastwalk-1.0.9/internal/dirent/dirent.go000066400000000000000000000061351470672632200202700ustar00rootroot00000000000000//go:build aix || dragonfly || freebsd || (js && wasm) || linux || netbsd || openbsd || solaris package dirent import ( "os" "runtime" "syscall" "unsafe" ) // readInt returns the size-bytes unsigned integer in native byte order at offset off. func readInt(b []byte, off, size uintptr) (u uint64, ok bool) { if len(b) < int(off+size) { return 0, false } if isBigEndian { return readIntBE(b[off:], size), true } return readIntLE(b[off:], size), true } func readIntBE(b []byte, size uintptr) uint64 { switch size { case 1: return uint64(b[0]) case 2: _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[1]) | uint64(b[0])<<8 case 4: _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[3]) | uint64(b[2])<<8 | uint64(b[1])<<16 | uint64(b[0])<<24 case 8: _ = b[7] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 | uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56 default: panic("syscall: readInt with unsupported size") } } func readIntLE(b []byte, size uintptr) uint64 { switch size { case 1: return uint64(b[0]) case 2: _ = b[1] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[0]) | uint64(b[1])<<8 case 4: _ = b[3] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 case 8: _ = 
b[7] // bounds check hint to compiler; see golang.org/issue/14808 return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 default: panic("syscall: readInt with unsupported size") } } const InvalidMode = os.FileMode(1<<32 - 1) func Parse(buf []byte) (consumed int, name string, typ os.FileMode) { reclen, ok := direntReclen(buf) if !ok || reclen > uint64(len(buf)) { // WARN: this is a hard error because we consumed 0 bytes // and not stopping here could lead to an infinite loop. return 0, "", InvalidMode } consumed = int(reclen) rec := buf[:reclen] ino, ok := direntIno(rec) if !ok { return consumed, "", InvalidMode } // When building to wasip1, the host runtime might be running on Windows // or might expose a remote file system which does not have the concept // of inodes. Therefore, we cannot make the assumption that it is safe // to skip entries with zero inodes. if ino == 0 && runtime.GOOS != "wasip1" { return consumed, "", InvalidMode } typ = direntType(buf) const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name)) namlen, ok := direntNamlen(rec) if !ok || namoff+namlen > uint64(len(rec)) { return consumed, "", InvalidMode } namebuf := rec[namoff : namoff+namlen] for i, c := range namebuf { if c == 0 { namebuf = namebuf[:i] break } } // Check for useless names before allocating a string. if string(namebuf) == "." { name = "." } else if string(namebuf) == ".." { name = ".." 
} else { name = string(namebuf) } return consumed, name, typ } fastwalk-1.0.9/internal/dirent/dirent_aix.go000066400000000000000000000011671470672632200211310ustar00rootroot00000000000000//go:build aix package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino)) } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { reclen, ok := direntReclen(buf) if !ok { return 0, false } return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true } func direntType(buf []byte) os.FileMode { return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_dragonfly.go000066400000000000000000000022741470672632200223350ustar00rootroot00000000000000//go:build dragonfly package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno)) } func direntReclen(buf []byte) (uint64, bool) { namlen, ok := direntNamlen(buf) if !ok { return 0, false } return (16 + namlen + 1 + 7) &^ 7, true } func direntNamlen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen)) } func direntType(buf []byte) os.FileMode { off := unsafe.Offsetof(syscall.Dirent{}.Type) if off >= uintptr(len(buf)) { return ^os.FileMode(0) // unknown } typ := buf[off] switch typ { case syscall.DT_BLK: return os.ModeDevice case syscall.DT_CHR: return os.ModeDevice | os.ModeCharDevice case syscall.DT_DBF: // DT_DBF is "database record file". // fillFileStatFromSys treats as regular file. 
return 0 case syscall.DT_DIR: return os.ModeDir case syscall.DT_FIFO: return os.ModeNamedPipe case syscall.DT_LNK: return os.ModeSymlink case syscall.DT_REG: return 0 case syscall.DT_SOCK: return os.ModeSocket } return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_freebsd.go000066400000000000000000000020771470672632200217630ustar00rootroot00000000000000//go:build freebsd package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno)) } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen)) } func direntType(buf []byte) os.FileMode { off := unsafe.Offsetof(syscall.Dirent{}.Type) if off >= uintptr(len(buf)) { return ^os.FileMode(0) // unknown } typ := buf[off] switch typ { case syscall.DT_BLK: return os.ModeDevice case syscall.DT_CHR: return os.ModeDevice | os.ModeCharDevice case syscall.DT_DIR: return os.ModeDir case syscall.DT_FIFO: return os.ModeNamedPipe case syscall.DT_LNK: return os.ModeSymlink case syscall.DT_REG: return 0 case syscall.DT_SOCK: return os.ModeSocket } return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_js.go000066400000000000000000000010261470672632200207560ustar00rootroot00000000000000package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return 1, true } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { reclen, ok := direntReclen(buf) if !ok { return 0, false } return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true } func 
direntType(buf []byte) os.FileMode { return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_linux.go000066400000000000000000000021261470672632200215030ustar00rootroot00000000000000//go:build linux package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino)) } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { reclen, ok := direntReclen(buf) if !ok { return 0, false } return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true } func direntType(buf []byte) os.FileMode { off := unsafe.Offsetof(syscall.Dirent{}.Type) if off >= uintptr(len(buf)) { return ^os.FileMode(0) // unknown } typ := buf[off] switch typ { case syscall.DT_BLK: return os.ModeDevice case syscall.DT_CHR: return os.ModeDevice | os.ModeCharDevice case syscall.DT_DIR: return os.ModeDir case syscall.DT_FIFO: return os.ModeNamedPipe case syscall.DT_LNK: return os.ModeSymlink case syscall.DT_REG: return 0 case syscall.DT_SOCK: return os.ModeSocket } return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_netbsd.go000066400000000000000000000020761470672632200216270ustar00rootroot00000000000000//go:build netbsd package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno)) } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen)) } func direntType(buf []byte) os.FileMode { off := unsafe.Offsetof(syscall.Dirent{}.Type) if 
off >= uintptr(len(buf)) { return ^os.FileMode(0) // unknown } typ := buf[off] switch typ { case syscall.DT_BLK: return os.ModeDevice case syscall.DT_CHR: return os.ModeDevice | os.ModeCharDevice case syscall.DT_DIR: return os.ModeDir case syscall.DT_FIFO: return os.ModeNamedPipe case syscall.DT_LNK: return os.ModeSymlink case syscall.DT_REG: return 0 case syscall.DT_SOCK: return os.ModeSocket } return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_openbsd.go000066400000000000000000000020771470672632200220030ustar00rootroot00000000000000//go:build openbsd package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno)) } func direntReclen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen)) } func direntType(buf []byte) os.FileMode { off := unsafe.Offsetof(syscall.Dirent{}.Type) if off >= uintptr(len(buf)) { return ^os.FileMode(0) // unknown } typ := buf[off] switch typ { case syscall.DT_BLK: return os.ModeDevice case syscall.DT_CHR: return os.ModeDevice | os.ModeCharDevice case syscall.DT_DIR: return os.ModeDir case syscall.DT_FIFO: return os.ModeNamedPipe case syscall.DT_LNK: return os.ModeSymlink case syscall.DT_REG: return 0 case syscall.DT_SOCK: return os.ModeSocket } return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/dirent_solaris.go000066400000000000000000000011731470672632200220210ustar00rootroot00000000000000//go:build solaris package dirent import ( "os" "syscall" "unsafe" ) func direntIno(buf []byte) (uint64, bool) { return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Ino), unsafe.Sizeof(syscall.Dirent{}.Ino)) } func direntReclen(buf []byte) (uint64, bool) { 
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Reclen), unsafe.Sizeof(syscall.Dirent{}.Reclen)) } func direntNamlen(buf []byte) (uint64, bool) { reclen, ok := direntReclen(buf) if !ok { return 0, false } return reclen - uint64(unsafe.Offsetof(syscall.Dirent{}.Name)), true } func direntType(buf []byte) os.FileMode { return ^os.FileMode(0) // unknown } fastwalk-1.0.9/internal/dirent/doc.go000066400000000000000000000000741470672632200175440ustar00rootroot00000000000000// Package dirent parses raw syscall dirents package dirent fastwalk-1.0.9/internal/dirent/endian_big.go000066400000000000000000000005141470672632200210550ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // //go:build armbe || arm64be || m68k || mips || mips64 || mips64p32 || ppc || ppc64 || s390 || s390x || shbe || sparc || sparc64 package dirent const isBigEndian = true fastwalk-1.0.9/internal/dirent/endian_little.go000066400000000000000000000005601470672632200216120ustar00rootroot00000000000000// Copyright 2016 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // //go:build 386 || amd64 || amd64p32 || alpha || arm || arm64 || loong64 || mipsle || mips64le || mips64p32le || nios2 || ppc64le || riscv || riscv64 || sh || wasm package dirent const isBigEndian = false fastwalk-1.0.9/internal/fmtdirent/000077500000000000000000000000001470672632200171565ustar00rootroot00000000000000fastwalk-1.0.9/internal/fmtdirent/fmtdirent_go120.go000066400000000000000000000013311470672632200224070ustar00rootroot00000000000000//go:build !go1.21 package fmtdirent import "io/fs" // Backport fs.FormatDirEntry from go1.21 // FormatDirEntry returns a formatted version of dir for human readability. // Implementations of [DirEntry] can call this from a String method. 
// The outputs for a directory named subdir and a file named hello.go are: // // d subdir/ // - hello.go func FormatDirEntry(dir fs.DirEntry) string { name := dir.Name() b := make([]byte, 0, 5+len(name)) // The Type method does not return any permission bits, // so strip them from the string. mode := dir.Type().String() mode = mode[:len(mode)-9] b = append(b, mode...) b = append(b, ' ') b = append(b, name...) if dir.IsDir() { b = append(b, '/') } return string(b) } fastwalk-1.0.9/internal/fmtdirent/fmtdirent_go120_test.go000066400000000000000000000045001470672632200234470ustar00rootroot00000000000000//go:build !go1.21 // Copyright 2023 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Backport fs.FormatDirEntry tests from go1.21. We don't test // the go1.21+ FormatDirEntry function since it just calls the // stdlib and we don't want changes in its output to break our // tests. package fmtdirent_test import ( . "io/fs" "testing" "time" "github.com/charlievieth/fastwalk/internal/fmtdirent" ) // formatTest implements FileInfo to test FormatFileInfo, // and implements DirEntry to test FormatDirEntry. 
type formatTest struct { name string size int64 mode FileMode modTime time.Time isDir bool } func (fs *formatTest) Name() string { return fs.name } func (fs *formatTest) Size() int64 { return fs.size } func (fs *formatTest) Mode() FileMode { return fs.mode } func (fs *formatTest) ModTime() time.Time { return fs.modTime } func (fs *formatTest) IsDir() bool { return fs.isDir } func (fs *formatTest) Sys() any { return nil } func (fs *formatTest) Type() FileMode { return fs.mode.Type() } func (fs *formatTest) Info() (FileInfo, error) { return fs, nil } var formatTests = []struct { input formatTest wantDirEntry string }{ { formatTest{ name: "hello.go", size: 100, mode: 0o644, modTime: time.Date(1970, time.January, 1, 12, 0, 0, 0, time.UTC), isDir: false, }, "- hello.go", }, { formatTest{ name: "home/gopher", size: 0, mode: ModeDir | 0o755, modTime: time.Date(1970, time.January, 1, 12, 0, 0, 0, time.UTC), isDir: true, }, "d home/gopher/", }, { formatTest{ name: "big", size: 0x7fffffffffffffff, mode: ModeIrregular | 0o644, modTime: time.Date(1970, time.January, 1, 12, 0, 0, 0, time.UTC), isDir: false, }, "? big", }, { formatTest{ name: "small", size: -0x8000000000000000, mode: ModeSocket | ModeSetuid | 0o644, modTime: time.Date(1970, time.January, 1, 12, 0, 0, 0, time.UTC), isDir: false, }, "S small", }, } func TestFormatDirEntry(t *testing.T) { for i, test := range formatTests { got := fmtdirent.FormatDirEntry(&test.input) if got != test.wantDirEntry { t.Errorf("%d: FormatDirEntry(%#v) = %q, want %q", i, test.input, got, test.wantDirEntry) } } } fastwalk-1.0.9/internal/fmtdirent/fmtdirent_go121.go000066400000000000000000000005771470672632200224230ustar00rootroot00000000000000//go:build go1.21 package fmtdirent import "io/fs" // FormatDirEntry returns a formatted version of dir for human readability. // Implementations of [DirEntry] can call this from a String method. 
// The outputs for a directory named subdir and a file named hello.go are: // // d subdir/ // - hello.go func FormatDirEntry(dir fs.DirEntry) string { return fs.FormatDirEntry(dir) } fastwalk-1.0.9/scripts/000077500000000000000000000000001470672632200150355ustar00rootroot00000000000000fastwalk-1.0.9/scripts/.gitignore000066400000000000000000000000221470672632200170170ustar00rootroot00000000000000/bench_comp *.exe fastwalk-1.0.9/scripts/bench_comp.bash000077500000000000000000000016431470672632200200000ustar00rootroot00000000000000#!/usr/bin/env bash set -euo pipefail COUNT=5 TESTS=( 'filepath' 'godirwalk' 'fastwalk' ) ROOT="$(go env GOROOT)" if [[ ! -d "${ROOT}" ]]; then echo >&2 "error: GOROOT (\"${ROOT}\") does not exist and is required to run benchmarks" exit 1 fi TEST_FLAGS=( -run '^$' # skip all tests -bench '^BenchmarkWalkComparison$' -benchmem -count "${COUNT}" ) TMP="$(mktemp -d -t fastwalk-bench.XXXXXX)" for name in "${TESTS[@]}"; do echo "## ${name}" go test "${TEST_FLAGS[@]}" github.com/charlievieth/fastwalk -walkfunc "${name}" | tee "${TMP}/${name}.out" echo '' done echo '## Comparisons' echo '########################################################' echo '' echo '## filepath vs. fastwalk' benchstat "${TMP}/filepath.out" "${TMP}/fastwalk.out" echo '' echo '## godirwalk vs. 
fastwalk' benchstat "${TMP}/godirwalk.out" "${TMP}/fastwalk.out" echo '' echo "## Temp: ${TMP}" fastwalk-1.0.9/scripts/bench_comp.go000066400000000000000000000037041470672632200174650ustar00rootroot00000000000000package main import ( "flag" "fmt" "io" "io/ioutil" "log" "os" "os/exec" "path/filepath" "strconv" ) func init() { log.SetOutput(os.Stderr) log.SetFlags(log.LstdFlags | log.Lshortfile) } var Tests = []string{ "filepath", "fastwalk", } func main() { count := flag.Int("count", 5, "Run each test and benchmark n times") compCmd := flag.String("comp", "benchstat", "Benchmark comparison command") flag.Parse() if _, err := exec.LookPath(*compCmd); err != nil { log.Fatalf("error: %v: %q\n", err, *compCmd) } tmpdir, err := ioutil.TempDir("", "fastwalk-bench.*") if err != nil { log.Fatal(err) } runTest := func(name string) error { fmt.Println("##", name) filename := filepath.Join(tmpdir, name+".txt") f, err := os.Create(filename) if err != nil { log.Fatal(err) } defer f.Close() args := []string{ "test", "-run", `^$`, "-bench", `^BenchmarkWalkComparison$`, "-benchmem", "-count", strconv.Itoa(*count), "github.com/charlievieth/fastwalk", "-walkfunc", name, } cmd := exec.Command("go", args...) cmd.Stderr = os.Stderr cmd.Stdout = io.MultiWriter(os.Stdout, f) if err := cmd.Run(); err != nil { log.Fatalf("error running command: %q: %v\n", cmd.Args, err) } if err := f.Close(); err != nil { log.Fatal(err) } fmt.Print("\n") return nil } for _, name := range Tests { runTest(name) } benchStat := func(from, to string) { fmt.Printf("## %s vs. 
%s\n", from, to) cmd := exec.Command(*compCmd, filepath.Join(tmpdir, from+".txt"), filepath.Join(tmpdir, to+".txt"), ) cmd.Stderr = os.Stderr cmd.Stdout = os.Stdout if err := cmd.Run(); err != nil { log.Fatalf("error running command: %q: %v\n", cmd.Args, err) } fmt.Print("\n") } fmt.Println("## Comparisons") fmt.Println("########################################################") fmt.Print("\n") benchStat("filepath", "fastwalk") benchStat("godirwalk", "fastwalk") fmt.Printf("Temp: %s\n", tmpdir) } fastwalk-1.0.9/zsyscall_darwin.go000066400000000000000000000045771470672632200171220ustar00rootroot00000000000000//go:build darwin && go1.13 // +build darwin,go1.13 package fastwalk import ( "strings" "syscall" "unsafe" ) // TODO: consider using "go linkname" for everything but "opendir" which is not // implemented in the stdlib // Implemented in the runtime package (runtime/sys_darwin.go) func syscall_syscall(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) func syscall_syscallPtr(fn, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) //go:linkname syscall_syscall syscall.syscall //go:linkname syscall_syscallPtr syscall.syscallPtr func closedir(dir uintptr) (err error) { _, _, e1 := syscall_syscall(libc_closedir_trampoline_addr, dir, 0, 0) if e1 != 0 { err = errnoErr(e1) } return } var libc_closedir_trampoline_addr uintptr //go:cgo_import_dynamic libc_closedir closedir "/usr/lib/libSystem.B.dylib" func readdir_r(dir uintptr, entry *syscall.Dirent, result **syscall.Dirent) syscall.Errno { res, _, _ := syscall_syscall(libc_readdir_r_trampoline_addr, dir, uintptr(unsafe.Pointer(entry)), uintptr(unsafe.Pointer(result))) return syscall.Errno(res) } var libc_readdir_r_trampoline_addr uintptr //go:cgo_import_dynamic libc_readdir_r readdir_r "/usr/lib/libSystem.B.dylib" func opendir(path string) (dir uintptr, err error) { // We implent opendir so that we don't have to open a file, duplicate // it's FD, then call fdopendir with it. 
const maxPath = len(syscall.Dirent{}.Name) // Tested by TestFastWalk_LongPath var buf [maxPath]byte if len(path) >= len(buf) { return 0, errEINVAL } if strings.IndexByte(path, 0) != -1 { return 0, errEINVAL } copy(buf[:], path) buf[len(path)] = 0 r0, _, e1 := syscall_syscallPtr(libc_opendir_trampoline_addr, uintptr(unsafe.Pointer(&buf[0])), 0, 0) if e1 != 0 { err = errnoErr(e1) } return r0, err } var libc_opendir_trampoline_addr uintptr //go:cgo_import_dynamic libc_opendir opendir "/usr/lib/libSystem.B.dylib" // Copied from syscall/syscall_unix.go // Do the interface allocations only once for common // Errno values. var ( errEAGAIN error = syscall.EAGAIN errEINVAL error = syscall.EINVAL errENOENT error = syscall.ENOENT ) // errnoErr returns common boxed Errno values, to prevent // allocations at runtime. func errnoErr(e syscall.Errno) error { switch e { case 0: return nil case syscall.EAGAIN: return errEAGAIN case syscall.EINVAL: return errEINVAL case syscall.ENOENT: return errENOENT } return e } fastwalk-1.0.9/zsyscall_darwin_amd64.1_13.s000066400000000000000000000012361470672632200204010ustar00rootroot00000000000000//go:build go1.13 // +build go1.13 #include "textflag.h" TEXT libc_closedir_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_closedir(SB) GLOBL ·libc_closedir_trampoline_addr(SB), RODATA, $8 DATA ·libc_closedir_trampoline_addr(SB)/8, $libc_closedir_trampoline<>(SB) TEXT libc_readdir_r_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_readdir_r(SB) GLOBL ·libc_readdir_r_trampoline_addr(SB), RODATA, $8 DATA ·libc_readdir_r_trampoline_addr(SB)/8, $libc_readdir_r_trampoline<>(SB) TEXT libc_opendir_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_opendir(SB) GLOBL ·libc_opendir_trampoline_addr(SB), RODATA, $8 DATA ·libc_opendir_trampoline_addr(SB)/8, $libc_opendir_trampoline<>(SB) fastwalk-1.0.9/zsyscall_darwin_arm64.1_13.s000066400000000000000000000012361470672632200204170ustar00rootroot00000000000000//go:build go1.13 // +build go1.13 #include "textflag.h" TEXT 
libc_closedir_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_closedir(SB) GLOBL ·libc_closedir_trampoline_addr(SB), RODATA, $8 DATA ·libc_closedir_trampoline_addr(SB)/8, $libc_closedir_trampoline<>(SB) TEXT libc_readdir_r_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_readdir_r(SB) GLOBL ·libc_readdir_r_trampoline_addr(SB), RODATA, $8 DATA ·libc_readdir_r_trampoline_addr(SB)/8, $libc_readdir_r_trampoline<>(SB) TEXT libc_opendir_trampoline<>(SB),NOSPLIT,$0-0 JMP libc_opendir(SB) GLOBL ·libc_opendir_trampoline_addr(SB), RODATA, $8 DATA ·libc_opendir_trampoline_addr(SB)/8, $libc_opendir_trampoline<>(SB)