123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368 |
- package godirwalk
- import (
- "os"
- "path/filepath"
- "sort"
- "github.com/pkg/errors"
- )
// DefaultScratchBufferSize is the number of bytes Walk, ReadDirents, and
// ReadDirnames allocate for their scratch buffer when the caller supplies no
// buffer, or supplies one shorter than MinimumScratchBufferSize bytes. The
// value is deliberately generous: when a program enumerates large directories,
// a larger scratch buffer results in fewer operating system calls.
const DefaultScratchBufferSize = 64 << 10
// MinimumScratchBufferSize specifies the minimum size of the scratch buffer
// that Walk, ReadDirents, and ReadDirnames will use when reading file entries
// from the operating system. A caller-supplied buffer shorter than this is
// replaced by one of DefaultScratchBufferSize bytes.
//
// It is set to the result of os.Getpagesize() as part of package variable
// initialization, so the value is already in place before any init function
// or dependent package-level initializer runs.
var MinimumScratchBufferSize = os.Getpagesize()
- // Options provide parameters for how the Walk function operates.
- type Options struct {
- // ErrorCallback specifies a function to be invoked in the case of an error
- // that could potentially be ignored while walking a file system
- // hierarchy. When set to nil or left as its zero-value, any error condition
- // causes Walk to immediately return the error describing what took
- // place. When non-nil, this user supplied function is invoked with the OS
- // pathname of the file system object that caused the error along with the
- // error that took place. The return value of the supplied ErrorCallback
- // function determines whether the error will cause Walk to halt immediately
- // as it would were no ErrorCallback value provided, or skip this file
- // system node yet continue on with the remaining nodes in the file system
- // hierarchy.
- //
- // ErrorCallback is invoked both for errors that are returned by the
- // runtime, and for errors returned by other user supplied callback
- // functions.
- ErrorCallback func(string, error) ErrorAction
- // FollowSymbolicLinks specifies whether Walk will follow symbolic links
- // that refer to directories. When set to false or left as its zero-value,
- // Walk will still invoke the callback function with symbolic link nodes,
- // but if the symbolic link refers to a directory, it will not recurse on
- // that directory. When set to true, Walk will recurse on symbolic links
- // that refer to a directory.
- FollowSymbolicLinks bool
- // Unsorted controls whether or not Walk will sort the immediate descendants
- // of a directory by their relative names prior to visiting each of those
- // entries.
- //
- // When set to false or left at its zero-value, Walk will get the list of
- // immediate descendants of a particular directory, sort that list by
- // lexical order of their names, and then visit each node in the list in
- // sorted order. This will cause Walk to always traverse the same directory
- // tree in the same order, however may be inefficient for directories with
- // many immediate descendants.
- //
- // When set to true, Walk skips sorting the list of immediate descendants
- // for a directory, and simply visits each node in the order the operating
- // system enumerated them. This will be more fast, but with the side effect
- // that the traversal order may be different from one invocation to the
- // next.
- Unsorted bool
- // Callback is a required function that Walk will invoke for every file
- // system node it encounters.
- Callback WalkFunc
- // PostChildrenCallback is an option function that Walk will invoke for
- // every file system directory it encounters after its children have been
- // processed.
- PostChildrenCallback WalkFunc
- // ScratchBuffer is an optional byte slice to use as a scratch buffer for
- // Walk to use when reading directory entries, to reduce amount of garbage
- // generation. Not all architectures take advantage of the scratch
- // buffer. If omitted or the provided buffer has fewer bytes than
- // MinimumScratchBufferSize, then a buffer with DefaultScratchBufferSize
- // bytes will be created and used once per Walk invocation.
- ScratchBuffer []byte
- }
// ErrorAction enumerates the actions Walk can take after an error occurs
// while walking the file system. An ErrorAction is what an ErrorCallback
// function returns; see the documentation for the ErrorCallback field of the
// Options structure for more information.
type ErrorAction int

const (
	// Halt tells Walk to stop the walk process when a runtime error takes
	// place. It is the same action Walk takes when no ErrorCallback is
	// provided.
	Halt ErrorAction = iota

	// SkipNode tells Walk to ignore the runtime error for the current file
	// system node, skip processing of the node that caused the error, and
	// continue walking the file system hierarchy with the remaining nodes.
	SkipNode
)
- // WalkFunc is the type of the function called for each file system node visited
- // by Walk. The pathname argument will contain the argument to Walk as a prefix;
- // that is, if Walk is called with "dir", which is a directory containing the
- // file "a", the provided WalkFunc will be invoked with the argument "dir/a",
- // using the correct os.PathSeparator for the Go Operating System architecture,
- // GOOS. The directory entry argument is a pointer to a Dirent for the node,
- // providing access to both the basename and the mode type of the file system
- // node.
- //
- // If an error is returned by the Callback or PostChildrenCallback functions,
- // and no ErrorCallback function is provided, processing stops. If an
- // ErrorCallback function is provided, then it is invoked with the OS pathname
- // of the node that caused the error along along with the error. The return
- // value of the ErrorCallback function determines whether to halt processing, or
- // skip this node and continue processing remaining file system nodes.
- //
- // The exception is when the function returns the special value
- // filepath.SkipDir. If the function returns filepath.SkipDir when invoked on a
- // directory, Walk skips the directory's contents entirely. If the function
- // returns filepath.SkipDir when invoked on a non-directory file system node,
- // Walk skips the remaining files in the containing directory. Note that any
- // supplied ErrorCallback function is not invoked with filepath.SkipDir when the
- // Callback or PostChildrenCallback functions return that special value.
- type WalkFunc func(osPathname string, directoryEntry *Dirent) error
- // Walk walks the file tree rooted at the specified directory, calling the
- // specified callback function for each file system node in the tree, including
- // root, symbolic links, and other node types. The nodes are walked in lexical
- // order, which makes the output deterministic but means that for very large
- // directories this function can be inefficient.
- //
- // This function is often much faster than filepath.Walk because it does not
- // invoke os.Stat for every node it encounters, but rather obtains the file
- // system node type when it reads the parent directory.
- //
- // If a runtime error occurs, either from the operating system or from the
- // upstream Callback or PostChildrenCallback functions, processing typically
- // halts. However, when an ErrorCallback function is provided in the provided
- // Options structure, that function is invoked with the error along with the OS
- // pathname of the file system node that caused the error. The ErrorCallback
- // function's return value determines the action that Walk will then take.
- //
- // func main() {
- // dirname := "."
- // if len(os.Args) > 1 {
- // dirname = os.Args[1]
- // }
- // err := godirwalk.Walk(dirname, &godirwalk.Options{
- // Callback: func(osPathname string, de *godirwalk.Dirent) error {
- // fmt.Printf("%s %s\n", de.ModeType(), osPathname)
- // return nil
- // },
- // ErrorCallback: func(osPathname string, err error) godirwalk.ErrorAction {
- // // Your program may want to log the error somehow.
- // fmt.Fprintf(os.Stderr, "ERROR: %s\n", err)
- //
- // // For the purposes of this example, a simple SkipNode will suffice,
- // // although in reality perhaps additional logic might be called for.
- // return godirwalk.SkipNode
- // },
- // })
- // if err != nil {
- // fmt.Fprintf(os.Stderr, "%s\n", err)
- // os.Exit(1)
- // }
- // }
- func Walk(pathname string, options *Options) error {
- pathname = filepath.Clean(pathname)
- var fi os.FileInfo
- var err error
- if options.FollowSymbolicLinks {
- fi, err = os.Stat(pathname)
- if err != nil {
- return errors.Wrap(err, "cannot Stat")
- }
- } else {
- fi, err = os.Lstat(pathname)
- if err != nil {
- return errors.Wrap(err, "cannot Lstat")
- }
- }
- mode := fi.Mode()
- if mode&os.ModeDir == 0 {
- return errors.Errorf("cannot Walk non-directory: %s", pathname)
- }
- dirent := &Dirent{
- name: filepath.Base(pathname),
- modeType: mode & os.ModeType,
- }
- // If ErrorCallback is nil, set to a default value that halts the walk
- // process on all operating system errors. This is done to allow error
- // handling to be more succinct in the walk code.
- if options.ErrorCallback == nil {
- options.ErrorCallback = defaultErrorCallback
- }
- if len(options.ScratchBuffer) < MinimumScratchBufferSize {
- options.ScratchBuffer = make([]byte, DefaultScratchBufferSize)
- }
- err = walk(pathname, dirent, options)
- if err == filepath.SkipDir {
- return nil // silence SkipDir for top level
- }
- return err
- }
- // defaultErrorCallback always returns Halt because if the upstream code did not
- // provide an ErrorCallback function, walking the file system hierarchy ought to
- // halt upon any operating system error.
- func defaultErrorCallback(_ string, _ error) ErrorAction { return Halt }
- // walk recursively traverses the file system node specified by pathname and the
- // Dirent.
- func walk(osPathname string, dirent *Dirent, options *Options) error {
- err := options.Callback(osPathname, dirent)
- if err != nil {
- if err == filepath.SkipDir {
- return err
- }
- err = errors.Wrap(err, "Callback") // wrap potential errors returned by callback
- if action := options.ErrorCallback(osPathname, err); action == SkipNode {
- return nil
- }
- return err
- }
- // On some platforms, an entry can have more than one mode type bit set.
- // For instance, it could have both the symlink bit and the directory bit
- // set indicating it's a symlink to a directory.
- if dirent.IsSymlink() {
- if !options.FollowSymbolicLinks {
- return nil
- }
- // Only need to Stat entry if platform did not already have os.ModeDir
- // set, such as would be the case for unix like operating systems. (This
- // guard eliminates extra os.Stat check on Windows.)
- if !dirent.IsDir() {
- referent, err := os.Readlink(osPathname)
- if err != nil {
- err = errors.Wrap(err, "cannot Readlink")
- if action := options.ErrorCallback(osPathname, err); action == SkipNode {
- return nil
- }
- return err
- }
- var osp string
- if filepath.IsAbs(referent) {
- osp = referent
- } else {
- osp = filepath.Join(filepath.Dir(osPathname), referent)
- }
- fi, err := os.Stat(osp)
- if err != nil {
- err = errors.Wrap(err, "cannot Stat")
- if action := options.ErrorCallback(osp, err); action == SkipNode {
- return nil
- }
- return err
- }
- dirent.modeType = fi.Mode() & os.ModeType
- }
- }
- if !dirent.IsDir() {
- return nil
- }
- // If get here, then specified pathname refers to a directory.
- deChildren, err := ReadDirents(osPathname, options.ScratchBuffer)
- if err != nil {
- err = errors.Wrap(err, "cannot ReadDirents")
- if action := options.ErrorCallback(osPathname, err); action == SkipNode {
- return nil
- }
- return err
- }
- if !options.Unsorted {
- sort.Sort(deChildren) // sort children entries unless upstream says to leave unsorted
- }
- for _, deChild := range deChildren {
- osChildname := filepath.Join(osPathname, deChild.name)
- err = walk(osChildname, deChild, options)
- if err != nil {
- if err != filepath.SkipDir {
- return err
- }
- // If received skipdir on a directory, stop processing that
- // directory, but continue to its siblings. If received skipdir on a
- // non-directory, stop processing remaining siblings.
- if deChild.IsSymlink() {
- // Only need to Stat entry if platform did not already have
- // os.ModeDir set, such as would be the case for unix like
- // operating systems. (This guard eliminates extra os.Stat check
- // on Windows.)
- if !deChild.IsDir() {
- // Resolve symbolic link referent to determine whether node
- // is directory or not.
- referent, err := os.Readlink(osChildname)
- if err != nil {
- err = errors.Wrap(err, "cannot Readlink")
- if action := options.ErrorCallback(osChildname, err); action == SkipNode {
- continue // with next child
- }
- return err
- }
- var osp string
- if filepath.IsAbs(referent) {
- osp = referent
- } else {
- osp = filepath.Join(osPathname, referent)
- }
- fi, err := os.Stat(osp)
- if err != nil {
- err = errors.Wrap(err, "cannot Stat")
- if action := options.ErrorCallback(osp, err); action == SkipNode {
- continue // with next child
- }
- return err
- }
- deChild.modeType = fi.Mode() & os.ModeType
- }
- }
- if !deChild.IsDir() {
- // If not directory, return immediately, thus skipping remainder
- // of siblings.
- return nil
- }
- }
- }
- if options.PostChildrenCallback == nil {
- return nil
- }
- err = options.PostChildrenCallback(osPathname, dirent)
- if err == nil || err == filepath.SkipDir {
- return err
- }
- err = errors.Wrap(err, "PostChildrenCallback") // wrap potential errors returned by callback
- if action := options.ErrorCallback(osPathname, err); action == SkipNode {
- return nil
- }
- return err
- }
|