walk.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. package godirwalk
  2. import (
  3. "os"
  4. "path/filepath"
  5. "sort"
  6. "github.com/pkg/errors"
  7. )
  8. // DefaultScratchBufferSize specifies the size of the scratch buffer that will
  9. // be allocated by Walk, ReadDirents, or ReadDirnames when a scratch buffer is
  10. // not provided or the scratch buffer that is provided is smaller than
  11. // MinimumScratchBufferSize bytes. This may seem like a large value; however,
  12. // when a program intends to enumerate large directories, having a larger
  13. // scratch buffer results in fewer operating system calls.
  14. const DefaultScratchBufferSize = 64 * 1024
  15. // MinimumScratchBufferSize specifies the minimum size of the scratch buffer
  16. // that Walk, ReadDirents, and ReadDirnames will use when reading file entries
  17. // from the operating system. It is initialized to the result from calling
  18. // `os.Getpagesize()` during program startup.
  19. var MinimumScratchBufferSize int
  20. func init() {
  21. MinimumScratchBufferSize = os.Getpagesize()
  22. }
// Options provide parameters for how the Walk function operates.
type Options struct {
	// ErrorCallback specifies a function to be invoked in the case of an error
	// that could potentially be ignored while walking a file system
	// hierarchy. When set to nil or left as its zero-value, any error condition
	// causes Walk to immediately return the error describing what took
	// place. When non-nil, this user supplied function is invoked with the OS
	// pathname of the file system object that caused the error along with the
	// error that took place. The return value of the supplied ErrorCallback
	// function determines whether the error will cause Walk to halt immediately
	// as it would were no ErrorCallback value provided, or skip this file
	// system node yet continue on with the remaining nodes in the file system
	// hierarchy.
	//
	// ErrorCallback is invoked both for errors that are returned by the
	// runtime, and for errors returned by other user supplied callback
	// functions.
	//
	// Note that when this field is nil, Walk assigns its default halting
	// callback to the field, so the Options value is modified by the call.
	ErrorCallback func(string, error) ErrorAction

	// FollowSymbolicLinks specifies whether Walk will follow symbolic links
	// that refer to directories. When set to false or left as its zero-value,
	// Walk will still invoke the callback function with symbolic link nodes,
	// but if the symbolic link refers to a directory, it will not recurse on
	// that directory. When set to true, Walk will recurse on symbolic links
	// that refer to a directory.
	FollowSymbolicLinks bool

	// Unsorted controls whether or not Walk will sort the immediate descendants
	// of a directory by their relative names prior to visiting each of those
	// entries.
	//
	// When set to false or left at its zero-value, Walk will get the list of
	// immediate descendants of a particular directory, sort that list by
	// lexical order of their names, and then visit each node in the list in
	// sorted order. This will cause Walk to always traverse the same directory
	// tree in the same order, however may be inefficient for directories with
	// many immediate descendants.
	//
	// When set to true, Walk skips sorting the list of immediate descendants
	// for a directory, and simply visits each node in the order the operating
	// system enumerated them. This will be faster, but with the side effect
	// that the traversal order may be different from one invocation to the
	// next.
	Unsorted bool

	// Callback is a required function that Walk will invoke for every file
	// system node it encounters.
	Callback WalkFunc

	// PostChildrenCallback is an optional function that Walk will invoke for
	// every file system directory it encounters after its children have been
	// processed.
	PostChildrenCallback WalkFunc

	// ScratchBuffer is an optional byte slice to use as a scratch buffer for
	// Walk to use when reading directory entries, to reduce amount of garbage
	// generation. Not all architectures take advantage of the scratch
	// buffer. If omitted or the provided buffer has fewer bytes than
	// MinimumScratchBufferSize, then a buffer with DefaultScratchBufferSize
	// bytes will be created and used once per Walk invocation. Walk stores the
	// newly created buffer in this field.
	ScratchBuffer []byte
}
// ErrorAction defines a set of actions the Walk function could take based on
// the occurrence of an error while walking the file system. See the
// documentation for the ErrorCallback field of the Options structure for more
// information.
type ErrorAction int

const (
	// Halt is the ErrorAction return value when the upstream code wants to halt
	// the walk process when a runtime error takes place. It matches the default
	// action the Walk function would take were no ErrorCallback provided. Halt
	// is the zero value of ErrorAction.
	Halt ErrorAction = iota

	// SkipNode is the ErrorAction return value when the upstream code wants to
	// ignore the runtime error for the current file system node, skip
	// processing of the node that caused the error, and continue walking the
	// file system hierarchy with the remaining nodes.
	SkipNode
)
// WalkFunc is the type of the function called for each file system node visited
// by Walk. The pathname argument will contain the argument to Walk as a prefix;
// that is, if Walk is called with "dir", which is a directory containing the
// file "a", the provided WalkFunc will be invoked with the argument "dir/a",
// using the correct os.PathSeparator for the Go Operating System architecture,
// GOOS. The directory entry argument is a pointer to a Dirent for the node,
// providing access to both the basename and the mode type of the file system
// node.
//
// If an error is returned by the Callback or PostChildrenCallback functions,
// and no ErrorCallback function is provided, processing stops. If an
// ErrorCallback function is provided, then it is invoked with the OS pathname
// of the node that caused the error along with the error. The return value of
// the ErrorCallback function determines whether to halt processing, or skip
// this node and continue processing remaining file system nodes.
//
// The exception is when the function returns the special value
// filepath.SkipDir. If the function returns filepath.SkipDir when invoked on a
// directory, Walk skips the directory's contents entirely. If the function
// returns filepath.SkipDir when invoked on a non-directory file system node,
// Walk skips the remaining files in the containing directory. Note that any
// supplied ErrorCallback function is not invoked with filepath.SkipDir when the
// Callback or PostChildrenCallback functions return that special value.
type WalkFunc func(osPathname string, directoryEntry *Dirent) error
  120. // Walk walks the file tree rooted at the specified directory, calling the
  121. // specified callback function for each file system node in the tree, including
  122. // root, symbolic links, and other node types. The nodes are walked in lexical
  123. // order, which makes the output deterministic but means that for very large
  124. // directories this function can be inefficient.
  125. //
  126. // This function is often much faster than filepath.Walk because it does not
  127. // invoke os.Stat for every node it encounters, but rather obtains the file
  128. // system node type when it reads the parent directory.
  129. //
  130. // If a runtime error occurs, either from the operating system or from the
  131. // upstream Callback or PostChildrenCallback functions, processing typically
  132. // halts. However, when an ErrorCallback function is provided in the provided
  133. // Options structure, that function is invoked with the error along with the OS
  134. // pathname of the file system node that caused the error. The ErrorCallback
  135. // function's return value determines the action that Walk will then take.
  136. //
  137. // func main() {
  138. // dirname := "."
  139. // if len(os.Args) > 1 {
  140. // dirname = os.Args[1]
  141. // }
  142. // err := godirwalk.Walk(dirname, &godirwalk.Options{
  143. // Callback: func(osPathname string, de *godirwalk.Dirent) error {
  144. // fmt.Printf("%s %s\n", de.ModeType(), osPathname)
  145. // return nil
  146. // },
  147. // ErrorCallback: func(osPathname string, err error) godirwalk.ErrorAction {
  148. // // Your program may want to log the error somehow.
  149. // fmt.Fprintf(os.Stderr, "ERROR: %s\n", err)
  150. //
  151. // // For the purposes of this example, a simple SkipNode will suffice,
  152. // // although in reality perhaps additional logic might be called for.
  153. // return godirwalk.SkipNode
  154. // },
  155. // })
  156. // if err != nil {
  157. // fmt.Fprintf(os.Stderr, "%s\n", err)
  158. // os.Exit(1)
  159. // }
  160. // }
  161. func Walk(pathname string, options *Options) error {
  162. pathname = filepath.Clean(pathname)
  163. var fi os.FileInfo
  164. var err error
  165. if options.FollowSymbolicLinks {
  166. fi, err = os.Stat(pathname)
  167. if err != nil {
  168. return errors.Wrap(err, "cannot Stat")
  169. }
  170. } else {
  171. fi, err = os.Lstat(pathname)
  172. if err != nil {
  173. return errors.Wrap(err, "cannot Lstat")
  174. }
  175. }
  176. mode := fi.Mode()
  177. if mode&os.ModeDir == 0 {
  178. return errors.Errorf("cannot Walk non-directory: %s", pathname)
  179. }
  180. dirent := &Dirent{
  181. name: filepath.Base(pathname),
  182. modeType: mode & os.ModeType,
  183. }
  184. // If ErrorCallback is nil, set to a default value that halts the walk
  185. // process on all operating system errors. This is done to allow error
  186. // handling to be more succinct in the walk code.
  187. if options.ErrorCallback == nil {
  188. options.ErrorCallback = defaultErrorCallback
  189. }
  190. if len(options.ScratchBuffer) < MinimumScratchBufferSize {
  191. options.ScratchBuffer = make([]byte, DefaultScratchBufferSize)
  192. }
  193. err = walk(pathname, dirent, options)
  194. if err == filepath.SkipDir {
  195. return nil // silence SkipDir for top level
  196. }
  197. return err
  198. }
  199. // defaultErrorCallback always returns Halt because if the upstream code did not
  200. // provide an ErrorCallback function, walking the file system hierarchy ought to
  201. // halt upon any operating system error.
  202. func defaultErrorCallback(_ string, _ error) ErrorAction { return Halt }
  203. // walk recursively traverses the file system node specified by pathname and the
  204. // Dirent.
  205. func walk(osPathname string, dirent *Dirent, options *Options) error {
  206. err := options.Callback(osPathname, dirent)
  207. if err != nil {
  208. if err == filepath.SkipDir {
  209. return err
  210. }
  211. err = errors.Wrap(err, "Callback") // wrap potential errors returned by callback
  212. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  213. return nil
  214. }
  215. return err
  216. }
  217. // On some platforms, an entry can have more than one mode type bit set.
  218. // For instance, it could have both the symlink bit and the directory bit
  219. // set indicating it's a symlink to a directory.
  220. if dirent.IsSymlink() {
  221. if !options.FollowSymbolicLinks {
  222. return nil
  223. }
  224. // Only need to Stat entry if platform did not already have os.ModeDir
  225. // set, such as would be the case for unix like operating systems. (This
  226. // guard eliminates extra os.Stat check on Windows.)
  227. if !dirent.IsDir() {
  228. referent, err := os.Readlink(osPathname)
  229. if err != nil {
  230. err = errors.Wrap(err, "cannot Readlink")
  231. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  232. return nil
  233. }
  234. return err
  235. }
  236. var osp string
  237. if filepath.IsAbs(referent) {
  238. osp = referent
  239. } else {
  240. osp = filepath.Join(filepath.Dir(osPathname), referent)
  241. }
  242. fi, err := os.Stat(osp)
  243. if err != nil {
  244. err = errors.Wrap(err, "cannot Stat")
  245. if action := options.ErrorCallback(osp, err); action == SkipNode {
  246. return nil
  247. }
  248. return err
  249. }
  250. dirent.modeType = fi.Mode() & os.ModeType
  251. }
  252. }
  253. if !dirent.IsDir() {
  254. return nil
  255. }
  256. // If get here, then specified pathname refers to a directory.
  257. deChildren, err := ReadDirents(osPathname, options.ScratchBuffer)
  258. if err != nil {
  259. err = errors.Wrap(err, "cannot ReadDirents")
  260. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  261. return nil
  262. }
  263. return err
  264. }
  265. if !options.Unsorted {
  266. sort.Sort(deChildren) // sort children entries unless upstream says to leave unsorted
  267. }
  268. for _, deChild := range deChildren {
  269. osChildname := filepath.Join(osPathname, deChild.name)
  270. err = walk(osChildname, deChild, options)
  271. if err != nil {
  272. if err != filepath.SkipDir {
  273. return err
  274. }
  275. // If received skipdir on a directory, stop processing that
  276. // directory, but continue to its siblings. If received skipdir on a
  277. // non-directory, stop processing remaining siblings.
  278. if deChild.IsSymlink() {
  279. // Only need to Stat entry if platform did not already have
  280. // os.ModeDir set, such as would be the case for unix like
  281. // operating systems. (This guard eliminates extra os.Stat check
  282. // on Windows.)
  283. if !deChild.IsDir() {
  284. // Resolve symbolic link referent to determine whether node
  285. // is directory or not.
  286. referent, err := os.Readlink(osChildname)
  287. if err != nil {
  288. err = errors.Wrap(err, "cannot Readlink")
  289. if action := options.ErrorCallback(osChildname, err); action == SkipNode {
  290. continue // with next child
  291. }
  292. return err
  293. }
  294. var osp string
  295. if filepath.IsAbs(referent) {
  296. osp = referent
  297. } else {
  298. osp = filepath.Join(osPathname, referent)
  299. }
  300. fi, err := os.Stat(osp)
  301. if err != nil {
  302. err = errors.Wrap(err, "cannot Stat")
  303. if action := options.ErrorCallback(osp, err); action == SkipNode {
  304. continue // with next child
  305. }
  306. return err
  307. }
  308. deChild.modeType = fi.Mode() & os.ModeType
  309. }
  310. }
  311. if !deChild.IsDir() {
  312. // If not directory, return immediately, thus skipping remainder
  313. // of siblings.
  314. return nil
  315. }
  316. }
  317. }
  318. if options.PostChildrenCallback == nil {
  319. return nil
  320. }
  321. err = options.PostChildrenCallback(osPathname, dirent)
  322. if err == nil || err == filepath.SkipDir {
  323. return err
  324. }
  325. err = errors.Wrap(err, "PostChildrenCallback") // wrap potential errors returned by callback
  326. if action := options.ErrorCallback(osPathname, err); action == SkipNode {
  327. return nil
  328. }
  329. return err
  330. }