cache.go 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. // Copyright 2017 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package cache implements a build artifact cache.
  5. //
  6. // This package is a slightly modified fork of Go's
  7. // cmd/go/internal/cache package.
  8. package cache
  9. import (
  10. "bytes"
  11. "crypto/sha256"
  12. "encoding/hex"
  13. "errors"
  14. "fmt"
  15. "io"
  16. "io/ioutil"
  17. "os"
  18. "path/filepath"
  19. "strconv"
  20. "strings"
  21. "time"
  22. "honnef.co/go/tools/internal/renameio"
  23. )
  24. // An ActionID is a cache action key, the hash of a complete description of a
  25. // repeatable computation (command line, environment variables,
  26. // input file contents, executable contents).
  27. type ActionID [HashSize]byte
  28. // An OutputID is a cache output key, the hash of an output of a computation.
  29. type OutputID [HashSize]byte
  30. // A Cache is a package cache, backed by a file system directory tree.
  31. type Cache struct {
  32. dir string
  33. now func() time.Time
  34. }
  35. // Open opens and returns the cache in the given directory.
  36. //
  37. // It is safe for multiple processes on a single machine to use the
  38. // same cache directory in a local file system simultaneously.
  39. // They will coordinate using operating system file locks and may
  40. // duplicate effort but will not corrupt the cache.
  41. //
  42. // However, it is NOT safe for multiple processes on different machines
  43. // to share a cache directory (for example, if the directory were stored
  44. // in a network file system). File locking is notoriously unreliable in
  45. // network file systems and may not suffice to protect the cache.
  46. //
  47. func Open(dir string) (*Cache, error) {
  48. info, err := os.Stat(dir)
  49. if err != nil {
  50. return nil, err
  51. }
  52. if !info.IsDir() {
  53. return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
  54. }
  55. for i := 0; i < 256; i++ {
  56. name := filepath.Join(dir, fmt.Sprintf("%02x", i))
  57. if err := os.MkdirAll(name, 0777); err != nil {
  58. return nil, err
  59. }
  60. }
  61. c := &Cache{
  62. dir: dir,
  63. now: time.Now,
  64. }
  65. return c, nil
  66. }
  67. // fileName returns the name of the file corresponding to the given id.
  68. func (c *Cache) fileName(id [HashSize]byte, key string) string {
  69. return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
  70. }
  71. var errMissing = errors.New("cache entry not found")
  72. const (
  73. // action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
  74. hexSize = HashSize * 2
  75. entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
  76. )
  77. // verify controls whether to run the cache in verify mode.
  78. // In verify mode, the cache always returns errMissing from Get
  79. // but then double-checks in Put that the data being written
  80. // exactly matches any existing entry. This provides an easy
  81. // way to detect program behavior that would have been different
  82. // had the cache entry been returned from Get.
  83. //
  84. // verify is enabled by setting the environment variable
  85. // GODEBUG=gocacheverify=1.
  86. var verify = false
  87. // DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
  88. var DebugTest = false
  89. func init() { initEnv() }
  90. func initEnv() {
  91. verify = false
  92. debugHash = false
  93. debug := strings.Split(os.Getenv("GODEBUG"), ",")
  94. for _, f := range debug {
  95. if f == "gocacheverify=1" {
  96. verify = true
  97. }
  98. if f == "gocachehash=1" {
  99. debugHash = true
  100. }
  101. if f == "gocachetest=1" {
  102. DebugTest = true
  103. }
  104. }
  105. }
  106. // Get looks up the action ID in the cache,
  107. // returning the corresponding output ID and file size, if any.
  108. // Note that finding an output ID does not guarantee that the
  109. // saved file for that output ID is still available.
  110. func (c *Cache) Get(id ActionID) (Entry, error) {
  111. if verify {
  112. return Entry{}, errMissing
  113. }
  114. return c.get(id)
  115. }
  116. type Entry struct {
  117. OutputID OutputID
  118. Size int64
  119. Time time.Time
  120. }
  121. // get is Get but does not respect verify mode, so that Put can use it.
  122. func (c *Cache) get(id ActionID) (Entry, error) {
  123. missing := func() (Entry, error) {
  124. return Entry{}, errMissing
  125. }
  126. f, err := os.Open(c.fileName(id, "a"))
  127. if err != nil {
  128. return missing()
  129. }
  130. defer f.Close()
  131. entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
  132. if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
  133. return missing()
  134. }
  135. if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
  136. return missing()
  137. }
  138. eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
  139. eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
  140. esize, entry := entry[1:1+20], entry[1+20:]
  141. //lint:ignore SA4006 See https://github.com/dominikh/go-tools/issues/465
  142. etime, entry := entry[1:1+20], entry[1+20:]
  143. var buf [HashSize]byte
  144. if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
  145. return missing()
  146. }
  147. if _, err := hex.Decode(buf[:], eout); err != nil {
  148. return missing()
  149. }
  150. i := 0
  151. for i < len(esize) && esize[i] == ' ' {
  152. i++
  153. }
  154. size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
  155. if err != nil || size < 0 {
  156. return missing()
  157. }
  158. i = 0
  159. for i < len(etime) && etime[i] == ' ' {
  160. i++
  161. }
  162. tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
  163. if err != nil || size < 0 {
  164. return missing()
  165. }
  166. c.used(c.fileName(id, "a"))
  167. return Entry{buf, size, time.Unix(0, tm)}, nil
  168. }
  169. // GetFile looks up the action ID in the cache and returns
  170. // the name of the corresponding data file.
  171. func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
  172. entry, err = c.Get(id)
  173. if err != nil {
  174. return "", Entry{}, err
  175. }
  176. file = c.OutputFile(entry.OutputID)
  177. info, err := os.Stat(file)
  178. if err != nil || info.Size() != entry.Size {
  179. return "", Entry{}, errMissing
  180. }
  181. return file, entry, nil
  182. }
  183. // GetBytes looks up the action ID in the cache and returns
  184. // the corresponding output bytes.
  185. // GetBytes should only be used for data that can be expected to fit in memory.
  186. func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
  187. entry, err := c.Get(id)
  188. if err != nil {
  189. return nil, entry, err
  190. }
  191. data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
  192. if sha256.Sum256(data) != entry.OutputID {
  193. return nil, entry, errMissing
  194. }
  195. return data, entry, nil
  196. }
  197. // OutputFile returns the name of the cache file storing output with the given OutputID.
  198. func (c *Cache) OutputFile(out OutputID) string {
  199. file := c.fileName(out, "d")
  200. c.used(file)
  201. return file
  202. }
  203. // Time constants for cache expiration.
  204. //
  205. // We set the mtime on a cache file on each use, but at most one per mtimeInterval (1 hour),
  206. // to avoid causing many unnecessary inode updates. The mtimes therefore
  207. // roughly reflect "time of last use" but may in fact be older by at most an hour.
  208. //
  209. // We scan the cache for entries to delete at most once per trimInterval (1 day).
  210. //
  211. // When we do scan the cache, we delete entries that have not been used for
  212. // at least trimLimit (5 days). Statistics gathered from a month of usage by
  213. // Go developers found that essentially all reuse of cached entries happened
  214. // within 5 days of the previous reuse. See golang.org/issue/22990.
  215. const (
  216. mtimeInterval = 1 * time.Hour
  217. trimInterval = 24 * time.Hour
  218. trimLimit = 5 * 24 * time.Hour
  219. )
  220. // used makes a best-effort attempt to update mtime on file,
  221. // so that mtime reflects cache access time.
  222. //
  223. // Because the reflection only needs to be approximate,
  224. // and to reduce the amount of disk activity caused by using
  225. // cache entries, used only updates the mtime if the current
  226. // mtime is more than an hour old. This heuristic eliminates
  227. // nearly all of the mtime updates that would otherwise happen,
  228. // while still keeping the mtimes useful for cache trimming.
  229. func (c *Cache) used(file string) {
  230. info, err := os.Stat(file)
  231. if err == nil && c.now().Sub(info.ModTime()) < mtimeInterval {
  232. return
  233. }
  234. os.Chtimes(file, c.now(), c.now())
  235. }
  236. // Trim removes old cache entries that are likely not to be reused.
  237. func (c *Cache) Trim() {
  238. now := c.now()
  239. // We maintain in dir/trim.txt the time of the last completed cache trim.
  240. // If the cache has been trimmed recently enough, do nothing.
  241. // This is the common case.
  242. data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
  243. t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
  244. if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
  245. return
  246. }
  247. // Trim each of the 256 subdirectories.
  248. // We subtract an additional mtimeInterval
  249. // to account for the imprecision of our "last used" mtimes.
  250. cutoff := now.Add(-trimLimit - mtimeInterval)
  251. for i := 0; i < 256; i++ {
  252. subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
  253. c.trimSubdir(subdir, cutoff)
  254. }
  255. // Ignore errors from here: if we don't write the complete timestamp, the
  256. // cache will appear older than it is, and we'll trim it again next time.
  257. renameio.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())))
  258. }
  259. // trimSubdir trims a single cache subdirectory.
  260. func (c *Cache) trimSubdir(subdir string, cutoff time.Time) {
  261. // Read all directory entries from subdir before removing
  262. // any files, in case removing files invalidates the file offset
  263. // in the directory scan. Also, ignore error from f.Readdirnames,
  264. // because we don't care about reporting the error and we still
  265. // want to process any entries found before the error.
  266. f, err := os.Open(subdir)
  267. if err != nil {
  268. return
  269. }
  270. names, _ := f.Readdirnames(-1)
  271. f.Close()
  272. for _, name := range names {
  273. // Remove only cache entries (xxxx-a and xxxx-d).
  274. if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
  275. continue
  276. }
  277. entry := filepath.Join(subdir, name)
  278. info, err := os.Stat(entry)
  279. if err == nil && info.ModTime().Before(cutoff) {
  280. os.Remove(entry)
  281. }
  282. }
  283. }
  284. // putIndexEntry adds an entry to the cache recording that executing the action
  285. // with the given id produces an output with the given output id (hash) and size.
  286. func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
  287. // Note: We expect that for one reason or another it may happen
  288. // that repeating an action produces a different output hash
  289. // (for example, if the output contains a time stamp or temp dir name).
  290. // While not ideal, this is also not a correctness problem, so we
  291. // don't make a big deal about it. In particular, we leave the action
  292. // cache entries writable specifically so that they can be overwritten.
  293. //
  294. // Setting GODEBUG=gocacheverify=1 does make a big deal:
  295. // in verify mode we are double-checking that the cache entries
  296. // are entirely reproducible. As just noted, this may be unrealistic
  297. // in some cases but the check is also useful for shaking out real bugs.
  298. entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
  299. if verify && allowVerify {
  300. old, err := c.get(id)
  301. if err == nil && (old.OutputID != out || old.Size != size) {
  302. // panic to show stack trace, so we can see what code is generating this cache entry.
  303. msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
  304. panic(msg)
  305. }
  306. }
  307. file := c.fileName(id, "a")
  308. if err := ioutil.WriteFile(file, entry, 0666); err != nil {
  309. // TODO(bcmills): This Remove potentially races with another go command writing to file.
  310. // Can we eliminate it?
  311. os.Remove(file)
  312. return err
  313. }
  314. os.Chtimes(file, c.now(), c.now()) // mainly for tests
  315. return nil
  316. }
  317. // Put stores the given output in the cache as the output for the action ID.
  318. // It may read file twice. The content of file must not change between the two passes.
  319. func (c *Cache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
  320. return c.put(id, file, true)
  321. }
  322. // PutNoVerify is like Put but disables the verify check
  323. // when GODEBUG=goverifycache=1 is set.
  324. // It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
  325. // like test output containing times and the like.
  326. func (c *Cache) PutNoVerify(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
  327. return c.put(id, file, false)
  328. }
  329. func (c *Cache) put(id ActionID, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
  330. // Compute output ID.
  331. h := sha256.New()
  332. if _, err := file.Seek(0, 0); err != nil {
  333. return OutputID{}, 0, err
  334. }
  335. size, err := io.Copy(h, file)
  336. if err != nil {
  337. return OutputID{}, 0, err
  338. }
  339. var out OutputID
  340. h.Sum(out[:0])
  341. // Copy to cached output file (if not already present).
  342. if err := c.copyFile(file, out, size); err != nil {
  343. return out, size, err
  344. }
  345. // Add to cache index.
  346. return out, size, c.putIndexEntry(id, out, size, allowVerify)
  347. }
  348. // PutBytes stores the given bytes in the cache as the output for the action ID.
  349. func (c *Cache) PutBytes(id ActionID, data []byte) error {
  350. _, _, err := c.Put(id, bytes.NewReader(data))
  351. return err
  352. }
  353. // copyFile copies file into the cache, expecting it to have the given
  354. // output ID and size, if that file is not present already.
  355. func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
  356. name := c.fileName(out, "d")
  357. info, err := os.Stat(name)
  358. if err == nil && info.Size() == size {
  359. // Check hash.
  360. if f, err := os.Open(name); err == nil {
  361. h := sha256.New()
  362. io.Copy(h, f)
  363. f.Close()
  364. var out2 OutputID
  365. h.Sum(out2[:0])
  366. if out == out2 {
  367. return nil
  368. }
  369. }
  370. // Hash did not match. Fall through and rewrite file.
  371. }
  372. // Copy file to cache directory.
  373. mode := os.O_RDWR | os.O_CREATE
  374. if err == nil && info.Size() > size { // shouldn't happen but fix in case
  375. mode |= os.O_TRUNC
  376. }
  377. f, err := os.OpenFile(name, mode, 0666)
  378. if err != nil {
  379. return err
  380. }
  381. defer f.Close()
  382. if size == 0 {
  383. // File now exists with correct size.
  384. // Only one possible zero-length file, so contents are OK too.
  385. // Early return here makes sure there's a "last byte" for code below.
  386. return nil
  387. }
  388. // From here on, if any of the I/O writing the file fails,
  389. // we make a best-effort attempt to truncate the file f
  390. // before returning, to avoid leaving bad bytes in the file.
  391. // Copy file to f, but also into h to double-check hash.
  392. if _, err := file.Seek(0, 0); err != nil {
  393. f.Truncate(0)
  394. return err
  395. }
  396. h := sha256.New()
  397. w := io.MultiWriter(f, h)
  398. if _, err := io.CopyN(w, file, size-1); err != nil {
  399. f.Truncate(0)
  400. return err
  401. }
  402. // Check last byte before writing it; writing it will make the size match
  403. // what other processes expect to find and might cause them to start
  404. // using the file.
  405. buf := make([]byte, 1)
  406. if _, err := file.Read(buf); err != nil {
  407. f.Truncate(0)
  408. return err
  409. }
  410. h.Write(buf)
  411. sum := h.Sum(nil)
  412. if !bytes.Equal(sum, out[:]) {
  413. f.Truncate(0)
  414. return fmt.Errorf("file content changed underfoot")
  415. }
  416. // Commit cache file entry.
  417. if _, err := f.Write(buf); err != nil {
  418. f.Truncate(0)
  419. return err
  420. }
  421. if err := f.Close(); err != nil {
  422. // Data might not have been written,
  423. // but file may look like it is the right size.
  424. // To be extra careful, remove cached file.
  425. os.Remove(name)
  426. return err
  427. }
  428. os.Chtimes(name, c.now(), c.now()) // mainly for tests
  429. return nil
  430. }