diskv.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. // Diskv (disk-vee) is a simple, persistent, key-value store.
  2. // It stores all data flatly on the filesystem.
  3. package diskv
  4. import (
  5. "bytes"
  6. "errors"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "os"
  11. "path/filepath"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. )
  // Fallback values that New substitutes for Options fields left at their
  // zero value.
  const (
  	// defaultBasePath is used when Options.BasePath is empty.
  	defaultBasePath = "diskv"

  	// defaultFilePerm is used when Options.FilePerm is zero.
  	defaultFilePerm = os.FileMode(0666)

  	// defaultPathPerm is used when Options.PathPerm is zero.
  	defaultPathPerm = os.FileMode(0777)
  )
  // defaultTransform returns an empty (non-nil) directory list for every key.
  var defaultTransform = func(s string) []string { return make([]string, 0) }

  // Sentinel errors used within this file.
  var (
  	errCanceled        = errors.New("canceled")
  	errEmptyKey        = errors.New("empty key")
  	errBadKey          = errors.New("bad key")
  	errImportDirectory = errors.New("can't import a directory")
  )
  // TransformFunction maps a key to an ordered list of directory names. The
  // list is joined beneath the store's base path, and the key's data file is
  // placed in the resulting directory under the key's own name.
  //
  // For example, a TransformFunction that turns "abcdef" into
  // ["ab", "cde", "f"] stores the data at <basedir>/ab/cde/f/abcdef.
  type TransformFunction func(s string) []string
  35. // Options define a set of properties that dictate Diskv behavior.
  36. // All values are optional.
  37. type Options struct {
  38. BasePath string
  39. Transform TransformFunction
  40. CacheSizeMax uint64 // bytes
  41. PathPerm os.FileMode
  42. FilePerm os.FileMode
  43. // If TempDir is set, it will enable filesystem atomic writes by
  44. // writing temporary files to that location before being moved
  45. // to BasePath.
  46. // Note that TempDir MUST be on the same device/partition as
  47. // BasePath.
  48. TempDir string
  49. Index Index
  50. IndexLess LessFunction
  51. Compression Compression
  52. }
  53. // Diskv implements the Diskv interface. You shouldn't construct Diskv
  54. // structures directly; instead, use the New constructor.
  55. type Diskv struct {
  56. Options
  57. mu sync.RWMutex
  58. cache map[string][]byte
  59. cacheSize uint64
  60. }
  61. // New returns an initialized Diskv structure, ready to use.
  62. // If the path identified by baseDir already contains data,
  63. // it will be accessible, but not yet cached.
  64. func New(o Options) *Diskv {
  65. if o.BasePath == "" {
  66. o.BasePath = defaultBasePath
  67. }
  68. if o.Transform == nil {
  69. o.Transform = defaultTransform
  70. }
  71. if o.PathPerm == 0 {
  72. o.PathPerm = defaultPathPerm
  73. }
  74. if o.FilePerm == 0 {
  75. o.FilePerm = defaultFilePerm
  76. }
  77. d := &Diskv{
  78. Options: o,
  79. cache: map[string][]byte{},
  80. cacheSize: 0,
  81. }
  82. if d.Index != nil && d.IndexLess != nil {
  83. d.Index.Initialize(d.IndexLess, d.Keys(nil))
  84. }
  85. return d
  86. }
  87. // Write synchronously writes the key-value pair to disk, making it immediately
  88. // available for reads. Write relies on the filesystem to perform an eventual
  89. // sync to physical media. If you need stronger guarantees, see WriteStream.
  90. func (d *Diskv) Write(key string, val []byte) error {
  91. return d.WriteStream(key, bytes.NewBuffer(val), false)
  92. }
  93. // WriteStream writes the data represented by the io.Reader to the disk, under
  94. // the provided key. If sync is true, WriteStream performs an explicit sync on
  95. // the file as soon as it's written.
  96. //
  97. // bytes.Buffer provides io.Reader semantics for basic data types.
  98. func (d *Diskv) WriteStream(key string, r io.Reader, sync bool) error {
  99. if len(key) <= 0 {
  100. return errEmptyKey
  101. }
  102. d.mu.Lock()
  103. defer d.mu.Unlock()
  104. return d.writeStreamWithLock(key, r, sync)
  105. }
  106. // createKeyFileWithLock either creates the key file directly, or
  107. // creates a temporary file in TempDir if it is set.
  108. func (d *Diskv) createKeyFileWithLock(key string) (*os.File, error) {
  109. if d.TempDir != "" {
  110. if err := os.MkdirAll(d.TempDir, d.PathPerm); err != nil {
  111. return nil, fmt.Errorf("temp mkdir: %s", err)
  112. }
  113. f, err := ioutil.TempFile(d.TempDir, "")
  114. if err != nil {
  115. return nil, fmt.Errorf("temp file: %s", err)
  116. }
  117. if err := f.Chmod(d.FilePerm); err != nil {
  118. f.Close() // error deliberately ignored
  119. os.Remove(f.Name()) // error deliberately ignored
  120. return nil, fmt.Errorf("chmod: %s", err)
  121. }
  122. return f, nil
  123. }
  124. mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC // overwrite if exists
  125. f, err := os.OpenFile(d.completeFilename(key), mode, d.FilePerm)
  126. if err != nil {
  127. return nil, fmt.Errorf("open file: %s", err)
  128. }
  129. return f, nil
  130. }
  131. // writeStream does no input validation checking.
  132. func (d *Diskv) writeStreamWithLock(key string, r io.Reader, sync bool) error {
  133. if err := d.ensurePathWithLock(key); err != nil {
  134. return fmt.Errorf("ensure path: %s", err)
  135. }
  136. f, err := d.createKeyFileWithLock(key)
  137. if err != nil {
  138. return fmt.Errorf("create key file: %s", err)
  139. }
  140. wc := io.WriteCloser(&nopWriteCloser{f})
  141. if d.Compression != nil {
  142. wc, err = d.Compression.Writer(f)
  143. if err != nil {
  144. f.Close() // error deliberately ignored
  145. os.Remove(f.Name()) // error deliberately ignored
  146. return fmt.Errorf("compression writer: %s", err)
  147. }
  148. }
  149. if _, err := io.Copy(wc, r); err != nil {
  150. f.Close() // error deliberately ignored
  151. os.Remove(f.Name()) // error deliberately ignored
  152. return fmt.Errorf("i/o copy: %s", err)
  153. }
  154. if err := wc.Close(); err != nil {
  155. f.Close() // error deliberately ignored
  156. os.Remove(f.Name()) // error deliberately ignored
  157. return fmt.Errorf("compression close: %s", err)
  158. }
  159. if sync {
  160. if err := f.Sync(); err != nil {
  161. f.Close() // error deliberately ignored
  162. os.Remove(f.Name()) // error deliberately ignored
  163. return fmt.Errorf("file sync: %s", err)
  164. }
  165. }
  166. if err := f.Close(); err != nil {
  167. return fmt.Errorf("file close: %s", err)
  168. }
  169. if f.Name() != d.completeFilename(key) {
  170. if err := os.Rename(f.Name(), d.completeFilename(key)); err != nil {
  171. os.Remove(f.Name()) // error deliberately ignored
  172. return fmt.Errorf("rename: %s", err)
  173. }
  174. }
  175. if d.Index != nil {
  176. d.Index.Insert(key)
  177. }
  178. d.bustCacheWithLock(key) // cache only on read
  179. return nil
  180. }
  181. // Import imports the source file into diskv under the destination key. If the
  182. // destination key already exists, it's overwritten. If move is true, the
  183. // source file is removed after a successful import.
  184. func (d *Diskv) Import(srcFilename, dstKey string, move bool) (err error) {
  185. if dstKey == "" {
  186. return errEmptyKey
  187. }
  188. if fi, err := os.Stat(srcFilename); err != nil {
  189. return err
  190. } else if fi.IsDir() {
  191. return errImportDirectory
  192. }
  193. d.mu.Lock()
  194. defer d.mu.Unlock()
  195. if err := d.ensurePathWithLock(dstKey); err != nil {
  196. return fmt.Errorf("ensure path: %s", err)
  197. }
  198. if move {
  199. if err := syscall.Rename(srcFilename, d.completeFilename(dstKey)); err == nil {
  200. d.bustCacheWithLock(dstKey)
  201. return nil
  202. } else if err != syscall.EXDEV {
  203. // If it failed due to being on a different device, fall back to copying
  204. return err
  205. }
  206. }
  207. f, err := os.Open(srcFilename)
  208. if err != nil {
  209. return err
  210. }
  211. defer f.Close()
  212. err = d.writeStreamWithLock(dstKey, f, false)
  213. if err == nil && move {
  214. err = os.Remove(srcFilename)
  215. }
  216. return err
  217. }
  218. // Read reads the key and returns the value.
  219. // If the key is available in the cache, Read won't touch the disk.
  220. // If the key is not in the cache, Read will have the side-effect of
  221. // lazily caching the value.
  222. func (d *Diskv) Read(key string) ([]byte, error) {
  223. rc, err := d.ReadStream(key, false)
  224. if err != nil {
  225. return []byte{}, err
  226. }
  227. defer rc.Close()
  228. return ioutil.ReadAll(rc)
  229. }
  230. // ReadStream reads the key and returns the value (data) as an io.ReadCloser.
  231. // If the value is cached from a previous read, and direct is false,
  232. // ReadStream will use the cached value. Otherwise, it will return a handle to
  233. // the file on disk, and cache the data on read.
  234. //
  235. // If direct is true, ReadStream will lazily delete any cached value for the
  236. // key, and return a direct handle to the file on disk.
  237. //
  238. // If compression is enabled, ReadStream taps into the io.Reader stream prior
  239. // to decompression, and caches the compressed data.
  240. func (d *Diskv) ReadStream(key string, direct bool) (io.ReadCloser, error) {
  241. d.mu.RLock()
  242. defer d.mu.RUnlock()
  243. if val, ok := d.cache[key]; ok {
  244. if !direct {
  245. buf := bytes.NewBuffer(val)
  246. if d.Compression != nil {
  247. return d.Compression.Reader(buf)
  248. }
  249. return ioutil.NopCloser(buf), nil
  250. }
  251. go func() {
  252. d.mu.Lock()
  253. defer d.mu.Unlock()
  254. d.uncacheWithLock(key, uint64(len(val)))
  255. }()
  256. }
  257. return d.readWithRLock(key)
  258. }
  259. // read ignores the cache, and returns an io.ReadCloser representing the
  260. // decompressed data for the given key, streamed from the disk. Clients should
  261. // acquire a read lock on the Diskv and check the cache themselves before
  262. // calling read.
  263. func (d *Diskv) readWithRLock(key string) (io.ReadCloser, error) {
  264. filename := d.completeFilename(key)
  265. fi, err := os.Stat(filename)
  266. if err != nil {
  267. return nil, err
  268. }
  269. if fi.IsDir() {
  270. return nil, os.ErrNotExist
  271. }
  272. f, err := os.Open(filename)
  273. if err != nil {
  274. return nil, err
  275. }
  276. var r io.Reader
  277. if d.CacheSizeMax > 0 {
  278. r = newSiphon(f, d, key)
  279. } else {
  280. r = &closingReader{f}
  281. }
  282. var rc = io.ReadCloser(ioutil.NopCloser(r))
  283. if d.Compression != nil {
  284. rc, err = d.Compression.Reader(r)
  285. if err != nil {
  286. return nil, err
  287. }
  288. }
  289. return rc, nil
  290. }
  // closingReader is an io.Reader over a ReadCloser that closes the
  // underlying ReadCloser by itself as soon as a read reports io.EOF.
  type closingReader struct {
  	rc io.ReadCloser
  }

  // Read forwards to the wrapped ReadCloser. On io.EOF it closes the wrapped
  // value; if that close fails, the close error is returned in place of EOF.
  func (cr closingReader) Read(p []byte) (int, error) {
  	n, err := cr.rc.Read(p)
  	if err != io.EOF {
  		return n, err
  	}
  	// close must succeed for Read to succeed
  	if closeErr := cr.rc.Close(); closeErr != nil {
  		return n, closeErr
  	}
  	return n, io.EOF
  }
  305. // siphon is like a TeeReader: it copies all data read through it to an
  306. // internal buffer, and moves that buffer to the cache at EOF.
  307. type siphon struct {
  308. f *os.File
  309. d *Diskv
  310. key string
  311. buf *bytes.Buffer
  312. }
  313. // newSiphon constructs a siphoning reader that represents the passed file.
  314. // When a successful series of reads ends in an EOF, the siphon will write
  315. // the buffered data to Diskv's cache under the given key.
  316. func newSiphon(f *os.File, d *Diskv, key string) io.Reader {
  317. return &siphon{
  318. f: f,
  319. d: d,
  320. key: key,
  321. buf: &bytes.Buffer{},
  322. }
  323. }
  324. // Read implements the io.Reader interface for siphon.
  325. func (s *siphon) Read(p []byte) (int, error) {
  326. n, err := s.f.Read(p)
  327. if err == nil {
  328. return s.buf.Write(p[0:n]) // Write must succeed for Read to succeed
  329. }
  330. if err == io.EOF {
  331. s.d.cacheWithoutLock(s.key, s.buf.Bytes()) // cache may fail
  332. if closeErr := s.f.Close(); closeErr != nil {
  333. return n, closeErr // close must succeed for Read to succeed
  334. }
  335. return n, err
  336. }
  337. return n, err
  338. }
  339. // Erase synchronously erases the given key from the disk and the cache.
  340. func (d *Diskv) Erase(key string) error {
  341. d.mu.Lock()
  342. defer d.mu.Unlock()
  343. d.bustCacheWithLock(key)
  344. // erase from index
  345. if d.Index != nil {
  346. d.Index.Delete(key)
  347. }
  348. // erase from disk
  349. filename := d.completeFilename(key)
  350. if s, err := os.Stat(filename); err == nil {
  351. if s.IsDir() {
  352. return errBadKey
  353. }
  354. if err = os.Remove(filename); err != nil {
  355. return err
  356. }
  357. } else {
  358. // Return err as-is so caller can do os.IsNotExist(err).
  359. return err
  360. }
  361. // clean up and return
  362. d.pruneDirsWithLock(key)
  363. return nil
  364. }
  365. // EraseAll will delete all of the data from the store, both in the cache and on
  366. // the disk. Note that EraseAll doesn't distinguish diskv-related data from non-
  367. // diskv-related data. Care should be taken to always specify a diskv base
  368. // directory that is exclusively for diskv data.
  369. func (d *Diskv) EraseAll() error {
  370. d.mu.Lock()
  371. defer d.mu.Unlock()
  372. d.cache = make(map[string][]byte)
  373. d.cacheSize = 0
  374. if d.TempDir != "" {
  375. os.RemoveAll(d.TempDir) // errors ignored
  376. }
  377. return os.RemoveAll(d.BasePath)
  378. }
  379. // Has returns true if the given key exists.
  380. func (d *Diskv) Has(key string) bool {
  381. d.mu.Lock()
  382. defer d.mu.Unlock()
  383. if _, ok := d.cache[key]; ok {
  384. return true
  385. }
  386. filename := d.completeFilename(key)
  387. s, err := os.Stat(filename)
  388. if err != nil {
  389. return false
  390. }
  391. if s.IsDir() {
  392. return false
  393. }
  394. return true
  395. }
  396. // Keys returns a channel that will yield every key accessible by the store,
  397. // in undefined order. If a cancel channel is provided, closing it will
  398. // terminate and close the keys channel.
  399. func (d *Diskv) Keys(cancel <-chan struct{}) <-chan string {
  400. return d.KeysPrefix("", cancel)
  401. }
  402. // KeysPrefix returns a channel that will yield every key accessible by the
  403. // store with the given prefix, in undefined order. If a cancel channel is
  404. // provided, closing it will terminate and close the keys channel. If the
  405. // provided prefix is the empty string, all keys will be yielded.
  406. func (d *Diskv) KeysPrefix(prefix string, cancel <-chan struct{}) <-chan string {
  407. var prepath string
  408. if prefix == "" {
  409. prepath = d.BasePath
  410. } else {
  411. prepath = d.pathFor(prefix)
  412. }
  413. c := make(chan string)
  414. go func() {
  415. filepath.Walk(prepath, walker(c, prefix, cancel))
  416. close(c)
  417. }()
  418. return c
  419. }
  420. // walker returns a function which satisfies the filepath.WalkFunc interface.
  421. // It sends every non-directory file entry down the channel c.
  422. func walker(c chan<- string, prefix string, cancel <-chan struct{}) filepath.WalkFunc {
  423. return func(path string, info os.FileInfo, err error) error {
  424. if err != nil {
  425. return err
  426. }
  427. if info.IsDir() || !strings.HasPrefix(info.Name(), prefix) {
  428. return nil // "pass"
  429. }
  430. select {
  431. case c <- info.Name():
  432. case <-cancel:
  433. return errCanceled
  434. }
  435. return nil
  436. }
  437. }
  438. // pathFor returns the absolute path for location on the filesystem where the
  439. // data for the given key will be stored.
  440. func (d *Diskv) pathFor(key string) string {
  441. return filepath.Join(d.BasePath, filepath.Join(d.Transform(key)...))
  442. }
  443. // ensurePathWithLock is a helper function that generates all necessary
  444. // directories on the filesystem for the given key.
  445. func (d *Diskv) ensurePathWithLock(key string) error {
  446. return os.MkdirAll(d.pathFor(key), d.PathPerm)
  447. }
  448. // completeFilename returns the absolute path to the file for the given key.
  449. func (d *Diskv) completeFilename(key string) string {
  450. return filepath.Join(d.pathFor(key), key)
  451. }
  452. // cacheWithLock attempts to cache the given key-value pair in the store's
  453. // cache. It can fail if the value is larger than the cache's maximum size.
  454. func (d *Diskv) cacheWithLock(key string, val []byte) error {
  455. valueSize := uint64(len(val))
  456. if err := d.ensureCacheSpaceWithLock(valueSize); err != nil {
  457. return fmt.Errorf("%s; not caching", err)
  458. }
  459. // be very strict about memory guarantees
  460. if (d.cacheSize + valueSize) > d.CacheSizeMax {
  461. panic(fmt.Sprintf("failed to make room for value (%d/%d)", valueSize, d.CacheSizeMax))
  462. }
  463. d.cache[key] = val
  464. d.cacheSize += valueSize
  465. return nil
  466. }
  467. // cacheWithoutLock acquires the store's (write) mutex and calls cacheWithLock.
  468. func (d *Diskv) cacheWithoutLock(key string, val []byte) error {
  469. d.mu.Lock()
  470. defer d.mu.Unlock()
  471. return d.cacheWithLock(key, val)
  472. }
  473. func (d *Diskv) bustCacheWithLock(key string) {
  474. if val, ok := d.cache[key]; ok {
  475. d.uncacheWithLock(key, uint64(len(val)))
  476. }
  477. }
  478. func (d *Diskv) uncacheWithLock(key string, sz uint64) {
  479. d.cacheSize -= sz
  480. delete(d.cache, key)
  481. }
  482. // pruneDirsWithLock deletes empty directories in the path walk leading to the
  483. // key k. Typically this function is called after an Erase is made.
  484. func (d *Diskv) pruneDirsWithLock(key string) error {
  485. pathlist := d.Transform(key)
  486. for i := range pathlist {
  487. dir := filepath.Join(d.BasePath, filepath.Join(pathlist[:len(pathlist)-i]...))
  488. // thanks to Steven Blenkinsop for this snippet
  489. switch fi, err := os.Stat(dir); true {
  490. case err != nil:
  491. return err
  492. case !fi.IsDir():
  493. panic(fmt.Sprintf("corrupt dirstate at %s", dir))
  494. }
  495. nlinks, err := filepath.Glob(filepath.Join(dir, "*"))
  496. if err != nil {
  497. return err
  498. } else if len(nlinks) > 0 {
  499. return nil // has subdirs -- do not prune
  500. }
  501. if err = os.Remove(dir); err != nil {
  502. return err
  503. }
  504. }
  505. return nil
  506. }
  507. // ensureCacheSpaceWithLock deletes entries from the cache in arbitrary order
  508. // until the cache has at least valueSize bytes available.
  509. func (d *Diskv) ensureCacheSpaceWithLock(valueSize uint64) error {
  510. if valueSize > d.CacheSizeMax {
  511. return fmt.Errorf("value size (%d bytes) too large for cache (%d bytes)", valueSize, d.CacheSizeMax)
  512. }
  513. safe := func() bool { return (d.cacheSize + valueSize) <= d.CacheSizeMax }
  514. for key, val := range d.cache {
  515. if safe() {
  516. break
  517. }
  518. d.uncacheWithLock(key, uint64(len(val)))
  519. }
  520. if !safe() {
  521. panic(fmt.Sprintf("%d bytes still won't fit in the cache! (max %d bytes)", valueSize, d.CacheSizeMax))
  522. }
  523. return nil
  524. }
  // nopWriteCloser adapts an io.Writer to io.WriteCloser. Write is promoted
  // from the embedded Writer; Close does nothing and always succeeds, so the
  // wrapped writer is never closed through this type.
  type nopWriteCloser struct {
  	io.Writer
  }

  // Close is a no-op that satisfies io.Closer.
  func (wc *nopWriteCloser) Close() error { return nil }