httpcache.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552
// Package httpcache provides an http.RoundTripper implementation that works as a
// mostly RFC-compliant cache for HTTP responses.
  3. //
  4. // It is only suitable for use as a 'private' cache (i.e. for a web-browser or an API-client
  5. // and not for a shared proxy).
  6. //
  7. package httpcache
  8. import (
  9. "bufio"
  10. "bytes"
  11. "errors"
  12. "io"
  13. "io/ioutil"
  14. "net/http"
  15. "net/http/httputil"
  16. "strings"
  17. "sync"
  18. "time"
  19. )
  20. const (
  21. stale = iota
  22. fresh
  23. transparent
  24. // XFromCache is the header added to responses that are returned from the cache
  25. XFromCache = "X-From-Cache"
  26. )
  27. // A Cache interface is used by the Transport to store and retrieve responses.
  28. type Cache interface {
  29. // Get returns the []byte representation of a cached response and a bool
  30. // set to true if the value isn't empty
  31. Get(key string) (responseBytes []byte, ok bool)
  32. // Set stores the []byte representation of a response against a key
  33. Set(key string, responseBytes []byte)
  34. // Delete removes the value associated with the key
  35. Delete(key string)
  36. }
  37. // cacheKey returns the cache key for req.
  38. func cacheKey(req *http.Request) string {
  39. if req.Method == http.MethodGet {
  40. return req.URL.String()
  41. } else {
  42. return req.Method + " " + req.URL.String()
  43. }
  44. }
  45. // CachedResponse returns the cached http.Response for req if present, and nil
  46. // otherwise.
  47. func CachedResponse(c Cache, req *http.Request) (resp *http.Response, err error) {
  48. cachedVal, ok := c.Get(cacheKey(req))
  49. if !ok {
  50. return
  51. }
  52. b := bytes.NewBuffer(cachedVal)
  53. return http.ReadResponse(bufio.NewReader(b), req)
  54. }
  55. // MemoryCache is an implemtation of Cache that stores responses in an in-memory map.
  56. type MemoryCache struct {
  57. mu sync.RWMutex
  58. items map[string][]byte
  59. }
  60. // Get returns the []byte representation of the response and true if present, false if not
  61. func (c *MemoryCache) Get(key string) (resp []byte, ok bool) {
  62. c.mu.RLock()
  63. resp, ok = c.items[key]
  64. c.mu.RUnlock()
  65. return resp, ok
  66. }
  67. // Set saves response resp to the cache with key
  68. func (c *MemoryCache) Set(key string, resp []byte) {
  69. c.mu.Lock()
  70. c.items[key] = resp
  71. c.mu.Unlock()
  72. }
  73. // Delete removes key from the cache
  74. func (c *MemoryCache) Delete(key string) {
  75. c.mu.Lock()
  76. delete(c.items, key)
  77. c.mu.Unlock()
  78. }
  79. // NewMemoryCache returns a new Cache that will store items in an in-memory map
  80. func NewMemoryCache() *MemoryCache {
  81. c := &MemoryCache{items: map[string][]byte{}}
  82. return c
  83. }
// Transport is an implementation of http.RoundTripper that will return values from a cache
// where possible (avoiding a network request) and will additionally add validators (etag/if-modified-since)
// to repeated requests allowing servers to return 304 / Not Modified
type Transport struct {
	// The RoundTripper interface actually used to make requests
	// If nil, http.DefaultTransport is used
	Transport http.RoundTripper
	// Cache is the store responses are looked up in and saved to. RoundTrip
	// calls its methods unconditionally, so it must be set before use.
	Cache Cache
	// If true, responses returned from the cache will be given an extra header, X-From-Cache
	MarkCachedResponses bool
}
  95. // NewTransport returns a new Transport with the
  96. // provided Cache implementation and MarkCachedResponses set to true
  97. func NewTransport(c Cache) *Transport {
  98. return &Transport{Cache: c, MarkCachedResponses: true}
  99. }
  100. // Client returns an *http.Client that caches responses.
  101. func (t *Transport) Client() *http.Client {
  102. return &http.Client{Transport: t}
  103. }
  104. // varyMatches will return false unless all of the cached values for the headers listed in Vary
  105. // match the new request
  106. func varyMatches(cachedResp *http.Response, req *http.Request) bool {
  107. for _, header := range headerAllCommaSepValues(cachedResp.Header, "vary") {
  108. header = http.CanonicalHeaderKey(header)
  109. if header != "" && req.Header.Get(header) != cachedResp.Header.Get("X-Varied-"+header) {
  110. return false
  111. }
  112. }
  113. return true
  114. }
  115. // RoundTrip takes a Request and returns a Response
  116. //
  117. // If there is a fresh Response already in cache, then it will be returned without connecting to
  118. // the server.
  119. //
  120. // If there is a stale Response, then any validators it contains will be set on the new request
  121. // to give the server a chance to respond with NotModified. If this happens, then the cached Response
  122. // will be returned.
  123. func (t *Transport) RoundTrip(req *http.Request) (resp *http.Response, err error) {
  124. cacheKey := cacheKey(req)
  125. cacheable := (req.Method == "GET" || req.Method == "HEAD") && req.Header.Get("range") == ""
  126. var cachedResp *http.Response
  127. if cacheable {
  128. cachedResp, err = CachedResponse(t.Cache, req)
  129. } else {
  130. // Need to invalidate an existing value
  131. t.Cache.Delete(cacheKey)
  132. }
  133. transport := t.Transport
  134. if transport == nil {
  135. transport = http.DefaultTransport
  136. }
  137. if cacheable && cachedResp != nil && err == nil {
  138. if t.MarkCachedResponses {
  139. cachedResp.Header.Set(XFromCache, "1")
  140. }
  141. if varyMatches(cachedResp, req) {
  142. // Can only use cached value if the new request doesn't Vary significantly
  143. freshness := getFreshness(cachedResp.Header, req.Header)
  144. if freshness == fresh {
  145. return cachedResp, nil
  146. }
  147. if freshness == stale {
  148. var req2 *http.Request
  149. // Add validators if caller hasn't already done so
  150. etag := cachedResp.Header.Get("etag")
  151. if etag != "" && req.Header.Get("etag") == "" {
  152. req2 = cloneRequest(req)
  153. req2.Header.Set("if-none-match", etag)
  154. }
  155. lastModified := cachedResp.Header.Get("last-modified")
  156. if lastModified != "" && req.Header.Get("last-modified") == "" {
  157. if req2 == nil {
  158. req2 = cloneRequest(req)
  159. }
  160. req2.Header.Set("if-modified-since", lastModified)
  161. }
  162. if req2 != nil {
  163. req = req2
  164. }
  165. }
  166. }
  167. resp, err = transport.RoundTrip(req)
  168. if err == nil && req.Method == "GET" && resp.StatusCode == http.StatusNotModified {
  169. // Replace the 304 response with the one from cache, but update with some new headers
  170. endToEndHeaders := getEndToEndHeaders(resp.Header)
  171. for _, header := range endToEndHeaders {
  172. cachedResp.Header[header] = resp.Header[header]
  173. }
  174. resp = cachedResp
  175. } else if (err != nil || (cachedResp != nil && resp.StatusCode >= 500)) &&
  176. req.Method == "GET" && canStaleOnError(cachedResp.Header, req.Header) {
  177. // In case of transport failure and stale-if-error activated, returns cached content
  178. // when available
  179. return cachedResp, nil
  180. } else {
  181. if err != nil || resp.StatusCode != http.StatusOK {
  182. t.Cache.Delete(cacheKey)
  183. }
  184. if err != nil {
  185. return nil, err
  186. }
  187. }
  188. } else {
  189. reqCacheControl := parseCacheControl(req.Header)
  190. if _, ok := reqCacheControl["only-if-cached"]; ok {
  191. resp = newGatewayTimeoutResponse(req)
  192. } else {
  193. resp, err = transport.RoundTrip(req)
  194. if err != nil {
  195. return nil, err
  196. }
  197. }
  198. }
  199. if cacheable && canStore(parseCacheControl(req.Header), parseCacheControl(resp.Header)) {
  200. for _, varyKey := range headerAllCommaSepValues(resp.Header, "vary") {
  201. varyKey = http.CanonicalHeaderKey(varyKey)
  202. fakeHeader := "X-Varied-" + varyKey
  203. reqValue := req.Header.Get(varyKey)
  204. if reqValue != "" {
  205. resp.Header.Set(fakeHeader, reqValue)
  206. }
  207. }
  208. switch req.Method {
  209. case "GET":
  210. // Delay caching until EOF is reached.
  211. resp.Body = &cachingReadCloser{
  212. R: resp.Body,
  213. OnEOF: func(r io.Reader) {
  214. resp := *resp
  215. resp.Body = ioutil.NopCloser(r)
  216. respBytes, err := httputil.DumpResponse(&resp, true)
  217. if err == nil {
  218. t.Cache.Set(cacheKey, respBytes)
  219. }
  220. },
  221. }
  222. default:
  223. respBytes, err := httputil.DumpResponse(resp, true)
  224. if err == nil {
  225. t.Cache.Set(cacheKey, respBytes)
  226. }
  227. }
  228. } else {
  229. t.Cache.Delete(cacheKey)
  230. }
  231. return resp, nil
  232. }
  233. // ErrNoDateHeader indicates that the HTTP headers contained no Date header.
  234. var ErrNoDateHeader = errors.New("no Date header")
  235. // Date parses and returns the value of the Date header.
  236. func Date(respHeaders http.Header) (date time.Time, err error) {
  237. dateHeader := respHeaders.Get("date")
  238. if dateHeader == "" {
  239. err = ErrNoDateHeader
  240. return
  241. }
  242. return time.Parse(time.RFC1123, dateHeader)
  243. }
  244. type realClock struct{}
  245. func (c *realClock) since(d time.Time) time.Duration {
  246. return time.Since(d)
  247. }
  248. type timer interface {
  249. since(d time.Time) time.Duration
  250. }
  251. var clock timer = &realClock{}
  252. // getFreshness will return one of fresh/stale/transparent based on the cache-control
  253. // values of the request and the response
  254. //
  255. // fresh indicates the response can be returned
  256. // stale indicates that the response needs validating before it is returned
  257. // transparent indicates the response should not be used to fulfil the request
  258. //
  259. // Because this is only a private cache, 'public' and 'private' in cache-control aren't
  260. // signficant. Similarly, smax-age isn't used.
  261. func getFreshness(respHeaders, reqHeaders http.Header) (freshness int) {
  262. respCacheControl := parseCacheControl(respHeaders)
  263. reqCacheControl := parseCacheControl(reqHeaders)
  264. if _, ok := reqCacheControl["no-cache"]; ok {
  265. return transparent
  266. }
  267. if _, ok := respCacheControl["no-cache"]; ok {
  268. return stale
  269. }
  270. if _, ok := reqCacheControl["only-if-cached"]; ok {
  271. return fresh
  272. }
  273. date, err := Date(respHeaders)
  274. if err != nil {
  275. return stale
  276. }
  277. currentAge := clock.since(date)
  278. var lifetime time.Duration
  279. var zeroDuration time.Duration
  280. // If a response includes both an Expires header and a max-age directive,
  281. // the max-age directive overrides the Expires header, even if the Expires header is more restrictive.
  282. if maxAge, ok := respCacheControl["max-age"]; ok {
  283. lifetime, err = time.ParseDuration(maxAge + "s")
  284. if err != nil {
  285. lifetime = zeroDuration
  286. }
  287. } else {
  288. expiresHeader := respHeaders.Get("Expires")
  289. if expiresHeader != "" {
  290. expires, err := time.Parse(time.RFC1123, expiresHeader)
  291. if err != nil {
  292. lifetime = zeroDuration
  293. } else {
  294. lifetime = expires.Sub(date)
  295. }
  296. }
  297. }
  298. if maxAge, ok := reqCacheControl["max-age"]; ok {
  299. // the client is willing to accept a response whose age is no greater than the specified time in seconds
  300. lifetime, err = time.ParseDuration(maxAge + "s")
  301. if err != nil {
  302. lifetime = zeroDuration
  303. }
  304. }
  305. if minfresh, ok := reqCacheControl["min-fresh"]; ok {
  306. // the client wants a response that will still be fresh for at least the specified number of seconds.
  307. minfreshDuration, err := time.ParseDuration(minfresh + "s")
  308. if err == nil {
  309. currentAge = time.Duration(currentAge + minfreshDuration)
  310. }
  311. }
  312. if maxstale, ok := reqCacheControl["max-stale"]; ok {
  313. // Indicates that the client is willing to accept a response that has exceeded its expiration time.
  314. // If max-stale is assigned a value, then the client is willing to accept a response that has exceeded
  315. // its expiration time by no more than the specified number of seconds.
  316. // If no value is assigned to max-stale, then the client is willing to accept a stale response of any age.
  317. //
  318. // Responses served only because of a max-stale value are supposed to have a Warning header added to them,
  319. // but that seems like a hassle, and is it actually useful? If so, then there needs to be a different
  320. // return-value available here.
  321. if maxstale == "" {
  322. return fresh
  323. }
  324. maxstaleDuration, err := time.ParseDuration(maxstale + "s")
  325. if err == nil {
  326. currentAge = time.Duration(currentAge - maxstaleDuration)
  327. }
  328. }
  329. if lifetime > currentAge {
  330. return fresh
  331. }
  332. return stale
  333. }
  334. // Returns true if either the request or the response includes the stale-if-error
  335. // cache control extension: https://tools.ietf.org/html/rfc5861
  336. func canStaleOnError(respHeaders, reqHeaders http.Header) bool {
  337. respCacheControl := parseCacheControl(respHeaders)
  338. reqCacheControl := parseCacheControl(reqHeaders)
  339. var err error
  340. lifetime := time.Duration(-1)
  341. if staleMaxAge, ok := respCacheControl["stale-if-error"]; ok {
  342. if staleMaxAge != "" {
  343. lifetime, err = time.ParseDuration(staleMaxAge + "s")
  344. if err != nil {
  345. return false
  346. }
  347. } else {
  348. return true
  349. }
  350. }
  351. if staleMaxAge, ok := reqCacheControl["stale-if-error"]; ok {
  352. if staleMaxAge != "" {
  353. lifetime, err = time.ParseDuration(staleMaxAge + "s")
  354. if err != nil {
  355. return false
  356. }
  357. } else {
  358. return true
  359. }
  360. }
  361. if lifetime >= 0 {
  362. date, err := Date(respHeaders)
  363. if err != nil {
  364. return false
  365. }
  366. currentAge := clock.since(date)
  367. if lifetime > currentAge {
  368. return true
  369. }
  370. }
  371. return false
  372. }
  373. func getEndToEndHeaders(respHeaders http.Header) []string {
  374. // These headers are always hop-by-hop
  375. hopByHopHeaders := map[string]struct{}{
  376. "Connection": struct{}{},
  377. "Keep-Alive": struct{}{},
  378. "Proxy-Authenticate": struct{}{},
  379. "Proxy-Authorization": struct{}{},
  380. "Te": struct{}{},
  381. "Trailers": struct{}{},
  382. "Transfer-Encoding": struct{}{},
  383. "Upgrade": struct{}{},
  384. }
  385. for _, extra := range strings.Split(respHeaders.Get("connection"), ",") {
  386. // any header listed in connection, if present, is also considered hop-by-hop
  387. if strings.Trim(extra, " ") != "" {
  388. hopByHopHeaders[http.CanonicalHeaderKey(extra)] = struct{}{}
  389. }
  390. }
  391. endToEndHeaders := []string{}
  392. for respHeader, _ := range respHeaders {
  393. if _, ok := hopByHopHeaders[respHeader]; !ok {
  394. endToEndHeaders = append(endToEndHeaders, respHeader)
  395. }
  396. }
  397. return endToEndHeaders
  398. }
  399. func canStore(reqCacheControl, respCacheControl cacheControl) (canStore bool) {
  400. if _, ok := respCacheControl["no-store"]; ok {
  401. return false
  402. }
  403. if _, ok := reqCacheControl["no-store"]; ok {
  404. return false
  405. }
  406. return true
  407. }
  408. func newGatewayTimeoutResponse(req *http.Request) *http.Response {
  409. var braw bytes.Buffer
  410. braw.WriteString("HTTP/1.1 504 Gateway Timeout\r\n\r\n")
  411. resp, err := http.ReadResponse(bufio.NewReader(&braw), req)
  412. if err != nil {
  413. panic(err)
  414. }
  415. return resp
  416. }
  417. // cloneRequest returns a clone of the provided *http.Request.
  418. // The clone is a shallow copy of the struct and its Header map.
  419. // (This function copyright goauth2 authors: https://code.google.com/p/goauth2)
  420. func cloneRequest(r *http.Request) *http.Request {
  421. // shallow copy of the struct
  422. r2 := new(http.Request)
  423. *r2 = *r
  424. // deep copy of the Header
  425. r2.Header = make(http.Header)
  426. for k, s := range r.Header {
  427. r2.Header[k] = s
  428. }
  429. return r2
  430. }
  431. type cacheControl map[string]string
  432. func parseCacheControl(headers http.Header) cacheControl {
  433. cc := cacheControl{}
  434. ccHeader := headers.Get("Cache-Control")
  435. for _, part := range strings.Split(ccHeader, ",") {
  436. part = strings.Trim(part, " ")
  437. if part == "" {
  438. continue
  439. }
  440. if strings.ContainsRune(part, '=') {
  441. keyval := strings.Split(part, "=")
  442. cc[strings.Trim(keyval[0], " ")] = strings.Trim(keyval[1], ",")
  443. } else {
  444. cc[part] = ""
  445. }
  446. }
  447. return cc
  448. }
  449. // headerAllCommaSepValues returns all comma-separated values (each
  450. // with whitespace trimmed) for header name in headers. According to
  451. // Section 4.2 of the HTTP/1.1 spec
  452. // (http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2),
  453. // values from multiple occurrences of a header should be concatenated, if
  454. // the header's value is a comma-separated list.
  455. func headerAllCommaSepValues(headers http.Header, name string) []string {
  456. var vals []string
  457. for _, val := range headers[http.CanonicalHeaderKey(name)] {
  458. fields := strings.Split(val, ",")
  459. for i, f := range fields {
  460. fields[i] = strings.TrimSpace(f)
  461. }
  462. vals = append(vals, fields...)
  463. }
  464. return vals
  465. }
  466. // cachingReadCloser is a wrapper around ReadCloser R that calls OnEOF
  467. // handler with a full copy of the content read from R when EOF is
  468. // reached.
  469. type cachingReadCloser struct {
  470. // Underlying ReadCloser.
  471. R io.ReadCloser
  472. // OnEOF is called with a copy of the content of R when EOF is reached.
  473. OnEOF func(io.Reader)
  474. buf bytes.Buffer // buf stores a copy of the content of R.
  475. }
  476. // Read reads the next len(p) bytes from R or until R is drained. The
  477. // return value n is the number of bytes read. If R has no data to
  478. // return, err is io.EOF and OnEOF is called with a full copy of what
  479. // has been read so far.
  480. func (r *cachingReadCloser) Read(p []byte) (n int, err error) {
  481. n, err = r.R.Read(p)
  482. r.buf.Write(p[:n])
  483. if err == io.EOF {
  484. r.OnEOF(bytes.NewReader(r.buf.Bytes()))
  485. }
  486. return n, err
  487. }
  488. func (r *cachingReadCloser) Close() error {
  489. return r.R.Close()
  490. }
  491. // NewMemoryCacheTransport returns a new Transport using the in-memory cache implementation
  492. func NewMemoryCacheTransport() *Transport {
  493. c := NewMemoryCache()
  494. t := NewTransport(c)
  495. return t
  496. }