logs.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459
  1. /*
  2. Copyright 2017 The Kubernetes Authors.
  3. Licensed under the Apache License, Version 2.0 (the "License");
  4. you may not use this file except in compliance with the License.
  5. You may obtain a copy of the License at
  6. http://www.apache.org/licenses/LICENSE-2.0
  7. Unless required by applicable law or agreed to in writing, software
  8. distributed under the License is distributed on an "AS IS" BASIS,
  9. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  10. See the License for the specific language governing permissions and
  11. limitations under the License.
  12. */
  13. package logs
  14. import (
  15. "bufio"
  16. "bytes"
  17. "context"
  18. "encoding/json"
  19. "errors"
  20. "fmt"
  21. "io"
  22. "math"
  23. "os"
  24. "path/filepath"
  25. "time"
  26. "github.com/docker/docker/daemon/logger/jsonfilelog/jsonlog"
  27. "github.com/fsnotify/fsnotify"
  28. "k8s.io/klog"
  29. "k8s.io/api/core/v1"
  30. internalapi "k8s.io/cri-api/pkg/apis"
  31. runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
  32. "k8s.io/kubernetes/pkg/util/tail"
  33. )
  34. // Notice that the current CRI logs implementation doesn't handle
  35. // log rotation.
  36. // * It will not retrieve logs in rotated log file.
  37. // * If log rotation happens when following the log:
  38. // * If the rotation is using create mode, we'll still follow the old file.
  39. // * If the rotation is using copytruncate, we'll be reading at the original position and get nothing.
  40. // TODO(random-liu): Support log rotation.
  41. const (
  42. // timeFormat is the time format used in the log.
  43. timeFormat = time.RFC3339Nano
  44. // blockSize is the block size used in tail.
  45. blockSize = 1024
  46. // stateCheckPeriod is the period to check container state while following
  47. // the container log. Kubelet should not keep following the log when the
  48. // container is not running.
  49. stateCheckPeriod = 5 * time.Second
  50. // logForceCheckPeriod is the period to check for a new read
  51. logForceCheckPeriod = 1 * time.Second
  52. )
  53. var (
  54. // eol is the end-of-line sign in the log.
  55. eol = []byte{'\n'}
  56. // delimiter is the delimiter for timestamp and stream type in log line.
  57. delimiter = []byte{' '}
  58. // tagDelimiter is the delimiter for log tags.
  59. tagDelimiter = []byte(runtimeapi.LogTagDelimiter)
  60. )
  61. // logMessage is the CRI internal log type.
  62. type logMessage struct {
  63. timestamp time.Time
  64. stream runtimeapi.LogStreamType
  65. log []byte
  66. }
  67. // reset resets the log to nil.
  68. func (l *logMessage) reset() {
  69. l.timestamp = time.Time{}
  70. l.stream = ""
  71. l.log = nil
  72. }
  73. // LogOptions is the CRI internal type of all log options.
  74. type LogOptions struct {
  75. tail int64
  76. bytes int64
  77. since time.Time
  78. follow bool
  79. timestamp bool
  80. }
  81. // NewLogOptions convert the v1.PodLogOptions to CRI internal LogOptions.
  82. func NewLogOptions(apiOpts *v1.PodLogOptions, now time.Time) *LogOptions {
  83. opts := &LogOptions{
  84. tail: -1, // -1 by default which means read all logs.
  85. bytes: -1, // -1 by default which means read all logs.
  86. follow: apiOpts.Follow,
  87. timestamp: apiOpts.Timestamps,
  88. }
  89. if apiOpts.TailLines != nil {
  90. opts.tail = *apiOpts.TailLines
  91. }
  92. if apiOpts.LimitBytes != nil {
  93. opts.bytes = *apiOpts.LimitBytes
  94. }
  95. if apiOpts.SinceSeconds != nil {
  96. opts.since = now.Add(-time.Duration(*apiOpts.SinceSeconds) * time.Second)
  97. }
  98. if apiOpts.SinceTime != nil && apiOpts.SinceTime.After(opts.since) {
  99. opts.since = apiOpts.SinceTime.Time
  100. }
  101. return opts
  102. }
  103. // parseFunc is a function parsing one log line to the internal log type.
  104. // Notice that the caller must make sure logMessage is not nil.
  105. type parseFunc func([]byte, *logMessage) error
  106. var parseFuncs = []parseFunc{
  107. parseCRILog, // CRI log format parse function
  108. parseDockerJSONLog, // Docker JSON log format parse function
  109. }
  110. // parseCRILog parses logs in CRI log format. CRI Log format example:
  111. // 2016-10-06T00:17:09.669794202Z stdout P log content 1
  112. // 2016-10-06T00:17:09.669794203Z stderr F log content 2
  113. func parseCRILog(log []byte, msg *logMessage) error {
  114. var err error
  115. // Parse timestamp
  116. idx := bytes.Index(log, delimiter)
  117. if idx < 0 {
  118. return fmt.Errorf("timestamp is not found")
  119. }
  120. msg.timestamp, err = time.Parse(timeFormat, string(log[:idx]))
  121. if err != nil {
  122. return fmt.Errorf("unexpected timestamp format %q: %v", timeFormat, err)
  123. }
  124. // Parse stream type
  125. log = log[idx+1:]
  126. idx = bytes.Index(log, delimiter)
  127. if idx < 0 {
  128. return fmt.Errorf("stream type is not found")
  129. }
  130. msg.stream = runtimeapi.LogStreamType(log[:idx])
  131. if msg.stream != runtimeapi.Stdout && msg.stream != runtimeapi.Stderr {
  132. return fmt.Errorf("unexpected stream type %q", msg.stream)
  133. }
  134. // Parse log tag
  135. log = log[idx+1:]
  136. idx = bytes.Index(log, delimiter)
  137. if idx < 0 {
  138. return fmt.Errorf("log tag is not found")
  139. }
  140. // Keep this forward compatible.
  141. tags := bytes.Split(log[:idx], tagDelimiter)
  142. partial := (runtimeapi.LogTag(tags[0]) == runtimeapi.LogTagPartial)
  143. // Trim the tailing new line if this is a partial line.
  144. if partial && len(log) > 0 && log[len(log)-1] == '\n' {
  145. log = log[:len(log)-1]
  146. }
  147. // Get log content
  148. msg.log = log[idx+1:]
  149. return nil
  150. }
  151. // parseDockerJSONLog parses logs in Docker JSON log format. Docker JSON log format
  152. // example:
  153. // {"log":"content 1","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}
  154. // {"log":"content 2","stream":"stderr","time":"2016-10-20T18:39:20.57606444Z"}
  155. func parseDockerJSONLog(log []byte, msg *logMessage) error {
  156. var l = &jsonlog.JSONLog{}
  157. l.Reset()
  158. // TODO: JSON decoding is fairly expensive, we should evaluate this.
  159. if err := json.Unmarshal(log, l); err != nil {
  160. return fmt.Errorf("failed with %v to unmarshal log %q", err, l)
  161. }
  162. msg.timestamp = l.Created
  163. msg.stream = runtimeapi.LogStreamType(l.Stream)
  164. msg.log = []byte(l.Log)
  165. return nil
  166. }
  167. // getParseFunc returns proper parse function based on the sample log line passed in.
  168. func getParseFunc(log []byte) (parseFunc, error) {
  169. for _, p := range parseFuncs {
  170. if err := p(log, &logMessage{}); err == nil {
  171. return p, nil
  172. }
  173. }
  174. return nil, fmt.Errorf("unsupported log format: %q", log)
  175. }
  176. // logWriter controls the writing into the stream based on the log options.
  177. type logWriter struct {
  178. stdout io.Writer
  179. stderr io.Writer
  180. opts *LogOptions
  181. remain int64
  182. }
  183. // errMaximumWrite is returned when all bytes have been written.
  184. var errMaximumWrite = errors.New("maximum write")
  185. // errShortWrite is returned when the message is not fully written.
  186. var errShortWrite = errors.New("short write")
  187. func newLogWriter(stdout io.Writer, stderr io.Writer, opts *LogOptions) *logWriter {
  188. w := &logWriter{
  189. stdout: stdout,
  190. stderr: stderr,
  191. opts: opts,
  192. remain: math.MaxInt64, // initialize it as infinity
  193. }
  194. if opts.bytes >= 0 {
  195. w.remain = opts.bytes
  196. }
  197. return w
  198. }
  199. // writeLogs writes logs into stdout, stderr.
  200. func (w *logWriter) write(msg *logMessage) error {
  201. if msg.timestamp.Before(w.opts.since) {
  202. // Skip the line because it's older than since
  203. return nil
  204. }
  205. line := msg.log
  206. if w.opts.timestamp {
  207. prefix := append([]byte(msg.timestamp.Format(timeFormat)), delimiter[0])
  208. line = append(prefix, line...)
  209. }
  210. // If the line is longer than the remaining bytes, cut it.
  211. if int64(len(line)) > w.remain {
  212. line = line[:w.remain]
  213. }
  214. // Get the proper stream to write to.
  215. var stream io.Writer
  216. switch msg.stream {
  217. case runtimeapi.Stdout:
  218. stream = w.stdout
  219. case runtimeapi.Stderr:
  220. stream = w.stderr
  221. default:
  222. return fmt.Errorf("unexpected stream type %q", msg.stream)
  223. }
  224. n, err := stream.Write(line)
  225. w.remain -= int64(n)
  226. if err != nil {
  227. return err
  228. }
  229. // If the line has not been fully written, return errShortWrite
  230. if n < len(line) {
  231. return errShortWrite
  232. }
  233. // If there are no more bytes left, return errMaximumWrite
  234. if w.remain <= 0 {
  235. return errMaximumWrite
  236. }
  237. return nil
  238. }
  239. // ReadLogs read the container log and redirect into stdout and stderr.
  240. // Note that containerID is only needed when following the log, or else
  241. // just pass in empty string "".
  242. func ReadLogs(ctx context.Context, path, containerID string, opts *LogOptions, runtimeService internalapi.RuntimeService, stdout, stderr io.Writer) error {
  243. // fsnotify has different behavior for symlinks in different platform,
  244. // for example it follows symlink on Linux, but not on Windows,
  245. // so we explicitly resolve symlinks before reading the logs.
  246. // There shouldn't be security issue because the container log
  247. // path is owned by kubelet and the container runtime.
  248. evaluated, err := filepath.EvalSymlinks(path)
  249. if err != nil {
  250. return fmt.Errorf("failed to try resolving symlinks in path %q: %v", path, err)
  251. }
  252. path = evaluated
  253. f, err := os.Open(path)
  254. if err != nil {
  255. return fmt.Errorf("failed to open log file %q: %v", path, err)
  256. }
  257. defer f.Close()
  258. // Search start point based on tail line.
  259. start, err := tail.FindTailLineStartIndex(f, opts.tail)
  260. if err != nil {
  261. return fmt.Errorf("failed to tail %d lines of log file %q: %v", opts.tail, path, err)
  262. }
  263. if _, err := f.Seek(start, io.SeekStart); err != nil {
  264. return fmt.Errorf("failed to seek %d in log file %q: %v", start, path, err)
  265. }
  266. // Start parsing the logs.
  267. r := bufio.NewReader(f)
  268. // Do not create watcher here because it is not needed if `Follow` is false.
  269. var watcher *fsnotify.Watcher
  270. var parse parseFunc
  271. var stop bool
  272. found := true
  273. writer := newLogWriter(stdout, stderr, opts)
  274. msg := &logMessage{}
  275. for {
  276. if stop {
  277. klog.V(2).Infof("Finish parsing log file %q", path)
  278. return nil
  279. }
  280. l, err := r.ReadBytes(eol[0])
  281. if err != nil {
  282. if err != io.EOF { // This is an real error
  283. return fmt.Errorf("failed to read log file %q: %v", path, err)
  284. }
  285. if opts.follow {
  286. // The container is not running, we got to the end of the log.
  287. if !found {
  288. return nil
  289. }
  290. // Reset seek so that if this is an incomplete line,
  291. // it will be read again.
  292. if _, err := f.Seek(-int64(len(l)), io.SeekCurrent); err != nil {
  293. return fmt.Errorf("failed to reset seek in log file %q: %v", path, err)
  294. }
  295. if watcher == nil {
  296. // Initialize the watcher if it has not been initialized yet.
  297. if watcher, err = fsnotify.NewWatcher(); err != nil {
  298. return fmt.Errorf("failed to create fsnotify watcher: %v", err)
  299. }
  300. defer watcher.Close()
  301. if err := watcher.Add(f.Name()); err != nil {
  302. return fmt.Errorf("failed to watch file %q: %v", f.Name(), err)
  303. }
  304. // If we just created the watcher, try again to read as we might have missed
  305. // the event.
  306. continue
  307. }
  308. var recreated bool
  309. // Wait until the next log change.
  310. found, recreated, err = waitLogs(ctx, containerID, watcher, runtimeService)
  311. if err != nil {
  312. return err
  313. }
  314. if recreated {
  315. newF, err := os.Open(path)
  316. if err != nil {
  317. if os.IsNotExist(err) {
  318. continue
  319. }
  320. return fmt.Errorf("failed to open log file %q: %v", path, err)
  321. }
  322. f.Close()
  323. if err := watcher.Remove(f.Name()); err != nil && !os.IsNotExist(err) {
  324. klog.Errorf("failed to remove file watch %q: %v", f.Name(), err)
  325. }
  326. f = newF
  327. if err := watcher.Add(f.Name()); err != nil {
  328. return fmt.Errorf("failed to watch file %q: %v", f.Name(), err)
  329. }
  330. r = bufio.NewReader(f)
  331. }
  332. // If the container exited consume data until the next EOF
  333. continue
  334. }
  335. // Should stop after writing the remaining content.
  336. stop = true
  337. if len(l) == 0 {
  338. continue
  339. }
  340. klog.Warningf("Incomplete line in log file %q: %q", path, l)
  341. }
  342. if parse == nil {
  343. // Initialize the log parsing function.
  344. parse, err = getParseFunc(l)
  345. if err != nil {
  346. return fmt.Errorf("failed to get parse function: %v", err)
  347. }
  348. }
  349. // Parse the log line.
  350. msg.reset()
  351. if err := parse(l, msg); err != nil {
  352. klog.Errorf("Failed with err %v when parsing log for log file %q: %q", err, path, l)
  353. continue
  354. }
  355. // Write the log line into the stream.
  356. if err := writer.write(msg); err != nil {
  357. if err == errMaximumWrite {
  358. klog.V(2).Infof("Finish parsing log file %q, hit bytes limit %d(bytes)", path, opts.bytes)
  359. return nil
  360. }
  361. klog.Errorf("Failed with err %v when writing log for log file %q: %+v", err, path, msg)
  362. return err
  363. }
  364. }
  365. }
  366. func isContainerRunning(id string, r internalapi.RuntimeService) (bool, error) {
  367. s, err := r.ContainerStatus(id)
  368. if err != nil {
  369. return false, err
  370. }
  371. // Only keep following container log when it is running.
  372. if s.State != runtimeapi.ContainerState_CONTAINER_RUNNING {
  373. klog.V(5).Infof("Container %q is not running (state=%q)", id, s.State)
  374. // Do not return error because it's normal that the container stops
  375. // during waiting.
  376. return false, nil
  377. }
  378. return true, nil
  379. }
  380. // waitLogs wait for the next log write. It returns two booleans and an error. The first boolean
  381. // indicates whether a new log is found; the second boolean if the log file was recreated;
  382. // the error is error happens during waiting new logs.
  383. func waitLogs(ctx context.Context, id string, w *fsnotify.Watcher, runtimeService internalapi.RuntimeService) (bool, bool, error) {
  384. // no need to wait if the pod is not running
  385. if running, err := isContainerRunning(id, runtimeService); !running {
  386. return false, false, err
  387. }
  388. errRetry := 5
  389. for {
  390. select {
  391. case <-ctx.Done():
  392. return false, false, fmt.Errorf("context cancelled")
  393. case e := <-w.Events:
  394. switch e.Op {
  395. case fsnotify.Write:
  396. return true, false, nil
  397. case fsnotify.Create:
  398. fallthrough
  399. case fsnotify.Rename:
  400. fallthrough
  401. case fsnotify.Remove:
  402. fallthrough
  403. case fsnotify.Chmod:
  404. return true, true, nil
  405. default:
  406. klog.Errorf("Unexpected fsnotify event: %v, retrying...", e)
  407. }
  408. case err := <-w.Errors:
  409. klog.Errorf("Fsnotify watch error: %v, %d error retries remaining", err, errRetry)
  410. if errRetry == 0 {
  411. return false, false, err
  412. }
  413. errRetry--
  414. case <-time.After(logForceCheckPeriod):
  415. return true, false, nil
  416. case <-time.After(stateCheckPeriod):
  417. if running, err := isContainerRunning(id, runtimeService); !running {
  418. return false, false, err
  419. }
  420. }
  421. }
  422. }