logs.go

/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package logs

import (
	"bufio"
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"math"
	"os"
	"path/filepath"
	"time"

	"github.com/docker/docker/daemon/logger/jsonfilelog/jsonlog"
	"github.com/fsnotify/fsnotify"

	"k8s.io/api/core/v1"
	internalapi "k8s.io/cri-api/pkg/apis"
	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
	"k8s.io/klog"
	"k8s.io/kubernetes/pkg/util/tail"
)

// Notice that the current CRI logs implementation doesn't handle
// log rotation.
// * It will not retrieve logs in rotated log files.
// * If log rotation happens when following the log:
//   * If the rotation is using create mode, we'll still follow the old file.
//   * If the rotation is using copytruncate, we'll be reading at the original position and get nothing.
// TODO(random-liu): Support log rotation.

const (
	// timeFormat is the time format used in the log.
	timeFormat = time.RFC3339Nano
	// logForceCheckPeriod is the period to check for a new read.
	logForceCheckPeriod = 1 * time.Second
)

var (
	// eol is the end-of-line sign in the log.
	eol = []byte{'\n'}
	// delimiter is the delimiter for timestamp and stream type in log line.
	delimiter = []byte{' '}
	// tagDelimiter is the delimiter for log tags.
	tagDelimiter = []byte(runtimeapi.LogTagDelimiter)
)

// logMessage is the CRI internal log type.
type logMessage struct {
	timestamp time.Time
	stream    runtimeapi.LogStreamType
	log       []byte
}

// reset resets the log message to its zero value.
func (l *logMessage) reset() {
	l.timestamp = time.Time{}
	l.stream = ""
	l.log = nil
}

// LogOptions is the CRI internal type of all log options.
type LogOptions struct {
	tail      int64
	bytes     int64
	since     time.Time
	follow    bool
	timestamp bool
}

// NewLogOptions converts the v1.PodLogOptions to the CRI internal LogOptions.
func NewLogOptions(apiOpts *v1.PodLogOptions, now time.Time) *LogOptions {
	opts := &LogOptions{
		tail:      -1, // -1 by default which means read all logs.
		bytes:     -1, // -1 by default which means read all logs.
		follow:    apiOpts.Follow,
		timestamp: apiOpts.Timestamps,
	}
	if apiOpts.TailLines != nil {
		opts.tail = *apiOpts.TailLines
	}
	if apiOpts.LimitBytes != nil {
		opts.bytes = *apiOpts.LimitBytes
	}
	if apiOpts.SinceSeconds != nil {
		opts.since = now.Add(-time.Duration(*apiOpts.SinceSeconds) * time.Second)
	}
	if apiOpts.SinceTime != nil && apiOpts.SinceTime.After(opts.since) {
		opts.since = apiOpts.SinceTime.Time
	}
	return opts
}
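
// exampleNewLogOptions is an illustrative sketch (not part of the original
// file) of how a hypothetical caller, such as the kubelet log handler, might
// convert the user-facing v1.PodLogOptions into the internal LogOptions; the
// option values here are made up for demonstration.
func exampleNewLogOptions() *LogOptions {
	tailLines := int64(100)
	apiOpts := &v1.PodLogOptions{
		Follow:     true, // keep streaming after reaching EOF
		TailLines:  &tailLines,
		Timestamps: true, // prefix each line with its timestamp
	}
	// Read at most the last 100 lines, include timestamps, and follow the log.
	return NewLogOptions(apiOpts, time.Now())
}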

// parseFunc is a function parsing one log line into the internal log type.
// Notice that the caller must make sure logMessage is not nil.
type parseFunc func([]byte, *logMessage) error

var parseFuncs = []parseFunc{
	parseCRILog,        // CRI log format parse function
	parseDockerJSONLog, // Docker JSON log format parse function
}

// parseCRILog parses logs in CRI log format. CRI log format example:
//   2016-10-06T00:17:09.669794202Z stdout P log content 1
//   2016-10-06T00:17:09.669794203Z stderr F log content 2
func parseCRILog(log []byte, msg *logMessage) error {
	var err error
	// Parse timestamp
	idx := bytes.Index(log, delimiter)
	if idx < 0 {
		return fmt.Errorf("timestamp is not found")
	}
	msg.timestamp, err = time.Parse(timeFormat, string(log[:idx]))
	if err != nil {
		return fmt.Errorf("unexpected timestamp format %q: %v", timeFormat, err)
	}
	// Parse stream type
	log = log[idx+1:]
	idx = bytes.Index(log, delimiter)
	if idx < 0 {
		return fmt.Errorf("stream type is not found")
	}
	msg.stream = runtimeapi.LogStreamType(log[:idx])
	if msg.stream != runtimeapi.Stdout && msg.stream != runtimeapi.Stderr {
		return fmt.Errorf("unexpected stream type %q", msg.stream)
	}
	// Parse log tag
	log = log[idx+1:]
	idx = bytes.Index(log, delimiter)
	if idx < 0 {
		return fmt.Errorf("log tag is not found")
	}
	// Keep this forward compatible.
	tags := bytes.Split(log[:idx], tagDelimiter)
	partial := (runtimeapi.LogTag(tags[0]) == runtimeapi.LogTagPartial)
	// Trim the trailing newline if this is a partial line.
	if partial && len(log) > 0 && log[len(log)-1] == '\n' {
		log = log[:len(log)-1]
	}
	// Get log content
	msg.log = log[idx+1:]
	return nil
}
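
// exampleParseCRILog is an illustrative sketch (not part of the original file)
// showing how a single CRI-format line is decomposed by parseCRILog: the first
// field is the RFC3339Nano timestamp, the second the stream type, the third the
// partial/full ("P"/"F") tag, and the remainder is the raw log content.
func exampleParseCRILog() (*logMessage, error) {
	line := []byte("2016-10-06T00:17:09.669794202Z stdout F log content 1\n")
	msg := &logMessage{}
	if err := parseCRILog(line, msg); err != nil {
		return nil, err
	}
	// At this point msg.stream == runtimeapi.Stdout and msg.log holds
	// "log content 1\n" (the newline is kept because the line is not partial).
	return msg, nil
}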

// parseDockerJSONLog parses logs in Docker JSON log format. Docker JSON log format
// example:
//   {"log":"content 1","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}
//   {"log":"content 2","stream":"stderr","time":"2016-10-20T18:39:20.57606444Z"}
func parseDockerJSONLog(log []byte, msg *logMessage) error {
	var l = &jsonlog.JSONLog{}
	l.Reset()
	// TODO: JSON decoding is fairly expensive, we should evaluate this.
	if err := json.Unmarshal(log, l); err != nil {
		return fmt.Errorf("failed with %v to unmarshal log %q", err, l)
	}
	msg.timestamp = l.Created
	msg.stream = runtimeapi.LogStreamType(l.Stream)
	msg.log = []byte(l.Log)
	return nil
}

// getParseFunc returns the proper parse function based on the sample log line passed in.
func getParseFunc(log []byte) (parseFunc, error) {
	for _, p := range parseFuncs {
		if err := p(log, &logMessage{}); err == nil {
			return p, nil
		}
	}
	return nil, fmt.Errorf("unsupported log format: %q", log)
}
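
// exampleGetParseFunc is an illustrative sketch (not part of the original file)
// showing how getParseFunc auto-detects the log format from a sample line: a
// Docker JSON line fails parseCRILog (its timestamp parse rejects the JSON
// prefix) and therefore selects parseDockerJSONLog, while a CRI-format line
// selects parseCRILog.
func exampleGetParseFunc() error {
	sample := []byte(`{"log":"content 1","stream":"stdout","time":"2016-10-20T18:39:20.57606443Z"}`)
	parse, err := getParseFunc(sample)
	if err != nil {
		return err
	}
	// The detected parser is then reused for every subsequent line.
	msg := &logMessage{}
	return parse(sample, msg)
}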

// logWriter controls the writing into the stream based on the log options.
type logWriter struct {
	stdout io.Writer
	stderr io.Writer
	opts   *LogOptions
	remain int64
}

// errMaximumWrite is returned when all bytes have been written.
var errMaximumWrite = errors.New("maximum write")

// errShortWrite is returned when the message is not fully written.
var errShortWrite = errors.New("short write")

func newLogWriter(stdout io.Writer, stderr io.Writer, opts *LogOptions) *logWriter {
	w := &logWriter{
		stdout: stdout,
		stderr: stderr,
		opts:   opts,
		remain: math.MaxInt64, // initialize it as infinity
	}
	if opts.bytes >= 0 {
		w.remain = opts.bytes
	}
	return w
}

// write writes the log message into stdout or stderr based on its stream type.
func (w *logWriter) write(msg *logMessage) error {
	if msg.timestamp.Before(w.opts.since) {
		// Skip the line because it's older than since.
		return nil
	}
	line := msg.log
	if w.opts.timestamp {
		prefix := append([]byte(msg.timestamp.Format(timeFormat)), delimiter[0])
		line = append(prefix, line...)
	}
	// If the line is longer than the remaining bytes, cut it.
	if int64(len(line)) > w.remain {
		line = line[:w.remain]
	}
	// Get the proper stream to write to.
	var stream io.Writer
	switch msg.stream {
	case runtimeapi.Stdout:
		stream = w.stdout
	case runtimeapi.Stderr:
		stream = w.stderr
	default:
		return fmt.Errorf("unexpected stream type %q", msg.stream)
	}
	n, err := stream.Write(line)
	w.remain -= int64(n)
	if err != nil {
		return err
	}
	// If the line has not been fully written, return errShortWrite.
	if n < len(line) {
		return errShortWrite
	}
	// If there are no more bytes left, return errMaximumWrite.
	if w.remain <= 0 {
		return errMaximumWrite
	}
	return nil
}
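
// exampleLogWriter is an illustrative sketch (not part of the original file)
// showing how newLogWriter honours the bytes limit (LimitBytes): with bytes set
// to 10, the line is truncated to the remaining budget and errMaximumWrite is
// returned once the budget is exhausted, which tells the caller to stop.
func exampleLogWriter(stdout, stderr io.Writer) error {
	w := newLogWriter(stdout, stderr, &LogOptions{tail: -1, bytes: 10})
	msg := &logMessage{
		timestamp: time.Now(),
		stream:    runtimeapi.Stdout,
		log:       []byte("a line longer than ten bytes\n"),
	}
	// Only the first 10 bytes are written to stdout; errMaximumWrite is returned.
	return w.write(msg)
}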

// ReadLogs reads the container log and redirects it into stdout and stderr.
// Note that containerID is only needed when following the log, or else
// just pass in an empty string "".
func ReadLogs(ctx context.Context, path, containerID string, opts *LogOptions, runtimeService internalapi.RuntimeService, stdout, stderr io.Writer) error {
	// fsnotify has different behavior for symlinks on different platforms,
	// for example it follows symlinks on Linux, but not on Windows,
	// so we explicitly resolve symlinks before reading the logs.
	// There shouldn't be a security issue because the container log
	// path is owned by kubelet and the container runtime.
	evaluated, err := filepath.EvalSymlinks(path)
	if err != nil {
		return fmt.Errorf("failed to try resolving symlinks in path %q: %v", path, err)
	}
	path = evaluated
	f, err := os.Open(path)
	if err != nil {
		return fmt.Errorf("failed to open log file %q: %v", path, err)
	}
	defer f.Close()

	// Search start point based on tail line.
	start, err := tail.FindTailLineStartIndex(f, opts.tail)
	if err != nil {
		return fmt.Errorf("failed to tail %d lines of log file %q: %v", opts.tail, path, err)
	}
	if _, err := f.Seek(start, io.SeekStart); err != nil {
		return fmt.Errorf("failed to seek %d in log file %q: %v", start, path, err)
	}

	// Start parsing the logs.
	r := bufio.NewReader(f)
	// Do not create the watcher here because it is not needed if `Follow` is false.
	var watcher *fsnotify.Watcher
	var parse parseFunc
	var stop bool
	found := true
	writer := newLogWriter(stdout, stderr, opts)
	msg := &logMessage{}
	for {
		if stop {
			klog.V(2).Infof("Finish parsing log file %q", path)
			return nil
		}
		l, err := r.ReadBytes(eol[0])
		if err != nil {
			if err != io.EOF { // This is a real error
				return fmt.Errorf("failed to read log file %q: %v", path, err)
			}
			if opts.follow {
				// The container is not running, and we got to the end of the log.
				if !found {
					return nil
				}
				// Reset seek so that if this is an incomplete line,
				// it will be read again.
				if _, err := f.Seek(-int64(len(l)), io.SeekCurrent); err != nil {
					return fmt.Errorf("failed to reset seek in log file %q: %v", path, err)
				}
				if watcher == nil {
					// Initialize the watcher if it has not been initialized yet.
					if watcher, err = fsnotify.NewWatcher(); err != nil {
						return fmt.Errorf("failed to create fsnotify watcher: %v", err)
					}
					defer watcher.Close()
					if err := watcher.Add(f.Name()); err != nil {
						return fmt.Errorf("failed to watch file %q: %v", f.Name(), err)
					}
					// If we just created the watcher, try again to read as we might have missed
					// the event.
					continue
				}
				var recreated bool
				// Wait until the next log change.
				found, recreated, err = waitLogs(ctx, containerID, watcher, runtimeService)
				if err != nil {
					return err
				}
				if recreated {
					newF, err := os.Open(path)
					if err != nil {
						if os.IsNotExist(err) {
							continue
						}
						return fmt.Errorf("failed to open log file %q: %v", path, err)
					}
					f.Close()
					if err := watcher.Remove(f.Name()); err != nil && !os.IsNotExist(err) {
						klog.Errorf("failed to remove file watch %q: %v", f.Name(), err)
					}
					f = newF
					if err := watcher.Add(f.Name()); err != nil {
						return fmt.Errorf("failed to watch file %q: %v", f.Name(), err)
					}
					r = bufio.NewReader(f)
				}
				// If the container exited, consume data until the next EOF.
				continue
			}
			// Should stop after writing the remaining content.
			stop = true
			if len(l) == 0 {
				continue
			}
			klog.Warningf("Incomplete line in log file %q: %q", path, l)
		}
		if parse == nil {
			// Initialize the log parsing function.
			parse, err = getParseFunc(l)
			if err != nil {
				return fmt.Errorf("failed to get parse function: %v", err)
			}
		}
		// Parse the log line.
		msg.reset()
		if err := parse(l, msg); err != nil {
			klog.Errorf("Failed with err %v when parsing log for log file %q: %q", err, path, l)
			continue
		}
		// Write the log line into the stream.
		if err := writer.write(msg); err != nil {
			if err == errMaximumWrite {
				klog.V(2).Infof("Finish parsing log file %q, hit bytes limit %d(bytes)", path, opts.bytes)
				return nil
			}
			klog.Errorf("Failed with err %v when writing log for log file %q: %+v", err, path, msg)
			return err
		}
	}
}
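
// exampleReadLogs is an illustrative sketch (not part of the original file) of
// how a hypothetical caller might stream a container log to the process's
// stdout/stderr, following it until the container stops or the context is
// cancelled. The log path and container ID below are placeholders.
func exampleReadLogs(ctx context.Context, runtimeService internalapi.RuntimeService) error {
	opts := NewLogOptions(&v1.PodLogOptions{Follow: true}, time.Now())
	// containerID is required here because Follow is true.
	return ReadLogs(ctx, "/var/log/pods/example/container/0.log", "container-id", opts, runtimeService, os.Stdout, os.Stderr)
}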

func isContainerRunning(id string, r internalapi.RuntimeService) (bool, error) {
	s, err := r.ContainerStatus(id)
	if err != nil {
		return false, err
	}
	// Only keep following the container log when the container is running.
	if s.State != runtimeapi.ContainerState_CONTAINER_RUNNING {
		klog.V(5).Infof("Container %q is not running (state=%q)", id, s.State)
		// Do not return an error because it's normal that the container stops
		// during waiting.
		return false, nil
	}
	return true, nil
}

// waitLogs waits for the next log write. It returns two booleans and an error. The first boolean
// indicates whether a new log is found; the second boolean indicates whether the log file was recreated;
// the error reports any error that happens while waiting for new logs.
func waitLogs(ctx context.Context, id string, w *fsnotify.Watcher, runtimeService internalapi.RuntimeService) (bool, bool, error) {
	// No need to wait if the container is not running.
	if running, err := isContainerRunning(id, runtimeService); !running {
		return false, false, err
	}
	errRetry := 5
	for {
		select {
		case <-ctx.Done():
			return false, false, fmt.Errorf("context cancelled")
		case e := <-w.Events:
			switch e.Op {
			case fsnotify.Write:
				return true, false, nil
			case fsnotify.Create:
				fallthrough
			case fsnotify.Rename:
				fallthrough
			case fsnotify.Remove:
				fallthrough
			case fsnotify.Chmod:
				return true, true, nil
			default:
				klog.Errorf("Unexpected fsnotify event: %v, retrying...", e)
			}
		case err := <-w.Errors:
			klog.Errorf("Fsnotify watch error: %v, %d error retries remaining", err, errRetry)
			if errRetry == 0 {
				return false, false, err
			}
			errRetry--
		case <-time.After(logForceCheckPeriod):
			return true, false, nil
		}
	}
}