lexer.go

// Copyright 2015 Light Code Labs, LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package caddyfile

import (
	"bufio"
	"io"
	"unicode"
)

type (
	// lexer is a utility which can get values, token by
	// token, from a Reader. A token is a word, and tokens
	// are separated by whitespace. A word can be enclosed
	// in quotes if it contains whitespace.
	lexer struct {
		reader *bufio.Reader
		token  Token
		line   int
	}

	// Token represents a single parsable unit.
	Token struct {
		File string
		Line int
		Text string
	}
)
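
// For example (illustrative, not from the original source): the word "proxy"
// appearing on line 3 of a Caddyfile would be carried through the parser as
// Token{File: "Caddyfile", Line: 3, Text: "proxy"}. Note that next below
// fills in Line and Text; File is left for callers to set.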

// load prepares the lexer to scan an input for tokens.
// It discards any leading byte order mark.
func (l *lexer) load(input io.Reader) error {
	l.reader = bufio.NewReader(input)
	l.line = 1

	// discard byte order mark, if present
	firstCh, _, err := l.reader.ReadRune()
	if err != nil {
		return err
	}
	if firstCh != 0xFEFF {
		err := l.reader.UnreadRune()
		if err != nil {
			return err
		}
	}

	return nil
}
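
// Note (added for illustration; not in the original source): because load
// discards a leading byte order mark (U+FEFF), input such as "\uFEFFhost {"
// lexes the same as "host {"; the first token produced by next is "host".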

// next loads the next token into the lexer.
// A token is delimited by whitespace, unless
// the token starts with a quote character (")
// in which case the token goes until the closing
// quote (the enclosing quotes are not included).
// Inside quoted strings, quotes may be escaped
// with a preceding \ character. No other characters
// may be escaped. The rest of the line is skipped
// if a "#" character is read in. Returns true if
// a token was loaded; false otherwise.
func (l *lexer) next() bool {
	var val []rune
	var comment, quoted, escaped bool

	makeToken := func() bool {
		l.token.Text = string(val)
		return true
	}

	for {
		ch, _, err := l.reader.ReadRune()
		if err != nil {
			if len(val) > 0 {
				return makeToken()
			}
			if err == io.EOF {
				return false
			}
			panic(err)
		}

		if quoted {
			if !escaped {
				if ch == '\\' {
					escaped = true
					continue
				} else if ch == '"' {
					quoted = false
					return makeToken()
				}
			}
			if ch == '\n' {
				l.line++
			}
			if escaped {
				// only escape quotes
				if ch != '"' {
					val = append(val, '\\')
				}
			}
			val = append(val, ch)
			escaped = false
			continue
		}

		if unicode.IsSpace(ch) {
			if ch == '\r' {
				continue
			}
			if ch == '\n' {
				l.line++
				comment = false
			}
			if len(val) > 0 {
				return makeToken()
			}
			continue
		}

		if ch == '#' {
			comment = true
		}

		if comment {
			continue
		}

		if len(val) == 0 {
			l.token = Token{Line: l.line}
			if ch == '"' {
				quoted = true
				continue
			}
		}

		val = append(val, ch)
	}
}
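
// The sketch below is not part of the original file; it is a minimal,
// hypothetical driver showing how load and next cooperate. The name lexAll
// is invented for illustration only; the real callers live in this package's
// parser. For input `proxy / localhost:8080 "a b" # note`, it would collect
// the tokens "proxy", "/", "localhost:8080", and "a b": the quoted token
// keeps its internal space, and everything after # on that line is skipped.
func lexAll(input io.Reader) ([]Token, error) {
	l := new(lexer)
	if err := l.load(input); err != nil {
		return nil, err
	}
	var tokens []Token
	for l.next() {
		tokens = append(tokens, l.token)
	}
	return tokens, nil
}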