stream.go 11 KB


  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. )
// A Decoder reads and decodes JSON objects from an input stream.
type Decoder struct {
	r          io.Reader   // underlying input source
	buf        []byte      // data read from r; bytes before scanp are already consumed
	d          decodeState // decode state, re-initialized by Decode for each value
	scanp      int         // start of unread data in buf
	scan       scanner     // scanner used by readValue to find the end of a value
	err        error       // sticky error; once set, Decode returns it immediately
	tokenState int         // current token-streaming state (one of the token* constants)
	tokenStack []int       // tokenState values saved by Token on '[' and '{'
}
  21. // NewDecoder returns a new decoder that reads from r.
  22. //
  23. // The decoder introduces its own buffering and may
  24. // read data from r beyond the JSON values requested.
  25. func NewDecoder(r io.Reader) *Decoder {
  26. return &Decoder{r: r}
  27. }
  28. // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
  29. // Number instead of as a float64.
  30. func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
  31. // Decode reads the next JSON-encoded value from its
  32. // input and stores it in the value pointed to by v.
  33. //
  34. // See the documentation for Unmarshal for details about
  35. // the conversion of JSON into a Go value.
  36. func (dec *Decoder) Decode(v interface{}) error {
  37. if dec.err != nil {
  38. return dec.err
  39. }
  40. if err := dec.tokenPrepareForDecode(); err != nil {
  41. return err
  42. }
  43. if !dec.tokenValueAllowed() {
  44. return &SyntaxError{msg: "not at beginning of value"}
  45. }
  46. // Read whole value into buffer.
  47. n, err := dec.readValue()
  48. if err != nil {
  49. return err
  50. }
  51. dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
  52. dec.scanp += n
  53. // Don't save err from unmarshal into dec.err:
  54. // the connection is still usable since we read a complete JSON
  55. // object from it before the error happened.
  56. err = dec.d.unmarshal(v)
  57. // fixup token streaming state
  58. dec.tokenValueEnd()
  59. return err
  60. }
  61. // Buffered returns a reader of the data remaining in the Decoder's
  62. // buffer. The reader is valid until the next call to Decode.
  63. func (dec *Decoder) Buffered() io.Reader {
  64. return bytes.NewReader(dec.buf[dec.scanp:])
  65. }
// readValue reads a JSON value into dec.buf.
// It returns the length of the encoding.
//
// The value starts at dec.buf[dec.scanp]. readValue itself does not
// advance dec.scanp; the caller does that using the returned length.
// On a scan error or a read error the error is stored in dec.err
// and returned with a length of 0.
func (dec *Decoder) readValue() (int, error) {
	dec.scan.reset()

	// scanp is a local cursor: everything in dec.buf before it has
	// already been fed to the scanner during this call.
	scanp := dec.scanp
	var err error
Input:
	for {
		// Look in the buffer for a new value.
		for i, c := range dec.buf[scanp:] {
			dec.scan.bytes++
			v := dec.scan.step(&dec.scan, c)
			if v == scanEnd {
				// c is the byte after the value, so it is not included.
				scanp += i
				break Input
			}
			// scanEnd is delayed one byte.
			// We might block trying to get that byte from src,
			// so instead invent a space byte.
			if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
				// c is the closing bracket and belongs to the value.
				scanp += i + 1
				break Input
			}
			if v == scanError {
				dec.err = dec.scan.err
				return 0, dec.scan.err
			}
		}
		// The whole buffer has been scanned without finding the end.
		scanp = len(dec.buf)

		// Did the last read have an error?
		// Delayed until now to allow buffer scan.
		if err != nil {
			if err == io.EOF {
				// A value with no trailing delimiter may be complete
				// at EOF; feed an invented space to find out.
				if dec.scan.step(&dec.scan, ' ') == scanEnd {
					break Input
				}
				// Only report a truncated value if some non-space
				// data was actually buffered.
				if nonSpace(dec.buf) {
					err = io.ErrUnexpectedEOF
				}
			}
			dec.err = err
			return 0, err
		}

		// refill may slide the unread data to the front of dec.buf and
		// reset dec.scanp, so remember the scanned length and rebase
		// the local cursor afterwards.
		n := scanp - dec.scanp
		err = dec.refill()
		scanp = dec.scanp + n
	}
	return scanp - dec.scanp, nil
}
  115. func (dec *Decoder) refill() error {
  116. // Make room to read more into the buffer.
  117. // First slide down data already consumed.
  118. if dec.scanp > 0 {
  119. n := copy(dec.buf, dec.buf[dec.scanp:])
  120. dec.buf = dec.buf[:n]
  121. dec.scanp = 0
  122. }
  123. // Grow buffer if not large enough.
  124. const minRead = 512
  125. if cap(dec.buf)-len(dec.buf) < minRead {
  126. newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
  127. copy(newBuf, dec.buf)
  128. dec.buf = newBuf
  129. }
  130. // Read. Delay error for next iteration (after scan).
  131. n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
  132. dec.buf = dec.buf[0 : len(dec.buf)+n]
  133. return err
  134. }
  135. func nonSpace(b []byte) bool {
  136. for _, c := range b {
  137. if !isSpace(c) {
  138. return true
  139. }
  140. }
  141. return false
  142. }
  143. // An Encoder writes JSON objects to an output stream.
  144. type Encoder struct {
  145. w io.Writer
  146. err error
  147. }
  148. // NewEncoder returns a new encoder that writes to w.
  149. func NewEncoder(w io.Writer) *Encoder {
  150. return &Encoder{w: w}
  151. }
  152. // Encode writes the JSON encoding of v to the stream,
  153. // followed by a newline character.
  154. //
  155. // See the documentation for Marshal for details about the
  156. // conversion of Go values to JSON.
  157. func (enc *Encoder) Encode(v interface{}) error {
  158. if enc.err != nil {
  159. return enc.err
  160. }
  161. e := newEncodeState()
  162. err := e.marshal(v)
  163. if err != nil {
  164. return err
  165. }
  166. // Terminate each value with a newline.
  167. // This makes the output look a little nicer
  168. // when debugging, and some kind of space
  169. // is required if the encoded value was a number,
  170. // so that the reader knows there aren't more
  171. // digits coming.
  172. e.WriteByte('\n')
  173. if _, err = enc.w.Write(e.Bytes()); err != nil {
  174. enc.err = err
  175. }
  176. encodeStatePool.Put(e)
  177. return err
  178. }
  179. // RawMessage is a raw encoded JSON object.
  180. // It implements Marshaler and Unmarshaler and can
  181. // be used to delay JSON decoding or precompute a JSON encoding.
  182. type RawMessage []byte
  183. // MarshalJSON returns *m as the JSON encoding of m.
  184. func (m *RawMessage) MarshalJSON() ([]byte, error) {
  185. return *m, nil
  186. }
  187. // UnmarshalJSON sets *m to a copy of data.
  188. func (m *RawMessage) UnmarshalJSON(data []byte) error {
  189. if m == nil {
  190. return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
  191. }
  192. *m = append((*m)[0:0], data...)
  193. return nil
  194. }
// Compile-time assertions that *RawMessage implements
// both Marshaler and Unmarshaler.
var _ Marshaler = (*RawMessage)(nil)
var _ Unmarshaler = (*RawMessage)(nil)
// A Token holds a value of one of these types:
//
//	Delim, for the four JSON delimiters [ ] { }
//	bool, for JSON booleans
//	float64, for JSON numbers
//	Number, for JSON numbers (when UseNumber has been called)
//	string, for JSON string literals
//	nil, for JSON null
type Token interface{}
// Token-streaming states stored in Decoder.tokenState. They record what
// the Token API expects next, given what has been consumed so far.
const (
	tokenTopValue    = iota // outside any array or object; a value may start
	tokenArrayStart         // just after '['; a value or ']' may follow
	tokenArrayValue         // just after ',' in an array; a value must follow
	tokenArrayComma         // just after an array element; ',' or ']' may follow
	tokenObjectStart        // just after '{'; a key string or '}' may follow
	tokenObjectKey          // just after ',' in an object; a key string must follow
	tokenObjectColon        // just after an object key; ':' must follow
	tokenObjectValue        // just after ':'; a value must follow
	tokenObjectComma        // just after an object value; ',' or '}' may follow
)
  218. // advance tokenstate from a separator state to a value state
  219. func (dec *Decoder) tokenPrepareForDecode() error {
  220. // Note: Not calling peek before switch, to avoid
  221. // putting peek into the standard Decode path.
  222. // peek is only called when using the Token API.
  223. switch dec.tokenState {
  224. case tokenArrayComma:
  225. c, err := dec.peek()
  226. if err != nil {
  227. return err
  228. }
  229. if c != ',' {
  230. return &SyntaxError{"expected comma after array element", 0}
  231. }
  232. dec.scanp++
  233. dec.tokenState = tokenArrayValue
  234. case tokenObjectColon:
  235. c, err := dec.peek()
  236. if err != nil {
  237. return err
  238. }
  239. if c != ':' {
  240. return &SyntaxError{"expected colon after object key", 0}
  241. }
  242. dec.scanp++
  243. dec.tokenState = tokenObjectValue
  244. }
  245. return nil
  246. }
  247. func (dec *Decoder) tokenValueAllowed() bool {
  248. switch dec.tokenState {
  249. case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
  250. return true
  251. }
  252. return false
  253. }
  254. func (dec *Decoder) tokenValueEnd() {
  255. switch dec.tokenState {
  256. case tokenArrayStart, tokenArrayValue:
  257. dec.tokenState = tokenArrayComma
  258. case tokenObjectValue:
  259. dec.tokenState = tokenObjectComma
  260. }
  261. }
  262. // A Delim is a JSON array or object delimiter, one of [ ] { or }.
  263. type Delim rune
  264. func (d Delim) String() string {
  265. return string(d)
  266. }
// Token returns the next JSON token in the input stream.
// At the end of the input stream, Token returns nil, io.EOF.
//
// Token guarantees that the delimiters [ ] { } it returns are
// properly nested and matched: if Token encounters an unexpected
// delimiter in the input, it will return an error.
//
// The input stream consists of basic JSON values—bool, string,
// number, and null—along with delimiters [ ] { } of type Delim
// to mark the start and end of arrays and objects.
// Commas and colons are elided.
func (dec *Decoder) Token() (Token, error) {
	// Loop so that separators (':' and ',') can be consumed silently
	// before the next real token is returned.
	for {
		c, err := dec.peek()
		if err != nil {
			return nil, err
		}
		switch c {
		case '[':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching ']' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenArrayStart
			return Delim('['), nil

		case ']':
			// Valid only directly after '[' or after an element;
			// a ']' right after a comma is rejected.
			if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then treat the finished array
			// as a completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim(']'), nil

		case '{':
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Save the enclosing state so the matching '}' can restore it.
			dec.tokenStack = append(dec.tokenStack, dec.tokenState)
			dec.tokenState = tokenObjectStart
			return Delim('{'), nil

		case '}':
			// Valid only directly after '{' or after a key:value pair.
			if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
				return dec.tokenError(c)
			}
			dec.scanp++
			// Pop the enclosing state, then treat the finished object
			// as a completed value within that enclosing context.
			dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
			dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
			dec.tokenValueEnd()
			return Delim('}'), nil

		case ':':
			if dec.tokenState != tokenObjectColon {
				return dec.tokenError(c)
			}
			// Consume the colon and look for the value.
			dec.scanp++
			dec.tokenState = tokenObjectValue
			continue

		case ',':
			// Consume the comma and look for the next element or key.
			if dec.tokenState == tokenArrayComma {
				dec.scanp++
				dec.tokenState = tokenArrayValue
				continue
			}
			if dec.tokenState == tokenObjectComma {
				dec.scanp++
				dec.tokenState = tokenObjectKey
				continue
			}
			return dec.tokenError(c)

		case '"':
			if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
				// The string is an object key. Decode refuses to run
				// in a key state, so pretend to be at top level for the
				// duration of the call and restore the state afterwards.
				var x string
				old := dec.tokenState
				dec.tokenState = tokenTopValue
				err := dec.Decode(&x)
				dec.tokenState = old
				if err != nil {
					clearOffset(err)
					return nil, err
				}
				dec.tokenState = tokenObjectColon
				return x, nil
			}
			// Not in key position: the string is an ordinary value.
			fallthrough

		default:
			if !dec.tokenValueAllowed() {
				return dec.tokenError(c)
			}
			// Decode one value into an interface{}; Decode also
			// advances the token state via tokenValueEnd.
			var x interface{}
			if err := dec.Decode(&x); err != nil {
				clearOffset(err)
				return nil, err
			}
			return x, nil
		}
	}
}
  366. func clearOffset(err error) {
  367. if s, ok := err.(*SyntaxError); ok {
  368. s.Offset = 0
  369. }
  370. }
  371. func (dec *Decoder) tokenError(c byte) (Token, error) {
  372. var context string
  373. switch dec.tokenState {
  374. case tokenTopValue:
  375. context = " looking for beginning of value"
  376. case tokenArrayStart, tokenArrayValue, tokenObjectValue:
  377. context = " looking for beginning of value"
  378. case tokenArrayComma:
  379. context = " after array element"
  380. case tokenObjectKey:
  381. context = " looking for beginning of object key string"
  382. case tokenObjectColon:
  383. context = " after object key"
  384. case tokenObjectComma:
  385. context = " after object key:value pair"
  386. }
  387. return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
  388. }
  389. // More reports whether there is another element in the
  390. // current array or object being parsed.
  391. func (dec *Decoder) More() bool {
  392. c, err := dec.peek()
  393. return err == nil && c != ']' && c != '}'
  394. }
  395. func (dec *Decoder) peek() (byte, error) {
  396. var err error
  397. for {
  398. for i := dec.scanp; i < len(dec.buf); i++ {
  399. c := dec.buf[i]
  400. if isSpace(c) {
  401. continue
  402. }
  403. dec.scanp = i
  404. return c, nil
  405. }
  406. // buffer has been scanned, now report any error
  407. if err != nil {
  408. return 0, err
  409. }
  410. err = dec.refill()
  411. }
  412. }
  413. /*
  414. TODO
  415. // EncodeToken writes the given JSON token to the stream.
  416. // It returns an error if the delimiters [ ] { } are not properly used.
  417. //
  418. // EncodeToken does not call Flush, because usually it is part of
  419. // a larger operation such as Encode, and those will call Flush when finished.
  420. // Callers that create an Encoder and then invoke EncodeToken directly,
  421. // without using Encode, need to call Flush when finished to ensure that
  422. // the JSON is written to the underlying writer.
  423. func (e *Encoder) EncodeToken(t Token) error {
  424. ...
  425. }
  426. */