stream.go 12 KB


  1. // Copyright 2010 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package json
  5. import (
  6. "bytes"
  7. "errors"
  8. "io"
  9. )
  // A Decoder reads and decodes JSON values from an input stream.
  type Decoder struct {
  	r io.Reader // source that refill reads from; set by NewDecoder
  	buf []byte // bytes read from r so far; buf[scanp:] has not been consumed yet
  	d decodeState // state Decode initializes with each value before unmarshaling it
  	scanp int // start of unread data in buf
  	scan scanner // stepped byte by byte in readValue to find where a value ends
  	err error // sticky error recorded by readValue; once set, Decode returns it immediately
  	tokenState int // current position in the token grammar (one of the token* constants)
  	tokenStack []int // tokenState values saved when Token enters an array or object
  }
  21. // NewDecoder returns a new decoder that reads from r.
  22. //
  23. // The decoder introduces its own buffering and may
  24. // read data from r beyond the JSON values requested.
  25. func NewDecoder(r io.Reader) *Decoder {
  26. return &Decoder{r: r}
  27. }
  28. // UseNumber causes the Decoder to unmarshal a number into an interface{} as a
  29. // Number instead of as a float64.
  30. func (dec *Decoder) UseNumber() { dec.d.useNumber = true }
  31. // Decode reads the next JSON-encoded value from its
  32. // input and stores it in the value pointed to by v.
  33. //
  34. // See the documentation for Unmarshal for details about
  35. // the conversion of JSON into a Go value.
  36. func (dec *Decoder) Decode(v interface{}) error {
  37. if dec.err != nil {
  38. return dec.err
  39. }
  40. if err := dec.tokenPrepareForDecode(); err != nil {
  41. return err
  42. }
  43. if !dec.tokenValueAllowed() {
  44. return &SyntaxError{msg: "not at beginning of value"}
  45. }
  46. // Read whole value into buffer.
  47. n, err := dec.readValue()
  48. if err != nil {
  49. return err
  50. }
  51. dec.d.init(dec.buf[dec.scanp : dec.scanp+n])
  52. dec.scanp += n
  53. // Don't save err from unmarshal into dec.err:
  54. // the connection is still usable since we read a complete JSON
  55. // object from it before the error happened.
  56. err = dec.d.unmarshal(v)
  57. // fixup token streaming state
  58. dec.tokenValueEnd()
  59. return err
  60. }
  61. // Buffered returns a reader of the data remaining in the Decoder's
  62. // buffer. The reader is valid until the next call to Decode.
  63. func (dec *Decoder) Buffered() io.Reader {
  64. return bytes.NewReader(dec.buf[dec.scanp:])
  65. }
  66. // readValue reads a JSON value into dec.buf.
  67. // It returns the length of the encoding.
  68. func (dec *Decoder) readValue() (int, error) {
  69. dec.scan.reset()
  70. scanp := dec.scanp
  71. var err error
  72. Input:
  73. for {
  74. // Look in the buffer for a new value.
  75. for i, c := range dec.buf[scanp:] {
  76. dec.scan.bytes++
  77. v := dec.scan.step(&dec.scan, c)
  78. if v == scanEnd {
  79. scanp += i
  80. break Input
  81. }
  82. // scanEnd is delayed one byte.
  83. // We might block trying to get that byte from src,
  84. // so instead invent a space byte.
  85. if (v == scanEndObject || v == scanEndArray) && dec.scan.step(&dec.scan, ' ') == scanEnd {
  86. scanp += i + 1
  87. break Input
  88. }
  89. if v == scanError {
  90. dec.err = dec.scan.err
  91. return 0, dec.scan.err
  92. }
  93. }
  94. scanp = len(dec.buf)
  95. // Did the last read have an error?
  96. // Delayed until now to allow buffer scan.
  97. if err != nil {
  98. if err == io.EOF {
  99. if dec.scan.step(&dec.scan, ' ') == scanEnd {
  100. break Input
  101. }
  102. if nonSpace(dec.buf) {
  103. err = io.ErrUnexpectedEOF
  104. }
  105. }
  106. dec.err = err
  107. return 0, err
  108. }
  109. n := scanp - dec.scanp
  110. err = dec.refill()
  111. scanp = dec.scanp + n
  112. }
  113. return scanp - dec.scanp, nil
  114. }
  115. func (dec *Decoder) refill() error {
  116. // Make room to read more into the buffer.
  117. // First slide down data already consumed.
  118. if dec.scanp > 0 {
  119. n := copy(dec.buf, dec.buf[dec.scanp:])
  120. dec.buf = dec.buf[:n]
  121. dec.scanp = 0
  122. }
  123. // Grow buffer if not large enough.
  124. const minRead = 512
  125. if cap(dec.buf)-len(dec.buf) < minRead {
  126. newBuf := make([]byte, len(dec.buf), 2*cap(dec.buf)+minRead)
  127. copy(newBuf, dec.buf)
  128. dec.buf = newBuf
  129. }
  130. // Read. Delay error for next iteration (after scan).
  131. n, err := dec.r.Read(dec.buf[len(dec.buf):cap(dec.buf)])
  132. dec.buf = dec.buf[0 : len(dec.buf)+n]
  133. return err
  134. }
  135. func nonSpace(b []byte) bool {
  136. for _, c := range b {
  137. if !isSpace(c) {
  138. return true
  139. }
  140. }
  141. return false
  142. }
  143. // An Encoder writes JSON values to an output stream.
  144. type Encoder struct {
  145. w io.Writer
  146. err error
  147. escapeHTML bool
  148. indentBuf *bytes.Buffer
  149. indentPrefix string
  150. indentValue string
  151. ext Extension
  152. }
  153. // NewEncoder returns a new encoder that writes to w.
  154. func NewEncoder(w io.Writer) *Encoder {
  155. return &Encoder{w: w, escapeHTML: true}
  156. }
  157. // Encode writes the JSON encoding of v to the stream,
  158. // followed by a newline character.
  159. //
  160. // See the documentation for Marshal for details about the
  161. // conversion of Go values to JSON.
  162. func (enc *Encoder) Encode(v interface{}) error {
  163. if enc.err != nil {
  164. return enc.err
  165. }
  166. e := newEncodeState()
  167. e.ext = enc.ext
  168. err := e.marshal(v, encOpts{escapeHTML: enc.escapeHTML})
  169. if err != nil {
  170. return err
  171. }
  172. // Terminate each value with a newline.
  173. // This makes the output look a little nicer
  174. // when debugging, and some kind of space
  175. // is required if the encoded value was a number,
  176. // so that the reader knows there aren't more
  177. // digits coming.
  178. e.WriteByte('\n')
  179. b := e.Bytes()
  180. if enc.indentBuf != nil {
  181. enc.indentBuf.Reset()
  182. err = Indent(enc.indentBuf, b, enc.indentPrefix, enc.indentValue)
  183. if err != nil {
  184. return err
  185. }
  186. b = enc.indentBuf.Bytes()
  187. }
  188. if _, err = enc.w.Write(b); err != nil {
  189. enc.err = err
  190. }
  191. encodeStatePool.Put(e)
  192. return err
  193. }
  194. // Indent sets the encoder to format each encoded value with Indent.
  195. func (enc *Encoder) Indent(prefix, indent string) {
  196. enc.indentBuf = new(bytes.Buffer)
  197. enc.indentPrefix = prefix
  198. enc.indentValue = indent
  199. }
  200. // DisableHTMLEscaping causes the encoder not to escape angle brackets
  201. // ("<" and ">") or ampersands ("&") in JSON strings.
  202. func (enc *Encoder) DisableHTMLEscaping() {
  203. enc.escapeHTML = false
  204. }
  // RawMessage is a raw encoded JSON value.
  // It implements Marshaler and Unmarshaler and can
  // be used to delay JSON decoding or precompute a JSON encoding.
  type RawMessage []byte

  // MarshalJSON returns m as the JSON encoding of m.
  //
  // The receiver is a value so that both RawMessage and *RawMessage
  // have this method; with a pointer receiver a plain RawMessage value
  // does not satisfy Marshaler. A nil RawMessage encodes as the JSON
  // literal null, because an empty byte slice is not valid JSON.
  func (m RawMessage) MarshalJSON() ([]byte, error) {
  	if m == nil {
  		return []byte("null"), nil
  	}
  	return m, nil
  }

  // UnmarshalJSON sets *m to a copy of data.
  // It returns an error when called on a nil pointer.
  func (m *RawMessage) UnmarshalJSON(data []byte) error {
  	if m == nil {
  		return errors.New("json.RawMessage: UnmarshalJSON on nil pointer")
  	}
  	// Reuse m's existing storage where possible; the result never
  	// aliases data.
  	*m = append((*m)[0:0], data...)
  	return nil
  }
  221. var _ Marshaler = (*RawMessage)(nil)
  222. var _ Unmarshaler = (*RawMessage)(nil)
  // A Token holds a value of one of these types:
  //
  //	Delim, for the four JSON delimiters [ ] { }
  //	bool, for JSON booleans
  //	float64, for JSON numbers
  //	Number, for JSON numbers (when UseNumber has been called)
  //	string, for JSON string literals
  //	nil, for JSON null
  //
  type Token interface{}
  // Values of Decoder.tokenState. They record where the Token API
  // stands in the JSON grammar and therefore what may come next.
  const (
  	tokenTopValue = iota // not inside any array or object; a value may follow
  	tokenArrayStart // just after '['; a value or ']' may follow
  	tokenArrayValue // just after ',' in an array; a value may follow
  	tokenArrayComma // just after an array element; ',' or ']' may follow
  	tokenObjectStart // just after '{'; a key string or '}' may follow
  	tokenObjectKey // just after ',' in an object; a key string may follow
  	tokenObjectColon // just after an object key; ':' must follow
  	tokenObjectValue // just after ':'; a value may follow
  	tokenObjectComma // just after an object value; ',' or '}' may follow
  )
  244. // advance tokenstate from a separator state to a value state
  245. func (dec *Decoder) tokenPrepareForDecode() error {
  246. // Note: Not calling peek before switch, to avoid
  247. // putting peek into the standard Decode path.
  248. // peek is only called when using the Token API.
  249. switch dec.tokenState {
  250. case tokenArrayComma:
  251. c, err := dec.peek()
  252. if err != nil {
  253. return err
  254. }
  255. if c != ',' {
  256. return &SyntaxError{"expected comma after array element", 0}
  257. }
  258. dec.scanp++
  259. dec.tokenState = tokenArrayValue
  260. case tokenObjectColon:
  261. c, err := dec.peek()
  262. if err != nil {
  263. return err
  264. }
  265. if c != ':' {
  266. return &SyntaxError{"expected colon after object key", 0}
  267. }
  268. dec.scanp++
  269. dec.tokenState = tokenObjectValue
  270. }
  271. return nil
  272. }
  273. func (dec *Decoder) tokenValueAllowed() bool {
  274. switch dec.tokenState {
  275. case tokenTopValue, tokenArrayStart, tokenArrayValue, tokenObjectValue:
  276. return true
  277. }
  278. return false
  279. }
  280. func (dec *Decoder) tokenValueEnd() {
  281. switch dec.tokenState {
  282. case tokenArrayStart, tokenArrayValue:
  283. dec.tokenState = tokenArrayComma
  284. case tokenObjectValue:
  285. dec.tokenState = tokenObjectComma
  286. }
  287. }
  // A Delim is a JSON array or object delimiter, one of [ ] { or }.
  type Delim rune

  // String returns the delimiter as a one-character string.
  func (d Delim) String() string {
  	r := rune(d)
  	return string(r)
  }
  293. // Token returns the next JSON token in the input stream.
  294. // At the end of the input stream, Token returns nil, io.EOF.
  295. //
  296. // Token guarantees that the delimiters [ ] { } it returns are
  297. // properly nested and matched: if Token encounters an unexpected
  298. // delimiter in the input, it will return an error.
  299. //
  300. // The input stream consists of basic JSON values—bool, string,
  301. // number, and null—along with delimiters [ ] { } of type Delim
  302. // to mark the start and end of arrays and objects.
  303. // Commas and colons are elided.
  304. func (dec *Decoder) Token() (Token, error) {
  305. for {
  306. c, err := dec.peek()
  307. if err != nil {
  308. return nil, err
  309. }
  310. switch c {
  311. case '[':
  312. if !dec.tokenValueAllowed() {
  313. return dec.tokenError(c)
  314. }
  315. dec.scanp++
  316. dec.tokenStack = append(dec.tokenStack, dec.tokenState)
  317. dec.tokenState = tokenArrayStart
  318. return Delim('['), nil
  319. case ']':
  320. if dec.tokenState != tokenArrayStart && dec.tokenState != tokenArrayComma {
  321. return dec.tokenError(c)
  322. }
  323. dec.scanp++
  324. dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
  325. dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
  326. dec.tokenValueEnd()
  327. return Delim(']'), nil
  328. case '{':
  329. if !dec.tokenValueAllowed() {
  330. return dec.tokenError(c)
  331. }
  332. dec.scanp++
  333. dec.tokenStack = append(dec.tokenStack, dec.tokenState)
  334. dec.tokenState = tokenObjectStart
  335. return Delim('{'), nil
  336. case '}':
  337. if dec.tokenState != tokenObjectStart && dec.tokenState != tokenObjectComma {
  338. return dec.tokenError(c)
  339. }
  340. dec.scanp++
  341. dec.tokenState = dec.tokenStack[len(dec.tokenStack)-1]
  342. dec.tokenStack = dec.tokenStack[:len(dec.tokenStack)-1]
  343. dec.tokenValueEnd()
  344. return Delim('}'), nil
  345. case ':':
  346. if dec.tokenState != tokenObjectColon {
  347. return dec.tokenError(c)
  348. }
  349. dec.scanp++
  350. dec.tokenState = tokenObjectValue
  351. continue
  352. case ',':
  353. if dec.tokenState == tokenArrayComma {
  354. dec.scanp++
  355. dec.tokenState = tokenArrayValue
  356. continue
  357. }
  358. if dec.tokenState == tokenObjectComma {
  359. dec.scanp++
  360. dec.tokenState = tokenObjectKey
  361. continue
  362. }
  363. return dec.tokenError(c)
  364. case '"':
  365. if dec.tokenState == tokenObjectStart || dec.tokenState == tokenObjectKey {
  366. var x string
  367. old := dec.tokenState
  368. dec.tokenState = tokenTopValue
  369. err := dec.Decode(&x)
  370. dec.tokenState = old
  371. if err != nil {
  372. clearOffset(err)
  373. return nil, err
  374. }
  375. dec.tokenState = tokenObjectColon
  376. return x, nil
  377. }
  378. fallthrough
  379. default:
  380. if !dec.tokenValueAllowed() {
  381. return dec.tokenError(c)
  382. }
  383. var x interface{}
  384. if err := dec.Decode(&x); err != nil {
  385. clearOffset(err)
  386. return nil, err
  387. }
  388. return x, nil
  389. }
  390. }
  391. }
  392. func clearOffset(err error) {
  393. if s, ok := err.(*SyntaxError); ok {
  394. s.Offset = 0
  395. }
  396. }
  397. func (dec *Decoder) tokenError(c byte) (Token, error) {
  398. var context string
  399. switch dec.tokenState {
  400. case tokenTopValue:
  401. context = " looking for beginning of value"
  402. case tokenArrayStart, tokenArrayValue, tokenObjectValue:
  403. context = " looking for beginning of value"
  404. case tokenArrayComma:
  405. context = " after array element"
  406. case tokenObjectKey:
  407. context = " looking for beginning of object key string"
  408. case tokenObjectColon:
  409. context = " after object key"
  410. case tokenObjectComma:
  411. context = " after object key:value pair"
  412. }
  413. return nil, &SyntaxError{"invalid character " + quoteChar(c) + " " + context, 0}
  414. }
  415. // More reports whether there is another element in the
  416. // current array or object being parsed.
  417. func (dec *Decoder) More() bool {
  418. c, err := dec.peek()
  419. return err == nil && c != ']' && c != '}'
  420. }
  421. func (dec *Decoder) peek() (byte, error) {
  422. var err error
  423. for {
  424. for i := dec.scanp; i < len(dec.buf); i++ {
  425. c := dec.buf[i]
  426. if isSpace(c) {
  427. continue
  428. }
  429. dec.scanp = i
  430. return c, nil
  431. }
  432. // buffer has been scanned, now report any error
  433. if err != nil {
  434. return 0, err
  435. }
  436. err = dec.refill()
  437. }
  438. }
  439. /*
  440. TODO
  441. // EncodeToken writes the given JSON token to the stream.
  442. // It returns an error if the delimiters [ ] { } are not properly used.
  443. //
  444. // EncodeToken does not call Flush, because usually it is part of
  445. // a larger operation such as Encode, and those will call Flush when finished.
  446. // Callers that create an Encoder and then invoke EncodeToken directly,
  447. // without using Encode, need to call Flush when finished to ensure that
  448. // the JSON is written to the underlying writer.
  449. func (e *Encoder) EncodeToken(t Token) error {
  450. ...
  451. }
  452. */