extjson_parser.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732
  1. // Copyright (C) MongoDB, Inc. 2017-present.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License"); you may
  4. // not use this file except in compliance with the License. You may obtain
  5. // a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
  6. package bsonrw
  7. import (
  8. "errors"
  9. "fmt"
  10. "io"
  11. "go.mongodb.org/mongo-driver/bson/bsontype"
  12. )
  13. const maxNestingDepth = 200
  14. // ErrInvalidJSON indicates the JSON input is invalid
  15. var ErrInvalidJSON = errors.New("invalid JSON input")
  16. type jsonParseState byte
  17. const (
  18. jpsStartState jsonParseState = iota
  19. jpsSawBeginObject
  20. jpsSawEndObject
  21. jpsSawBeginArray
  22. jpsSawEndArray
  23. jpsSawColon
  24. jpsSawComma
  25. jpsSawKey
  26. jpsSawValue
  27. jpsDoneState
  28. jpsInvalidState
  29. )
  30. type jsonParseMode byte
  31. const (
  32. jpmInvalidMode jsonParseMode = iota
  33. jpmObjectMode
  34. jpmArrayMode
  35. )
  36. type extJSONValue struct {
  37. t bsontype.Type
  38. v interface{}
  39. }
  40. type extJSONObject struct {
  41. keys []string
  42. values []*extJSONValue
  43. }
  44. type extJSONParser struct {
  45. js *jsonScanner
  46. s jsonParseState
  47. m []jsonParseMode
  48. k string
  49. v *extJSONValue
  50. err error
  51. canonical bool
  52. depth int
  53. maxDepth int
  54. emptyObject bool
  55. }
  56. // newExtJSONParser returns a new extended JSON parser, ready to to begin
  57. // parsing from the first character of the argued json input. It will not
  58. // perform any read-ahead and will therefore not report any errors about
  59. // malformed JSON at this point.
  60. func newExtJSONParser(r io.Reader, canonical bool) *extJSONParser {
  61. return &extJSONParser{
  62. js: &jsonScanner{r: r},
  63. s: jpsStartState,
  64. m: []jsonParseMode{},
  65. canonical: canonical,
  66. maxDepth: maxNestingDepth,
  67. }
  68. }
  69. // peekType examines the next value and returns its BSON Type
  70. func (ejp *extJSONParser) peekType() (bsontype.Type, error) {
  71. var t bsontype.Type
  72. var err error
  73. ejp.advanceState()
  74. switch ejp.s {
  75. case jpsSawValue:
  76. t = ejp.v.t
  77. case jpsSawBeginArray:
  78. t = bsontype.Array
  79. case jpsInvalidState:
  80. err = ejp.err
  81. case jpsSawComma:
  82. // in array mode, seeing a comma means we need to progress again to actually observe a type
  83. if ejp.peekMode() == jpmArrayMode {
  84. return ejp.peekType()
  85. }
  86. case jpsSawEndArray:
  87. // this would only be a valid state if we were in array mode, so return end-of-array error
  88. err = ErrEOA
  89. case jpsSawBeginObject:
  90. // peek key to determine type
  91. ejp.advanceState()
  92. switch ejp.s {
  93. case jpsSawEndObject: // empty embedded document
  94. t = bsontype.EmbeddedDocument
  95. ejp.emptyObject = true
  96. case jpsInvalidState:
  97. err = ejp.err
  98. case jpsSawKey:
  99. t = wrapperKeyBSONType(ejp.k)
  100. if t == bsontype.JavaScript {
  101. // just saw $code, need to check for $scope at same level
  102. _, err := ejp.readValue(bsontype.JavaScript)
  103. if err != nil {
  104. break
  105. }
  106. switch ejp.s {
  107. case jpsSawEndObject: // type is TypeJavaScript
  108. case jpsSawComma:
  109. ejp.advanceState()
  110. if ejp.s == jpsSawKey && ejp.k == "$scope" {
  111. t = bsontype.CodeWithScope
  112. } else {
  113. err = fmt.Errorf("invalid extended JSON: unexpected key %s in CodeWithScope object", ejp.k)
  114. }
  115. case jpsInvalidState:
  116. err = ejp.err
  117. default:
  118. err = ErrInvalidJSON
  119. }
  120. }
  121. }
  122. }
  123. return t, err
  124. }
  125. // readKey parses the next key and its type and returns them
  126. func (ejp *extJSONParser) readKey() (string, bsontype.Type, error) {
  127. if ejp.emptyObject {
  128. ejp.emptyObject = false
  129. return "", 0, ErrEOD
  130. }
  131. // advance to key (or return with error)
  132. switch ejp.s {
  133. case jpsStartState:
  134. ejp.advanceState()
  135. if ejp.s == jpsSawBeginObject {
  136. ejp.advanceState()
  137. }
  138. case jpsSawBeginObject:
  139. ejp.advanceState()
  140. case jpsSawValue, jpsSawEndObject, jpsSawEndArray:
  141. ejp.advanceState()
  142. switch ejp.s {
  143. case jpsSawBeginObject, jpsSawComma:
  144. ejp.advanceState()
  145. case jpsSawEndObject:
  146. return "", 0, ErrEOD
  147. case jpsDoneState:
  148. return "", 0, io.EOF
  149. case jpsInvalidState:
  150. return "", 0, ejp.err
  151. default:
  152. return "", 0, ErrInvalidJSON
  153. }
  154. case jpsSawKey: // do nothing (key was peeked before)
  155. default:
  156. return "", 0, invalidRequestError("key")
  157. }
  158. // read key
  159. var key string
  160. switch ejp.s {
  161. case jpsSawKey:
  162. key = ejp.k
  163. case jpsSawEndObject:
  164. return "", 0, ErrEOD
  165. case jpsInvalidState:
  166. return "", 0, ejp.err
  167. default:
  168. return "", 0, invalidRequestError("key")
  169. }
  170. // check for colon
  171. ejp.advanceState()
  172. if err := ensureColon(ejp.s, key); err != nil {
  173. return "", 0, err
  174. }
  175. // peek at the value to determine type
  176. t, err := ejp.peekType()
  177. if err != nil {
  178. return "", 0, err
  179. }
  180. return key, t, nil
  181. }
  182. // readValue returns the value corresponding to the Type returned by peekType
  183. func (ejp *extJSONParser) readValue(t bsontype.Type) (*extJSONValue, error) {
  184. if ejp.s == jpsInvalidState {
  185. return nil, ejp.err
  186. }
  187. var v *extJSONValue
  188. switch t {
  189. case bsontype.Null, bsontype.Boolean, bsontype.String:
  190. if ejp.s != jpsSawValue {
  191. return nil, invalidRequestError(t.String())
  192. }
  193. v = ejp.v
  194. case bsontype.Int32, bsontype.Int64, bsontype.Double:
  195. // relaxed version allows these to be literal number values
  196. if ejp.s == jpsSawValue {
  197. v = ejp.v
  198. break
  199. }
  200. fallthrough
  201. case bsontype.Decimal128, bsontype.Symbol, bsontype.ObjectID, bsontype.MinKey, bsontype.MaxKey, bsontype.Undefined:
  202. switch ejp.s {
  203. case jpsSawKey:
  204. // read colon
  205. ejp.advanceState()
  206. if err := ensureColon(ejp.s, ejp.k); err != nil {
  207. return nil, err
  208. }
  209. // read value
  210. ejp.advanceState()
  211. if ejp.s != jpsSawValue || !ejp.ensureExtValueType(t) {
  212. return nil, invalidJSONErrorForType("value", t)
  213. }
  214. v = ejp.v
  215. // read end object
  216. ejp.advanceState()
  217. if ejp.s != jpsSawEndObject {
  218. return nil, invalidJSONErrorForType("} after value", t)
  219. }
  220. default:
  221. return nil, invalidRequestError(t.String())
  222. }
  223. case bsontype.Binary, bsontype.Regex, bsontype.Timestamp, bsontype.DBPointer:
  224. if ejp.s != jpsSawKey {
  225. return nil, invalidRequestError(t.String())
  226. }
  227. // read colon
  228. ejp.advanceState()
  229. if err := ensureColon(ejp.s, ejp.k); err != nil {
  230. return nil, err
  231. }
  232. ejp.advanceState()
  233. if t == bsontype.Binary && ejp.s == jpsSawValue {
  234. // convert legacy $binary format
  235. base64 := ejp.v
  236. ejp.advanceState()
  237. if ejp.s != jpsSawComma {
  238. return nil, invalidJSONErrorForType(",", bsontype.Binary)
  239. }
  240. ejp.advanceState()
  241. key, t, err := ejp.readKey()
  242. if err != nil {
  243. return nil, err
  244. }
  245. if key != "$type" {
  246. return nil, invalidJSONErrorForType("$type", bsontype.Binary)
  247. }
  248. subType, err := ejp.readValue(t)
  249. if err != nil {
  250. return nil, err
  251. }
  252. ejp.advanceState()
  253. if ejp.s != jpsSawEndObject {
  254. return nil, invalidJSONErrorForType("2 key-value pairs and then }", bsontype.Binary)
  255. }
  256. v = &extJSONValue{
  257. t: bsontype.EmbeddedDocument,
  258. v: &extJSONObject{
  259. keys: []string{"base64", "subType"},
  260. values: []*extJSONValue{base64, subType},
  261. },
  262. }
  263. break
  264. }
  265. // read KV pairs
  266. if ejp.s != jpsSawBeginObject {
  267. return nil, invalidJSONErrorForType("{", t)
  268. }
  269. keys, vals, err := ejp.readObject(2, true)
  270. if err != nil {
  271. return nil, err
  272. }
  273. ejp.advanceState()
  274. if ejp.s != jpsSawEndObject {
  275. return nil, invalidJSONErrorForType("2 key-value pairs and then }", t)
  276. }
  277. v = &extJSONValue{t: bsontype.EmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}}
  278. case bsontype.DateTime:
  279. switch ejp.s {
  280. case jpsSawValue:
  281. v = ejp.v
  282. case jpsSawKey:
  283. // read colon
  284. ejp.advanceState()
  285. if err := ensureColon(ejp.s, ejp.k); err != nil {
  286. return nil, err
  287. }
  288. ejp.advanceState()
  289. switch ejp.s {
  290. case jpsSawBeginObject:
  291. keys, vals, err := ejp.readObject(1, true)
  292. if err != nil {
  293. return nil, err
  294. }
  295. v = &extJSONValue{t: bsontype.EmbeddedDocument, v: &extJSONObject{keys: keys, values: vals}}
  296. case jpsSawValue:
  297. if ejp.canonical {
  298. return nil, invalidJSONError("{")
  299. }
  300. v = ejp.v
  301. default:
  302. if ejp.canonical {
  303. return nil, invalidJSONErrorForType("object", t)
  304. }
  305. return nil, invalidJSONErrorForType("ISO-8601 Internet Date/Time Format as decribed in RFC-3339", t)
  306. }
  307. ejp.advanceState()
  308. if ejp.s != jpsSawEndObject {
  309. return nil, invalidJSONErrorForType("value and then }", t)
  310. }
  311. default:
  312. return nil, invalidRequestError(t.String())
  313. }
  314. case bsontype.JavaScript:
  315. switch ejp.s {
  316. case jpsSawKey:
  317. // read colon
  318. ejp.advanceState()
  319. if err := ensureColon(ejp.s, ejp.k); err != nil {
  320. return nil, err
  321. }
  322. // read value
  323. ejp.advanceState()
  324. if ejp.s != jpsSawValue {
  325. return nil, invalidJSONErrorForType("value", t)
  326. }
  327. v = ejp.v
  328. // read end object or comma and just return
  329. ejp.advanceState()
  330. case jpsSawEndObject:
  331. v = ejp.v
  332. default:
  333. return nil, invalidRequestError(t.String())
  334. }
  335. case bsontype.CodeWithScope:
  336. if ejp.s == jpsSawKey && ejp.k == "$scope" {
  337. v = ejp.v // this is the $code string from earlier
  338. // read colon
  339. ejp.advanceState()
  340. if err := ensureColon(ejp.s, ejp.k); err != nil {
  341. return nil, err
  342. }
  343. // read {
  344. ejp.advanceState()
  345. if ejp.s != jpsSawBeginObject {
  346. return nil, invalidJSONError("$scope to be embedded document")
  347. }
  348. } else {
  349. return nil, invalidRequestError(t.String())
  350. }
  351. case bsontype.EmbeddedDocument, bsontype.Array:
  352. return nil, invalidRequestError(t.String())
  353. }
  354. return v, nil
  355. }
  356. // readObject is a utility method for reading full objects of known (or expected) size
  357. // it is useful for extended JSON types such as binary, datetime, regex, and timestamp
  358. func (ejp *extJSONParser) readObject(numKeys int, started bool) ([]string, []*extJSONValue, error) {
  359. keys := make([]string, numKeys)
  360. vals := make([]*extJSONValue, numKeys)
  361. if !started {
  362. ejp.advanceState()
  363. if ejp.s != jpsSawBeginObject {
  364. return nil, nil, invalidJSONError("{")
  365. }
  366. }
  367. for i := 0; i < numKeys; i++ {
  368. key, t, err := ejp.readKey()
  369. if err != nil {
  370. return nil, nil, err
  371. }
  372. switch ejp.s {
  373. case jpsSawKey:
  374. v, err := ejp.readValue(t)
  375. if err != nil {
  376. return nil, nil, err
  377. }
  378. keys[i] = key
  379. vals[i] = v
  380. case jpsSawValue:
  381. keys[i] = key
  382. vals[i] = ejp.v
  383. default:
  384. return nil, nil, invalidJSONError("value")
  385. }
  386. }
  387. ejp.advanceState()
  388. if ejp.s != jpsSawEndObject {
  389. return nil, nil, invalidJSONError("}")
  390. }
  391. return keys, vals, nil
  392. }
  393. // advanceState reads the next JSON token from the scanner and transitions
  394. // from the current state based on that token's type
  395. func (ejp *extJSONParser) advanceState() {
  396. if ejp.s == jpsDoneState || ejp.s == jpsInvalidState {
  397. return
  398. }
  399. jt, err := ejp.js.nextToken()
  400. if err != nil {
  401. ejp.err = err
  402. ejp.s = jpsInvalidState
  403. return
  404. }
  405. valid := ejp.validateToken(jt.t)
  406. if !valid {
  407. ejp.err = unexpectedTokenError(jt)
  408. ejp.s = jpsInvalidState
  409. return
  410. }
  411. switch jt.t {
  412. case jttBeginObject:
  413. ejp.s = jpsSawBeginObject
  414. ejp.pushMode(jpmObjectMode)
  415. ejp.depth++
  416. if ejp.depth > ejp.maxDepth {
  417. ejp.err = nestingDepthError(jt.p, ejp.depth)
  418. ejp.s = jpsInvalidState
  419. }
  420. case jttEndObject:
  421. ejp.s = jpsSawEndObject
  422. ejp.depth--
  423. if ejp.popMode() != jpmObjectMode {
  424. ejp.err = unexpectedTokenError(jt)
  425. ejp.s = jpsInvalidState
  426. }
  427. case jttBeginArray:
  428. ejp.s = jpsSawBeginArray
  429. ejp.pushMode(jpmArrayMode)
  430. case jttEndArray:
  431. ejp.s = jpsSawEndArray
  432. if ejp.popMode() != jpmArrayMode {
  433. ejp.err = unexpectedTokenError(jt)
  434. ejp.s = jpsInvalidState
  435. }
  436. case jttColon:
  437. ejp.s = jpsSawColon
  438. case jttComma:
  439. ejp.s = jpsSawComma
  440. case jttEOF:
  441. ejp.s = jpsDoneState
  442. if len(ejp.m) != 0 {
  443. ejp.err = unexpectedTokenError(jt)
  444. ejp.s = jpsInvalidState
  445. }
  446. case jttString:
  447. switch ejp.s {
  448. case jpsSawComma:
  449. if ejp.peekMode() == jpmArrayMode {
  450. ejp.s = jpsSawValue
  451. ejp.v = extendJSONToken(jt)
  452. return
  453. }
  454. fallthrough
  455. case jpsSawBeginObject:
  456. ejp.s = jpsSawKey
  457. ejp.k = jt.v.(string)
  458. return
  459. }
  460. fallthrough
  461. default:
  462. ejp.s = jpsSawValue
  463. ejp.v = extendJSONToken(jt)
  464. }
  465. }
  466. var jpsValidTransitionTokens = map[jsonParseState]map[jsonTokenType]bool{
  467. jpsStartState: {
  468. jttBeginObject: true,
  469. jttBeginArray: true,
  470. jttInt32: true,
  471. jttInt64: true,
  472. jttDouble: true,
  473. jttString: true,
  474. jttBool: true,
  475. jttNull: true,
  476. jttEOF: true,
  477. },
  478. jpsSawBeginObject: {
  479. jttEndObject: true,
  480. jttString: true,
  481. },
  482. jpsSawEndObject: {
  483. jttEndObject: true,
  484. jttEndArray: true,
  485. jttComma: true,
  486. jttEOF: true,
  487. },
  488. jpsSawBeginArray: {
  489. jttBeginObject: true,
  490. jttBeginArray: true,
  491. jttEndArray: true,
  492. jttInt32: true,
  493. jttInt64: true,
  494. jttDouble: true,
  495. jttString: true,
  496. jttBool: true,
  497. jttNull: true,
  498. },
  499. jpsSawEndArray: {
  500. jttEndObject: true,
  501. jttEndArray: true,
  502. jttComma: true,
  503. jttEOF: true,
  504. },
  505. jpsSawColon: {
  506. jttBeginObject: true,
  507. jttBeginArray: true,
  508. jttInt32: true,
  509. jttInt64: true,
  510. jttDouble: true,
  511. jttString: true,
  512. jttBool: true,
  513. jttNull: true,
  514. },
  515. jpsSawComma: {
  516. jttBeginObject: true,
  517. jttBeginArray: true,
  518. jttInt32: true,
  519. jttInt64: true,
  520. jttDouble: true,
  521. jttString: true,
  522. jttBool: true,
  523. jttNull: true,
  524. },
  525. jpsSawKey: {
  526. jttColon: true,
  527. },
  528. jpsSawValue: {
  529. jttEndObject: true,
  530. jttEndArray: true,
  531. jttComma: true,
  532. jttEOF: true,
  533. },
  534. jpsDoneState: {},
  535. jpsInvalidState: {},
  536. }
  537. func (ejp *extJSONParser) validateToken(jtt jsonTokenType) bool {
  538. switch ejp.s {
  539. case jpsSawEndObject:
  540. // if we are at depth zero and the next token is a '{',
  541. // we can consider it valid only if we are not in array mode.
  542. if jtt == jttBeginObject && ejp.depth == 0 {
  543. return ejp.peekMode() != jpmArrayMode
  544. }
  545. case jpsSawComma:
  546. switch ejp.peekMode() {
  547. // the only valid next token after a comma inside a document is a string (a key)
  548. case jpmObjectMode:
  549. return jtt == jttString
  550. case jpmInvalidMode:
  551. return false
  552. }
  553. }
  554. _, ok := jpsValidTransitionTokens[ejp.s][jtt]
  555. return ok
  556. }
  557. // ensureExtValueType returns true if the current value has the expected
  558. // value type for single-key extended JSON types. For example,
  559. // {"$numberInt": v} v must be TypeString
  560. func (ejp *extJSONParser) ensureExtValueType(t bsontype.Type) bool {
  561. switch t {
  562. case bsontype.MinKey, bsontype.MaxKey:
  563. return ejp.v.t == bsontype.Int32
  564. case bsontype.Undefined:
  565. return ejp.v.t == bsontype.Boolean
  566. case bsontype.Int32, bsontype.Int64, bsontype.Double, bsontype.Decimal128, bsontype.Symbol, bsontype.ObjectID:
  567. return ejp.v.t == bsontype.String
  568. default:
  569. return false
  570. }
  571. }
  572. func (ejp *extJSONParser) pushMode(m jsonParseMode) {
  573. ejp.m = append(ejp.m, m)
  574. }
  575. func (ejp *extJSONParser) popMode() jsonParseMode {
  576. l := len(ejp.m)
  577. if l == 0 {
  578. return jpmInvalidMode
  579. }
  580. m := ejp.m[l-1]
  581. ejp.m = ejp.m[:l-1]
  582. return m
  583. }
  584. func (ejp *extJSONParser) peekMode() jsonParseMode {
  585. l := len(ejp.m)
  586. if l == 0 {
  587. return jpmInvalidMode
  588. }
  589. return ejp.m[l-1]
  590. }
  591. func extendJSONToken(jt *jsonToken) *extJSONValue {
  592. var t bsontype.Type
  593. switch jt.t {
  594. case jttInt32:
  595. t = bsontype.Int32
  596. case jttInt64:
  597. t = bsontype.Int64
  598. case jttDouble:
  599. t = bsontype.Double
  600. case jttString:
  601. t = bsontype.String
  602. case jttBool:
  603. t = bsontype.Boolean
  604. case jttNull:
  605. t = bsontype.Null
  606. default:
  607. return nil
  608. }
  609. return &extJSONValue{t: t, v: jt.v}
  610. }
  611. func ensureColon(s jsonParseState, key string) error {
  612. if s != jpsSawColon {
  613. return fmt.Errorf("invalid JSON input: missing colon after key \"%s\"", key)
  614. }
  615. return nil
  616. }
  617. func invalidRequestError(s string) error {
  618. return fmt.Errorf("invalid request to read %s", s)
  619. }
  620. func invalidJSONError(expected string) error {
  621. return fmt.Errorf("invalid JSON input; expected %s", expected)
  622. }
  623. func invalidJSONErrorForType(expected string, t bsontype.Type) error {
  624. return fmt.Errorf("invalid JSON input; expected %s for %s", expected, t)
  625. }
  626. func unexpectedTokenError(jt *jsonToken) error {
  627. switch jt.t {
  628. case jttInt32, jttInt64, jttDouble:
  629. return fmt.Errorf("invalid JSON input; unexpected number (%v) at position %d", jt.v, jt.p)
  630. case jttString:
  631. return fmt.Errorf("invalid JSON input; unexpected string (\"%v\") at position %d", jt.v, jt.p)
  632. case jttBool:
  633. return fmt.Errorf("invalid JSON input; unexpected boolean literal (%v) at position %d", jt.v, jt.p)
  634. case jttNull:
  635. return fmt.Errorf("invalid JSON input; unexpected null literal at position %d", jt.p)
  636. case jttEOF:
  637. return fmt.Errorf("invalid JSON input; unexpected end of input at position %d", jt.p)
  638. default:
  639. return fmt.Errorf("invalid JSON input; unexpected %c at position %d", jt.v.(byte), jt.p)
  640. }
  641. }
  642. func nestingDepthError(p, depth int) error {
  643. return fmt.Errorf("invalid JSON input; nesting too deep (%d levels) at position %d", depth, p)
  644. }