Skip to content

Commit

Permalink
feat!: switch to cborg & json backend, new strictness
Browse files Browse the repository at this point in the history
  • Loading branch information
rvagg committed Jan 21, 2021
1 parent b067f75 commit cbd57f4
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 72 deletions.
172 changes: 130 additions & 42 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,54 +1,142 @@
import json from 'fast-json-stable-stringify'
import isCircular from '@ipld/is-circular'
import transform from 'lodash.transform'
import { bytes, CID } from 'multiformats'
import { CID } from 'multiformats'
import { base64 } from 'multiformats/bases/base64'
import { Token, Type } from 'cborg'
import * as cborgJson from 'cborg/json'

/**
 * Recursively builds a JSON-friendly copy of `obj` for DAG-JSON output.
 *
 * - CIDs become `{ '/': '<cid string>' }`
 * - binary values become `{ '/': { bytes: '<base64 string>' } }`
 * - other non-null objects (including arrays) are converted recursively
 * - everything else is copied through unchanged
 *
 * @param {object} obj - container whose entries are converted
 * @returns {object} the converted copy produced by `transform`
 */
const _encode = (obj) => {
  // Convert one entry value; checks run in the same order as the entry kinds
  // listed above (CID first, then binary, then nested containers).
  const convert = (value) => {
    const cid = CID.asCID(value)
    if (cid) {
      return { '/': cid.toString() }
    }
    if (bytes.isBinary(value)) {
      const coerced = bytes.coerce(value)
      return { '/': { bytes: base64.encode(coerced) } }
    }
    if (value !== null && typeof value === 'object') {
      return _encode(value)
    }
    return value
  }

  return transform(obj, (result, value, key) => {
    result[key] = convert(value)
  })
}

const encode = (obj) => {
if (typeof obj === 'object' && !bytes.isBinary(obj) && !CID.asCID(obj) && obj) {
if (isCircular(obj, { asCID: true })) {
throw new Error('Object contains circular references')
}
obj = _encode(obj)
/**
 * Type encoder for objects: emits the `{"/": "<cid string>"}` token sequence
 * when `obj` is a CID, and declines (returns `null`) for anything else.
 *
 * @param {object} obj - candidate object
 * @returns {Token[]|null} tokens for an indefinite-length single-entry map,
 *   or `null` when `obj` is not a CID
 */
function cidEncoder (obj) {
  // cheap pre-check: only objects whose `asCID` points back at themselves
  // are considered CID candidates
  const selfReferencing = obj.asCID === obj
  if (!selfReferencing) {
    return null // any other kind of object
  }
  const cid = CID.asCID(obj)
  /* c8 ignore next 4 */
  // very unlikely case, and it'll probably throw a recursion error in cborg
  if (!cid) {
    return null
  }
  const encoded = cid.toString()

  const tokens = []
  tokens.push(new Token(Type.map, Infinity, 1)) // open map
  tokens.push(new Token(Type.string, '/', 1)) // key
  tokens.push(new Token(Type.string, encoded, encoded.length)) // value
  tokens.push(new Token(Type.break, undefined, 1)) // close map
  return tokens
}

/**
 * Type encoder for binary values: emits the token sequence for
 * `{"/": {"bytes": "<base64 string>"}}` using two nested indefinite-length maps.
 *
 * @param {Uint8Array} bytes - binary value to encode
 * @returns {Token[]} the seven tokens describing the nested maps
 */
function bytesEncoder (bytes) {
  const encoded = base64.encode(bytes)

  // small factories keep the token list below readable
  const openMap = () => new Token(Type.map, Infinity, 1)
  const closeMap = () => new Token(Type.break, undefined, 1)
  const text = (str) => new Token(Type.string, str, str.length)

  return [
    openMap(),
    text('/'), // key
    openMap(), // value
    text('bytes'), // inner key
    text(encoded), // inner value
    closeMap(),
    closeMap()
  ]
}

/**
 * Type encoder registered for `undefined`: always rejects the value.
 *
 * @throws {Error} unconditionally
 */
function undefinedEncoder () {
  const message = '`undefined` is not supported by the IPLD Data Model and cannot be encoded'
  throw new Error(message)
}

/**
 * Type encoder registered for numbers: rejects `NaN`, `Infinity` and
 * `-Infinity`, and otherwise returns nothing (`undefined`).
 *
 * @param {number} num - number about to be encoded
 * @returns {undefined}
 * @throws {Error} when `num` is `NaN`, `Infinity` or `-Infinity`
 */
function numberEncoder (num) {
  let problem = null
  if (Number.isNaN(num)) {
    problem = '`NaN` is not supported by the IPLD Data Model and cannot be encoded'
  } else if ([Infinity, -Infinity].includes(num)) {
    problem = '`Infinity` and `-Infinity` is not supported by the IPLD Data Model and cannot be encoded'
  }
  if (problem !== null) {
    throw new Error(problem)
  }
}

// Options object handed to cborgJson.encode() by encode(). `typeEncoders` maps
// a type name to the encoder function defined above that handles values of
// that type.
const encodeOptions = {
typeEncoders: {
Object: cidEncoder,
Uint8Array: bytesEncoder, // TODO: all the typedarrays
Buffer: bytesEncoder, // TODO: all the typedarrays
undefined: undefinedEncoder,
number: numberEncoder
}
return bytes.fromString(json(obj))
}

const _decode = (obj) => transform(obj, (result, value, key) => {
if (typeof value === 'object' && value !== null) {
if (value['/']) {
if (typeof value['/'] === 'string') {
result[key] = CID.parse(value['/'])
} else if (typeof value['/'] === 'object' && value['/'].bytes) {
result[key] = base64.decode(value['/'].bytes)
} else {
result[key] = _decode(value)
/**
 * Encodes a value by delegating to `cborgJson.encode` with this module's
 * `encodeOptions` (the custom type encoders).
 *
 * @param {any} obj - value to encode
 * @returns {any} whatever `cborgJson.encode` returns for `obj`
 */
function encode (obj) {
  const encoded = cborgJson.encode(obj, encodeOptions)
  return encoded
}

// Tokenizer layered on top of cborgJson.Tokenizer that recognises the two
// reserved DAG-JSON map shapes in the token stream and rewrites them:
//   {"/": "<string>"}            -> TAG(42) followed by the string token
//   {"/": {"bytes": "<string>"}} -> a single bytes token (base64-decoded)
// Any other token sequence is passed through unchanged.
class DagJsonTokenizer extends cborgJson.Tokenizer {
// data / options are forwarded untouched to the parent tokenizer
constructor (data, options) {
super(data, options)
// tokens that were read ahead and must be handed out again; used as a stack
// (push / pop), so tokens are pushed back in the reverse of the order they
// were read
this.tokenBuffer = []
}

// finished only when nothing is waiting in the push-back buffer and the parent
// tokenizer reports it is done as well
done () {
return this.tokenBuffer.length === 0 && super.done()
}

// raw token source: drain the push-back buffer first, then fall back to the
// parent tokenizer
_next () {
return this.tokenBuffer.length ? this.tokenBuffer.pop() : super.next()
}

// Returns the next token, looking ahead after every map-start token to detect
// the reserved "/" shapes described on the class. Throws when a reserved shape
// starts correctly but the map is not closed where expected (i.e. it carries
// extra entries).
next () {
const token = this._next()

if (token.type === Type.map) {
const keyToken = this._next()
if (keyToken.type === Type.string && keyToken.value === '/') {
const valueToken = this._next()
if (valueToken.type === Type.string) { // *must* be a CID
const breakToken = this._next() // swallow the end-of-map token
if (breakToken.type !== Type.break) {
throw new Error('Invalid encoded CID form')
}
this.tokenBuffer.push(valueToken) // CID.parse will pick this up after our tag token
return new Token(Type.tag, 42, 0)
}
if (valueToken.type === Type.map) {
const innerKeyToken = this._next()
if (innerKeyToken.type === Type.string && innerKeyToken.value === 'bytes') {
const innerValueToken = this._next()
if (innerValueToken.type === Type.string) { // *must* be Bytes
for (let i = 0; i < 2; i++) {
const breakToken = this._next() // swallow two end-of-map tokens
if (breakToken.type !== Type.break) {
throw new Error('Invalid encoded Bytes form')
}
}
const bytes = base64.decode(innerValueToken.value)
return new Token(Type.bytes, bytes, innerValueToken.value.length)
}
// not a reserved shape after all: hand the look-ahead tokens back, last
// read first, so _next() pops them in their original order
this.tokenBuffer.push(innerValueToken) // bail
}
this.tokenBuffer.push(innerKeyToken) // bail
}
this.tokenBuffer.push(valueToken) // bail
}
} else {
result[key] = _decode(value)
this.tokenBuffer.push(keyToken) // bail
}
} else {
result[key] = value
// the original map-start token (or any non-map token) is returned as-is
return token
}
})
}

// Options handed to cborgJson.decode() by decode().
const decodeOptions = {
  allowIndefinite: false,
  allowUndefined: false,
  allowNaN: false,
  allowInfinity: false,
  // this will lead to BigInt for ints outside of safe-integer range, which
  // may surprise users
  allowBigInt: true,
  strict: true,
  useMaps: false,
  // Sparse tag-decoder table whose only entry is at index 42. We're going to
  // get TAG(42)STRING("bafy...") from the tokenizer so we only need to deal
  // with the STRING("bafy...") at this point.
  tags: Object.assign([], { 42: CID.parse })
}

const decode = (buffer) => {
const obj = JSON.parse(bytes.toString(buffer))
return _decode({ value: obj }).value
/**
 * Decodes DAG-JSON bytes by running `cborgJson.decode` with this module's
 * `decodeOptions` and a fresh `DagJsonTokenizer` over the input.
 *
 * @param {Uint8Array} byts - encoded DAG-JSON data
 * @returns {any} whatever `cborgJson.decode` returns for the input
 */
function decode (byts) {
  // Merge into a fresh object: assigning onto the shared module-level
  // `decodeOptions` would mutate it on every call and keep the last tokenizer
  // (and therefore the last input buffer) reachable from module state.
  const options = Object.assign({}, decodeOptions, { tokenizer: new DagJsonTokenizer(byts) })
  return cborgJson.decode(byts, options)
}

const name = 'dag-json'
Expand Down
13 changes: 6 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,14 @@
},
"homepage": "https://github.com/mikeal/dag-json#readme",
"devDependencies": {
"hundreds": "0.0.8",
"mocha": "^8.1.1",
"chai": "^4.2.0",
"hundreds": "^0.0.9",
"mocha": "^8.2.1",
"polendina": "^1.1.0",
"standard": "^14.3.4"
"standard": "^16.0.3"
},
"dependencies": {
"@ipld/is-circular": "^2.0.0",
"fast-json-stable-stringify": "^2.1.0",
"lodash.transform": "^4.6.0",
"multiformats": "^4.0.0"
"ipld-garbage": "^2.0.0",
"multiformats": "^4.4.3"
}
}
110 changes: 87 additions & 23 deletions test/test-basics.js
Original file line number Diff line number Diff line change
@@ -1,41 +1,39 @@
'use strict'
/* globals describe, it */
import assert from 'assert'
/* eslint-env mocha */
import garbage from 'ipld-garbage'
import chai from 'chai'
import { encode, decode } from '@ipld/dag-json'
import { bytes, CID } from 'multiformats'

const { assert } = chai
const same = assert.deepStrictEqual
const test = it

const recode = buffer => encode(decode(buffer))
const recode = byts => encode(decode(byts))

const link = CID.parse('bafyreifepiu23okq5zuyvyhsoiazv2icw2van3s7ko6d3ixl5jx2yj2yhu')

describe('basic dag-json', () => {
test('encode decode', () => {
let buffer = encode({ hello: 'world' })
same(JSON.parse(bytes.toString(recode(buffer))), { hello: 'world' })
const o = { link, buffer: bytes.fromString('asdf'), n: null, o: {} }
buffer = encode(o)
same(decode(buffer), o)
same(bytes.isBinary(decode(buffer).buffer), true)
})

test('circular failure', () => {
const o1 = { hello: 'world' }
const o2 = { o1 }
o1.o2 = o2
try {
encode(o2)
assert.ok(false)
} catch (e) {
same(e.message, 'Object contains circular references')
}
let byts = encode({ hello: 'world' })
same(JSON.parse(bytes.toString(recode(byts))), { hello: 'world' })
const o = { link, byts: bytes.fromString('asdf'), n: null, o: {} }
byts = encode(o)
same(decode(byts), o)
same(bytes.isBinary(decode(byts).byts), true)
})

test('use reserved space', () => {
const decoded = decode(encode({ '/': { type: 'stringName' } }))
same(decoded['/'].type, 'stringName')
// allowed
same(decode(encode({ '/': { bytes: true } })), { '/': { bytes: true } })
same(decode(encode({ '/': { type: 'stringName' } })), { '/': { type: 'stringName' } })
same(decode(encode({ '/': bytes.fromString('asdf') })), { '/': bytes.fromString('asdf') })

// TODO: test encode() doesn't allow this
assert.throws(() => decode(encode({ '/': link.toString(), bop: 'bip' })))
assert.throws(() => decode(encode({ '/': { bytes: 'mS7ldeA', bop: 'bip' } })))
assert.throws(() => decode(encode({ '/': { bytes: 'mS7ldeA' }, bop: 'bip' })))
assert.throws(() => decode(encode({ '/': { bytes: 'mS7ldeA', bop: 'bip' }, bop: 'bip' })))
})

test('native types', done => {
Expand All @@ -51,4 +49,70 @@ describe('basic dag-json', () => {
same(flip(['asdf']), ['asdf'])
done()
})

test('error on circular references', () => {
const circularObj = {}
circularObj.a = circularObj
assert.throws(() => encode(circularObj), /object contains circular references/)
const circularArr = [circularObj]
circularObj.a = circularArr
assert.throws(() => encode(circularArr), /object contains circular references/)
})

test('error on encoding undefined', () => {
assert.throws(() => encode(undefined), /\Wundefined\W.*not supported/)
const objWithUndefined = { a: 'a', b: undefined }
assert.throws(() => encode(objWithUndefined), /\Wundefined\W.*not supported/)
})

test('error on encoding IEEE 754 specials', () => {
for (const special of [NaN, Infinity, -Infinity]) {
assert.throws(() => encode(special), new RegExp(`\\W${String(special)}\\W.*not supported`))
const objWithSpecial = { a: 'a', b: special }
assert.throws(() => encode(objWithSpecial), new RegExp(`\\W${String(special)}\\W.*not supported`))
const arrWithSpecial = [1, 1.1, -1, -1.1, Number.MAX_SAFE_INTEGER, special, Number.MIN_SAFE_INTEGER]
assert.throws(() => encode(arrWithSpecial), new RegExp(`\\W${String(special)}\\W.*not supported`))
}
})

test('fuzz serialize and deserialize with garbage', function () {
// filter out fuzz garbage for objects that are disqualified by DAG-JSON rules
const checkObj = (obj) => {
if (Array.isArray(obj)) {
return obj.every(checkObj)
}
if (obj && typeof obj === 'object') {
for (const [key, value] of Object.entries(obj)) {
if (key === '/') {
if (typeof value === 'string') {
return false
}
if (value && typeof value === 'object' && value.bytes !== undefined) {
return false
}
}
if (!checkObj(value)) {
return false
}
}
}
return true
}

this.timeout(5000)
for (let ii = 0; ii < 1000; ii++) {
const original = garbage(300)
if (!checkObj(original)) {
continue
}
try {
const encoded = encode(original)
const decoded = decode(encoded)
same(decoded, original)
} catch (err) {
console.log('Failed on fuzz object:', original)
throw err
}
}
})
})

0 comments on commit cbd57f4

Please sign in to comment.