From 261d6d2fd5c376cb118b539fbe5a1f8a53270f38 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Thu, 26 Jul 2018 12:33:40 -0400 Subject: [PATCH 1/7] feat(core): add option to specify chunking algorithm This allows the chunking algorithm, and options to be specified when using the adding files. Specifying chunker and options are identical to go-ipfs and support the following formats: default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max} This is required to achieve parity with go-ipfs. Fixes #1283 License: MIT Signed-off-by: Dan Ordille --- src/cli/commands/files/add.js | 7 ++- src/core/components/files.js | 6 ++- src/core/utils.js | 88 +++++++++++++++++++++++++++++++++ src/http/api/resources/files.js | 3 +- test/core/utils.js | 47 ++++++++++++++++++ 5 files changed, 147 insertions(+), 4 deletions(-) diff --git a/src/cli/commands/files/add.js b/src/cli/commands/files/add.js index 37732b7ad2..df8af91bac 100644 --- a/src/cli/commands/files/add.js +++ b/src/cli/commands/files/add.js @@ -135,6 +135,10 @@ module.exports = { default: false, describe: 'Only chunk and hash, do not write' }, + chunker: { + default: 'default', + describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]' + }, 'enable-sharding-experiment': { type: 'boolean', default: false @@ -194,7 +198,8 @@ module.exports = { onlyHash: argv.onlyHash, hashAlg: argv.hash, wrapWithDirectory: argv.wrapWithDirectory, - pin: argv.pin + pin: argv.pin, + chunker: argv.chunker } if (options.enableShardingExperiment && utils.isDaemonOn()) { diff --git a/src/core/components/files.js b/src/core/components/files.js index de0b01c562..8bf66eb972 100644 --- a/src/core/components/files.js +++ b/src/core/components/files.js @@ -18,6 +18,7 @@ const OtherBuffer = require('buffer').Buffer const CID = require('cids') const toB58String = require('multihashes').toB58String const errCode = require('err-code') +const parseChunkerString = require('../utils').parseChunkerString const WRAPPER = 'wrapper/' @@ -148,12 +149,13 @@ class AddHelper extends Duplex { } module.exports = function files (self) { - function _addPullStream (options) { + function _addPullStream (options = {}) { + const chunkerOptions = parseChunkerString(options.chunker) const opts = Object.assign({}, { shardSplitThreshold: self._options.EXPERIMENTAL.sharding ? 1000 : Infinity - }, options) + }, options, chunkerOptions) if (opts.hashAlg && opts.cidVersion !== 1) { opts.cidVersion = 1 diff --git a/src/core/utils.js b/src/core/utils.js index 55ac9be2a2..6c9488e293 100644 --- a/src/core/utils.js +++ b/src/core/utils.js @@ -110,5 +110,93 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) { }, callback) }) +/** + * Parses chunker string into options used by DAGBuilder in ipfs-unixfs-engine + * + * + * @param {String} chunker Chunker algorithm supported formats: + * "default" ("") + * "size-{size}", + * "rabin" + * "rabin-{avg}" + * "rabin-{min}-{avg}-{max}" + * + * @return {Object} Chunker options for DAGBuilder + */ +function parseChunkerString (chunker) { + if (!chunker || chunker === '' || chunker === 'default') { + return { + chunker: 'fixed' + } + } else if (chunker.startsWith('size-')) { + const sizeStr = chunker.split('-')[1] + const size = parseInt(sizeStr) + if (isNaN(size)) { + throw new Error('Parameter avg must be an integer') + } + return { + chunker: 'fixed', + chunkerOptions: { + maxChunkSize: size + } + } + } else if (chunker.startsWith('rabin')) { + return { + chunker: 'rabin', + chunkerOptions: parseRabinString(chunker) + } + } else { + throw new Error(`unrecognized chunker option: ${chunker}`) + } +} + +/** + * Parses rabin chunker string + * + * @param {String} chunker Chunker algorithm supported formats: + * "rabin" + * "rabin-{avg}" + * "rabin-{min}-{avg}-{max}" + * + * @return {Object} rabin chunker options + */ +function parseRabinString (chunker) { + const options = {} + const parts = chunker.split('-') + switch (parts.length) { + case 1: + options.avgChunkSize = 262144 + break + case 2: + options.avgChunkSize = parseInt(parts[1]) + if (isNaN(options.avgChunkSize)) { + throw new Error('Parameter avg must be an integer') + } + break + case 4: + options.minChunkSize = parseSub(parts[1].split(':'), 'min') + options.avgChunkSize = parseSub(parts[2].split(':'), 'avg') + options.maxChunkSize = parseSub(parts[3].split(':'), 'max') + break + default: + throw new Error('incorrect format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]"') + } + + return options +} + +function parseSub (sub, name) { + if (sub.length > 1 && sub[0] !== name) { + throw new Error('Parameter order must be min:avg:max') + } + let size = parseInt(sub[sub.length - 1]) + if (isNaN(size)) { + throw new Error(`Parameter ${name} must be an integer`) + } + + return size +} + exports.parseIpfsPath = parseIpfsPath exports.resolvePath = resolvePath +exports.parseChunkerString = parseChunkerString diff --git a/src/http/api/resources/files.js b/src/http/api/resources/files.js index 0370c4b0c2..645ddaceb7 100644 --- a/src/http/api/resources/files.js +++ b/src/http/api/resources/files.js @@ -221,7 +221,8 @@ exports.add = { onlyHash: request.query['only-hash'], hashAlg: request.query['hash'], wrapWithDirectory: request.query['wrap-with-directory'], - pin: request.query.pin + pin: request.query.pin, + chunker: request.query['chunker'] } const aborter = abortable() diff --git a/test/core/utils.js b/test/core/utils.js index b5c84b15c1..0e669d5389 100644 --- a/test/core/utils.js +++ b/test/core/utils.js @@ -157,4 +157,51 @@ describe('utils', () => { }) }) }) + + describe('parseChunkerString', () => { + it('handles an empty string', () => { + const options = utils.parseChunkerString('') + expect(options).to.have.property('chunker').to.equal('fixed') + }) + + it('handles a null chunker string', () => { + const options = utils.parseChunkerString(null) + expect(options).to.have.property('chunker').to.equal('fixed') + }) + + it('parses a fixed size string', () => { + const options = utils.parseChunkerString('size-512') + expect(options).to.have.property('chunker').to.equal('fixed') + expect(options) + .to.have.property('chunkerOptions') + .to.have.property('maxChunkSize') + .to.equal(512) + }) + + it('parses a rabin string without size', () => { + const options = utils.parseChunkerString('rabin') + expect(options).to.have.property('chunker').to.equal('rabin') + expect(options) + .to.have.property('chunkerOptions') + .to.have.property('avgChunkSize') + }) + + it('parses a rabin string with only avg size', () => { + const options = utils.parseChunkerString('rabin-512') + expect(options).to.have.property('chunker').to.equal('rabin') + expect(options) + .to.have.property('chunkerOptions') + .to.have.property('avgChunkSize') + .to.equal(512) + }) + + it('parses a rabin string with min, avg, and max', () => { + const options = utils.parseChunkerString('rabin-42-92-184') + expect(options).to.have.property('chunker').to.equal('rabin') + expect(options).to.have.property('chunkerOptions') + expect(options.chunkerOptions).to.have.property('minChunkSize').to.equal(42) + expect(options.chunkerOptions).to.have.property('avgChunkSize').to.equal(92) + expect(options.chunkerOptions).to.have.property('maxChunkSize').to.equal(184) + }) + }) }) From 5867d52a4a8f96b90a9f625cdb041044ba946da8 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Tue, 7 Aug 2018 12:26:29 -0400 Subject: [PATCH 2/7] test: add tests for invalid chunker types and parameters License: MIT Signed-off-by: Dan Ordille --- src/cli/commands/files/add.js | 2 +- src/core/utils.js | 12 ++++++------ test/core/utils.js | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/cli/commands/files/add.js b/src/cli/commands/files/add.js index df8af91bac..435f9f05cc 100644 --- a/src/cli/commands/files/add.js +++ b/src/cli/commands/files/add.js @@ -136,7 +136,7 @@ module.exports = { describe: 'Only chunk and hash, do not write' }, chunker: { - default: 'default', + default: 'size-262144', describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]' }, 'enable-sharding-experiment': { diff --git a/src/core/utils.js b/src/core/utils.js index 6c9488e293..13249809cc 100644 --- a/src/core/utils.js +++ b/src/core/utils.js @@ -132,7 +132,7 @@ function parseChunkerString (chunker) { const sizeStr = chunker.split('-')[1] const size = parseInt(sizeStr) if (isNaN(size)) { - throw new Error('Parameter avg must be an integer') + throw new Error('Chunker parameter size must be an integer') } return { chunker: 'fixed', @@ -146,7 +146,7 @@ function parseChunkerString (chunker) { chunkerOptions: parseRabinString(chunker) } } else { - throw new Error(`unrecognized chunker option: ${chunker}`) + throw new Error(`Unrecognized chunker option: ${chunker}`) } } @@ -170,7 +170,7 @@ function parseRabinString (chunker) { case 2: options.avgChunkSize = parseInt(parts[1]) if (isNaN(options.avgChunkSize)) { - throw new Error('Parameter avg must be an integer') + throw new Error('Chunker parameter avg must be an integer') } break case 4: @@ -179,7 +179,7 @@ function parseRabinString (chunker) { options.maxChunkSize = parseSub(parts[3].split(':'), 'max') break default: - throw new Error('incorrect format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]"') + throw new Error('Incorrect chunker format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]"') } return options @@ -187,11 +187,11 @@ function parseRabinString (chunker) { function parseSub (sub, name) { if (sub.length > 1 && sub[0] !== name) { - throw new Error('Parameter order must be min:avg:max') + throw new Error('Chunker parameter order must be min:avg:max') } let size = parseInt(sub[sub.length - 1]) if (isNaN(size)) { - throw new Error(`Parameter ${name} must be an integer`) + throw new Error(`Chunker parameter ${name} must be an integer`) } return size diff --git a/test/core/utils.js b/test/core/utils.js index 0e669d5389..af73c30af6 100644 --- a/test/core/utils.js +++ b/test/core/utils.js @@ -203,5 +203,25 @@ describe('utils', () => { expect(options.chunkerOptions).to.have.property('avgChunkSize').to.equal(92) expect(options.chunkerOptions).to.have.property('maxChunkSize').to.equal(184) }) + + it('throws an error for unsupported chunker type', () => { + const fn = () => utils.parseChunkerString('fake-512') + expect(fn).to.throw(Error) + }) + + it('throws an error for incorrect format string', () => { + const fn = () => utils.parseChunkerString('fixed-abc') + expect(fn).to.throw(Error) + }) + + it('throws an error for incorrect rabin format string', () => { + let fn = () => utils.parseChunkerString('rabin-1-2-3-4') + expect(fn).to.throw(Error) + }) + + it('throws an error for non integer rabin parameters', () => { + const fn = () => utils.parseChunkerString('rabin-abc') + expect(fn).to.throw(Error) + }) }) }) From f9cdfd54ed637eeef5eda43bc9548de5c7080564 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Tue, 7 Aug 2018 13:58:51 -0400 Subject: [PATCH 3/7] perf(core): remove alternative rabin chunker format License: MIT Signed-off-by: Dan Ordille --- src/core/utils.js | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/core/utils.js b/src/core/utils.js index 13249809cc..b4257328b1 100644 --- a/src/core/utils.js +++ b/src/core/utils.js @@ -168,15 +168,12 @@ function parseRabinString (chunker) { options.avgChunkSize = 262144 break case 2: - options.avgChunkSize = parseInt(parts[1]) - if (isNaN(options.avgChunkSize)) { - throw new Error('Chunker parameter avg must be an integer') - } + options.avgChunkSize = parseChunkSize(parts[1], 'avg') break case 4: - options.minChunkSize = parseSub(parts[1].split(':'), 'min') - options.avgChunkSize = parseSub(parts[2].split(':'), 'avg') - options.maxChunkSize = parseSub(parts[3].split(':'), 'max') + options.minChunkSize = parseChunkSize(parts[1], 'min') + options.avgChunkSize = parseChunkSize(parts[2], 'avg') + options.maxChunkSize = parseChunkSize(parts[3], 'max') break default: throw new Error('Incorrect chunker format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]"') @@ -185,11 +182,8 @@ function parseRabinString (chunker) { return options } -function parseSub (sub, name) { - if (sub.length > 1 && sub[0] !== name) { - throw new Error('Chunker parameter order must be min:avg:max') - } - let size = parseInt(sub[sub.length - 1]) +function parseChunkSize (str, name) { + let size = parseInt(str) if (isNaN(size)) { throw new Error(`Chunker parameter ${name} must be an integer`) } From d23647081afd01dc96e824f34fb73e6c03cc99ff Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Wed, 8 Aug 2018 12:07:19 -0400 Subject: [PATCH 4/7] perf(core): remove default as an accepted chunker format License: MIT Signed-off-by: Dan Ordille --- src/cli/commands/files/add.js | 2 +- src/core/utils.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cli/commands/files/add.js b/src/cli/commands/files/add.js index 435f9f05cc..e9f87a81d5 100644 --- a/src/cli/commands/files/add.js +++ b/src/cli/commands/files/add.js @@ -137,7 +137,7 @@ module.exports = { }, chunker: { default: 'size-262144', - describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]' + describe: 'Chunking algorithm to use, formatted like [size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]' }, 'enable-sharding-experiment': { type: 'boolean', diff --git a/src/core/utils.js b/src/core/utils.js index b4257328b1..fbf0f27266 100644 --- a/src/core/utils.js +++ b/src/core/utils.js @@ -124,7 +124,7 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) { * @return {Object} Chunker options for DAGBuilder */ function parseChunkerString (chunker) { - if (!chunker || chunker === '' || chunker === 'default') { + if (!chunker || chunker === '') { return { chunker: 'fixed' } From ee070f64f61734e15ac4c37c6e8224789495ccfd Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Wed, 8 Aug 2018 13:09:18 -0400 Subject: [PATCH 5/7] perf(core): remove empty string check in parseChunkerString method License: MIT Signed-off-by: Dan Ordille --- src/core/utils.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/core/utils.js b/src/core/utils.js index fbf0f27266..5c66f76c94 100644 --- a/src/core/utils.js +++ b/src/core/utils.js @@ -115,8 +115,7 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) { * * * @param {String} chunker Chunker algorithm supported formats: - * "default" ("") - * "size-{size}", + * "size-{size}" * "rabin" * "rabin-{avg}" * "rabin-{min}-{avg}-{max}" @@ -124,7 +123,7 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) { * @return {Object} Chunker options for DAGBuilder */ function parseChunkerString (chunker) { - if (!chunker || chunker === '') { + if (!chunker) { return { chunker: 'fixed' } From 7b38101376c42791a29b6df326f35ce0c033f0f7 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Fri, 10 Aug 2018 12:44:04 -0400 Subject: [PATCH 6/7] fix(api): add chunker to validated query keys in files.add License: MIT Signed-off-by: Dan Ordille --- src/http/api/resources/files.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/http/api/resources/files.js b/src/http/api/resources/files.js index 645ddaceb7..3e2fb28d38 100644 --- a/src/http/api/resources/files.js +++ b/src/http/api/resources/files.js @@ -157,7 +157,8 @@ exports.add = { 'raw-leaves': Joi.boolean(), 'only-hash': Joi.boolean(), pin: Joi.boolean().default(true), - 'wrap-with-directory': Joi.boolean() + 'wrap-with-directory': Joi.boolean(), + chunker: Joi.string() }) // TODO: Necessary until validate "recursive", "stream-channels" etc. .options({ allowUnknown: true }) @@ -222,7 +223,7 @@ exports.add = { hashAlg: request.query['hash'], wrapWithDirectory: request.query['wrap-with-directory'], pin: request.query.pin, - chunker: request.query['chunker'] + chunker: request.query.chunker } const aborter = abortable() From f9c9d13bd1e543e8b44a985f5942a38bc0e3b7d8 Mon Sep 17 00:00:00 2001 From: Dan Ordille Date: Fri, 10 Aug 2018 12:47:15 -0400 Subject: [PATCH 7/7] fix(core): catch errors when parsing chunkerstring and return error pull stream License: MIT Signed-off-by: Dan Ordille --- src/core/components/files.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/core/components/files.js b/src/core/components/files.js index 8bf66eb972..d575d893fa 100644 --- a/src/core/components/files.js +++ b/src/core/components/files.js @@ -150,7 +150,12 @@ class AddHelper extends Duplex { module.exports = function files (self) { function _addPullStream (options = {}) { - const chunkerOptions = parseChunkerString(options.chunker) + let chunkerOptions + try { + chunkerOptions = parseChunkerString(options.chunker) + } catch (err) { + return pull.map(() => { throw err }) + } const opts = Object.assign({}, { shardSplitThreshold: self._options.EXPERIMENTAL.sharding ? 1000