[WIP] feat: support chunked add requests #1540
@@ -93,15 +93,16 @@
    "byteman": "^1.3.5",
    "cids": "~0.5.3",
    "debug": "^3.1.0",
    "del": "^3.0.0",
    "err-code": "^1.1.2",
    "fast-glob": "^2.2.2",
    "file-type": "^8.1.0",
    "filesize": "^3.6.1",
    "fnv1a": "^1.0.1",
    "fsm-event": "^2.1.0",
    "get-folder-size": "^2.0.0",
    "glob": "^7.1.2",
    "hapi": "^16.6.2",
    "hapi-set-header": "^1.0.2",
    "hoek": "^5.0.3",
    "human-to-milliseconds": "^1.0.0",
    "interface-datastore": "~0.4.2",
@@ -167,9 +168,11 @@
    "read-pkg-up": "^4.0.0",
    "readable-stream": "2.3.6",
    "receptacle": "^1.3.2",
    "stream-concat": "^0.3.0",

Review comment: Please use …

    "stream-to-pull-stream": "^1.7.2",
    "tar-stream": "^1.6.1",
    "temp": "~0.8.3",
    "tempy": "^0.2.1",

Review comment: Can we just use …

    "through2": "^2.0.3",
    "update-notifier": "^2.5.0",
    "yargs": "^12.0.1",
@@ -1,5 +1,7 @@
'use strict'

const path = require('path')
const fs = require('fs')
const mh = require('multihashes')
const multipart = require('ipfs-multipart')
const debug = require('debug')

@@ -10,10 +12,21 @@ const pull = require('pull-stream')
const toPull = require('stream-to-pull-stream')
const pushable = require('pull-pushable')
const each = require('async/each')
const content = require('content')
const toStream = require('pull-stream-to-stream')
const abortable = require('pull-abortable')
const Joi = require('joi')
const pump = require('pump')
const tempy = require('tempy')
const ndjson = require('pull-ndjson')
const {
  parseChunkedInput,
  createMultipartReply,
  matchMultipartEnd,
  processAndAdd
} = require('../utils/add-experimental')

const filesDir = tempy.directory()

exports = module.exports
@@ -182,6 +195,7 @@ exports.add = {
    parser.on('file', (fileName, fileStream) => {
      fileName = decodeURIComponent(fileName)

      const filePair = {
        path: fileName,
        content: toPull(fileStream)

@@ -192,7 +206,6 @@ exports.add = {
    parser.on('directory', (directory) => {
      directory = decodeURIComponent(directory)

      fileAdder.push({
        path: directory,
        content: ''

@@ -220,7 +233,7 @@ exports.add = {
      rawLeaves: request.query['raw-leaves'],
      progress: request.query.progress ? progressHandler : null,
      onlyHash: request.query['only-hash'],
-     hashAlg: request.query['hash'],
+     hashAlg: request.query.hash,
      wrapWithDirectory: request.query['wrap-with-directory'],
      pin: request.query.pin,
      chunker: request.query.chunker

@@ -282,6 +295,78 @@
  }
}
exports.addExperimental = {
  validate: {
    query: Joi.object()
      .keys({
        'cid-version': Joi.number().integer().min(0).max(1).default(0),
        'raw-leaves': Joi.boolean(),
        'only-hash': Joi.boolean(),
        pin: Joi.boolean().default(true),
        'wrap-with-directory': Joi.boolean(),
        chunker: Joi.string()
      })
      // TODO: Necessary until validate "recursive", "stream-channels" etc.
      .options({ allowUnknown: true }),

Review comment: Can we please reuse from …

    headers: {
      'content-range': Joi.string().regex(/(\d+)-(\d+)\/(\d+|\*)/),
      'x-chunked-input': Joi.string().regex(/^uuid="([^"]+)";\s*index=(\d*)/i)

Review comment: We need to validate the UUID much better here as it is concatenated with the temporary directory path and we don't want directory traversal attacks. (A stricter pattern is sketched after this block.)

    },
    options: {
      allowUnknown: true
    }
  },
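A minimal sketch of the stricter check the comment above asks for, assuming the upload id is a conventional UUID; the helper name `safeChunkFilename` and the exact pattern are illustrative, not part of this PR:

```js
'use strict'

const path = require('path')

// Only hex digits and dashes are allowed, so the value can never smuggle
// path separators into the temp-directory join below.
const UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i

const safeChunkFilename = (filesDir, uuid, index) => {
  if (!UUID_RE.test(uuid)) {
    throw new Error('invalid chunked upload id')
  }

  if (!Number.isInteger(index) || index < 0) {
    throw new Error('invalid chunk index')
  }

  return path.join(filesDir, `${uuid}-${index}`)
}

module.exports = safeChunkFilename
```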
  handler: (request, reply) => {
    const chunkedInput = parseChunkedInput(request)

    // non chunked
    if (!chunkedInput) {

Review comment: It's safe to remove this - we won't get here if the headers are being validated using Joi.

      createMultipartReply(
        request.payload,
        request,
        reply,
        (err) => {
          if (err) {
            return reply(err)
          }
        }
      )

      return
    }
    // chunked
    const [uuid, index] = chunkedInput
    const [, , , total] = request.headers['content-range'].match(/(\d+)-(\d+)\/(\d+|\*)/)

Review comment: Suggestion: … (see the sketch after this handler)

    const file = path.join(filesDir, uuid) + '-' + index

    // TODO validate duplicates, missing chunks when resumeable and concurrent request are supported

    pump(
      request.payload,
      fs.createWriteStream(file),
      (err) => {
        if (err) {
          return reply(err)
        }
        const boundary = content.type(request.headers['content-type']).boundary

Review comment: Needs a try/catch as it can throw.

        matchMultipartEnd(file, boundary, (err, isEnd) => {
          if (err) {
            return reply(err)
          }

          if (isEnd) {
            processAndAdd(uuid, filesDir, request, reply)
          } else {
            reply({ Bytes: total })

Review comment: For every chunk we return the total bytes expected to be uploaded?

          }
        })
      }
    )
  }

Review comment: We need a clean up job for failed requests.

}
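The truncated "Suggestion" above appears to concern how the Content-Range header is parsed. A hedged sketch of a small helper the handler could use instead of inline destructuring; `getContentRangeTotal` is a hypothetical name, not part of the PR:

```js
// Returns the declared total size, or null when the header is missing,
// malformed, or the total is unknown ('*').
const getContentRangeTotal = (header) => {
  const match = (header || '').match(/(\d+)-(\d+)\/(\d+|\*)$/)

  if (!match || match[3] === '*') {
    return null
  }

  return Number(match[3])
}

// Example: getContentRangeTotal('0-262143/1048576') === 1048576
```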
exports.immutableLs = {
  // uses common parseKey method that returns a `key`
  parseArgs: exports.parseKey,
@@ -1,7 +1,7 @@
'use strict'

const resources = require('./../resources')
const mfs = require('ipfs-mfs/http')
const resources = require('./../resources')

module.exports = (server) => {
  const api = server.select('API')
@@ -37,13 +37,28 @@
    config: {
      payload: {
        parse: false,
-       output: 'stream'
+       output: 'stream',
+       maxBytes: 1000 * 1024 * 1024

Review comment: What's the reason for adding this restriction to …

      },
      handler: resources.files.add.handler,
      validate: resources.files.add.validate
    }
  })
  api.route({
    method: 'POST',
    path: '/api/v0/add-experimental',
    config: {
      payload: {
        parse: false,
        output: 'stream',
        maxBytes: 1000 * 1024 * 1024

Review comment: We need this value to be high for the non chunked path.

Review comment: Don't know, what do you think? From the hapi documentation, when output=stream maxBytes shouldn't matter, but that doesn't seem to be the case. Needs further investigation; maybe it's a matter of upgrading hapi, dunno.

Review comment: I can't think of anything right now.

Review comment: This should just be the maximum size we're prepared to accept for … (see the sketch below)

      },
      validate: resources.files.addExperimental.validate,
      handler: resources.files.addExperimental.handler
    }
  })
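One possible reading of the last comment in that thread, sketched under the assumption that the limit should cover a single chunk rather than the whole upload; the 256 MB figure is purely illustrative:

```js
const MB = 1024 * 1024

api.route({
  method: 'POST',
  path: '/api/v0/add-experimental',
  config: {
    payload: {
      parse: false,
      output: 'stream',
      // Per-request ceiling only: each request carries a single chunk,
      // so this does not cap the size of the overall upload.
      maxBytes: 256 * MB
    },
    validate: resources.files.addExperimental.validate,
    handler: resources.files.addExperimental.handler
  }
})
```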
  api.route({
    // TODO fix method
    method: '*',
@@ -0,0 +1,146 @@
'use strict'

const fs = require('fs')
const path = require('path')
const { EOL } = require('os')
const { Readable } = require('stream')
const glob = require('fast-glob')
const StreamConcat = require('stream-concat')
const del = require('del')
const content = require('content')
const { Parser } = require('ipfs-multipart')
const processAndAdd = (uuid, filesDir, request, reply) => {
  // all chunks have been received
  // TODO : here we have full size we can calculate the number of chunks to validate we have all the bytes
  const base = path.join(filesDir, uuid) + '-'
  const pattern = base + '*'
  const files = glob.sync(pattern)

Review comment: Async please :) (a sketch follows this function)

  files.sort((a, b) => {
    return Number(a.replace(base, '')) - Number(b.replace(base, ''))
  })

  let fileIndex = 0
  const nextStream = () => fileIndex === files.length
    ? null
    : fs.createReadStream(files[fileIndex++])

  createMultipartReply(
    new StreamConcat(nextStream),
    request,
    reply,
    (err) => {
      if (err) {
        return reply(err)

Review comment: I think we need a test to ensure the correct thing is being done.

      }
      del(pattern, { force: true })
        .then(paths => {
          console.log('Deleted files and folders:\n', paths.join('\n'))
        })
        .catch(console.error)
    }
  )
}
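A sketch of the async variant the "Async please" comment asks for, using fast-glob's promise-returning API; `findChunks` is a hypothetical helper name:

```js
const glob = require('fast-glob')

// Resolve the chunk files for an upload and sort them by their numeric suffix,
// without blocking the event loop on a sync glob.
const findChunks = async (base) => {
  const files = await glob(base + '*')

  return files.sort((a, b) => Number(a.replace(base, '')) - Number(b.replace(base, '')))
}
```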
const matchMultipartEnd = (file, boundary, cb) => {
  const buffer = Buffer.alloc(56)

Review comment: What are the magic numbers here 56 & 58? Would you mind adding comments to explain or refactor to be more obvious? (a reworked sketch follows this function)

  const fs = require('fs')
  fs.open(file, 'r', (err, fd) => {
    if (err) {
      cb(err)

Review comment: Missing …

    }

    fs.fstat(fd, (err, stats) => {
      if (err) {
        cb(err)

Review comment: Missing …

      }

      fs.read(fd, buffer, 0, buffer.length, stats.size - 58, function (e, l, b) {

Review comment: Do you need a guard here in case position is negative?

        cb(null, b.toString().includes(boundary))

Review comment: Please handle error and use more descriptive variable names!

      })
      fs.close(fd)

Review comment: Move close to read callback?

    })
  })
}
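A sketch of how the points above could be addressed: derive the tail length from the boundary instead of hard-coding 56 and 58, guard against a negative read position, return after every error, and close the descriptor from the read callback. This is illustrative, not the PR's implementation:

```js
const fs = require('fs')

const matchMultipartEnd = (file, boundary, cb) => {
  // The closing delimiter is "\r\n--<boundary>--\r\n", so a tail slightly
  // longer than the boundary is enough to detect it.
  const tailLength = boundary.length + 8
  const buffer = Buffer.alloc(tailLength)

  fs.open(file, 'r', (err, fd) => {
    if (err) {
      return cb(err)
    }

    fs.fstat(fd, (err, stats) => {
      if (err) {
        return fs.close(fd, () => cb(err))
      }

      const position = Math.max(stats.size - tailLength, 0)

      fs.read(fd, buffer, 0, buffer.length, position, (err, bytesRead, data) => {
        fs.close(fd, () => {
          if (err) {
            return cb(err)
          }

          cb(null, data.slice(0, bytesRead).toString().includes(boundary))
        })
      })
    })
  })
}

module.exports = matchMultipartEnd
```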
const parseChunkedInput = (request) => {
  const input = request.headers['x-chunked-input']
  const regex = /^uuid="([^"]+)";\s*index=(\d*)/i

  if (!input) {
    return null
  }
  const match = input.match(regex)

  return [match[1], Number(match[2])]

Review comment: Much better to just return an object here with … (see the sketch after this function)

}
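The object-returning shape suggested above might look like this sketch (property names `uuid` and `index` assumed):

```js
const parseChunkedInput = (request) => {
  const input = request.headers['x-chunked-input']
  const regex = /^uuid="([^"]+)";\s*index=(\d*)/i

  if (!input) {
    return null
  }

  const match = input.match(regex)

  if (!match) {
    return null
  }

  return { uuid: match[1], index: Number(match[2]) }
}

// Call sites then read: const { uuid, index } = parseChunkedInput(request)
```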
const createMultipartReply = (readStream, request, reply, cb) => {

Review comment: IMHO this should just construct and return a stream that can be passed to reply in the handler.

  const boundary = content.type(request.headers['content-type']).boundary

Review comment: Needs try/catch.

  const ipfs = request.server.app.ipfs
  const query = request.query
  const parser = new Parser({ boundary: boundary })
  const replyStream = new Readable({ read: () => {} })
  const serialize = d => JSON.stringify(d) + EOL

Review comment: I don't think the EOL needs to be OS specific here, does it?

  const progressHandler = (bytes) => {
    replyStream.push(serialize({ Bytes: bytes }))
  }
  // ipfs add options
  const options = {
    cidVersion: query['cid-version'],
    rawLeaves: query['raw-leaves'],
    progress: query.progress ? progressHandler : null,
    onlyHash: query['only-hash'],
    hashAlg: query.hash,
    wrapWithDirectory: query['wrap-with-directory'],
    pin: query.pin,
    chunker: query.chunker
  }

Review comment: We should extract this logic into a function e.g. … (see the sketch after this function)

  const addStream = ipfs.files.addReadableStream(options)

  // Setup add pipeline
  addStream.on('data', file => {
    replyStream.push(serialize({
      Name: file.path,
      Hash: file.hash,
      Size: file.size
    }))

Review comment: This should just be a transform, we should just be able to pipe readStream -> parser -> addStream -> replyStream.

  })
  addStream.on('end', () => replyStream.push(null))
  addStream.on('error', cb)

  // Setup multipart parser
  parser.on('file', (fileName, fileStream) => {
    addStream.write({
      path: decodeURIComponent(fileName),
      content: fileStream
    })
  })
  parser.on('directory', (directory) => {
    addStream.write({
      path: decodeURIComponent(directory),
      content: ''
    })
  })
  parser.on('end', () => {
    addStream.end()
  })
  parser.on('error', cb)

Review comment: There should be an abstraction around this that allows us to just pipe it to our add stream.

  // Send replyStream to reply
  reply(replyStream)
    .header('x-chunked-output', '1')
    .header('content-encoding', 'identity') // stop gzip from buffering, see https://github.com/hapijs/hapi/issues/2975
    .header('content-type', 'application/json')

  // start piping data to multipart parser
  readStream.pipe(parser)
}

Review comment: Doesn't look like the callback is called when the reply ends, and so temporary files don't get cleaned up. Would you mind adding a test to verify?
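Two of the comments above (extracting the options mapping, guarding the boundary parse) could be sketched roughly as follows; `addOptionsFromQuery` and `getBoundary` are hypothetical names, not part of the PR:

```js
const content = require('content')

// Map HTTP query parameters onto ipfs.files.add options in one place so both
// add handlers can share the translation.
const addOptionsFromQuery = (query, progressHandler) => ({
  cidVersion: query['cid-version'],
  rawLeaves: query['raw-leaves'],
  progress: query.progress ? progressHandler : null,
  onlyHash: query['only-hash'],
  hashAlg: query.hash,
  wrapWithDirectory: query['wrap-with-directory'],
  pin: query.pin,
  chunker: query.chunker
})

// The reviewer notes content.type() can throw on a malformed Content-Type
// header, so report the failure through the callback instead of letting it
// escape the handler.
const getBoundary = (contentTypeHeader, cb) => {
  try {
    cb(null, content.type(contentTypeHeader).boundary)
  } catch (err) {
    cb(err)
  }
}
```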
module.exports = {
  processAndAdd,
  matchMultipartEnd,
  parseChunkedInput,
  createMultipartReply
}
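For the "clean up job for failed requests" comments, a sketch of a periodic sweep of stale chunk files, assuming `del` stays as a dependency and that an hour-old chunk belongs to an abandoned upload (both assumptions, not part of the PR):

```js
const path = require('path')
const fs = require('fs')
const glob = require('fast-glob')
const del = require('del')

// Delete chunk files that have not been written to for maxAgeMs milliseconds.
const cleanAbandonedChunks = async (filesDir, maxAgeMs = 60 * 60 * 1000) => {
  const files = await glob(path.join(filesDir, '*'))
  const now = Date.now()

  const stale = files.filter((file) => now - fs.statSync(file).mtimeMs > maxAgeMs)

  return del(stale, { force: true })
}

// e.g. run every 10 minutes:
// setInterval(() => cleanAbandonedChunks(filesDir).catch(console.error), 10 * 60 * 1000)
```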
Review comment: Can we use `glob` or replace `glob` with `fast-glob`?