diff --git a/.eslintrc.json b/.eslintrc.json index 78ba093..e5776c6 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -5,6 +5,9 @@ "no-var": 2, "prefer-const": 2 }, + "env": { + "browser": true + }, "parserOptions": { "ecmaVersion": 6, "sourceType": "module" diff --git a/README.md b/README.md index da2311b..898eb1e 100644 --- a/README.md +++ b/README.md @@ -52,16 +52,14 @@ Determine if HTTP cookies will be sent along with the request, one of `same-orig A function which implements the following interface: ```js -(rawChunk, previousChunkSuffix, isFinalChunk) => [ parsedChunk, chunkSuffix ] +(chunkBytes, state, flush) => [ parsed, state ] ``` -The `chunkParser` takes the raw, textual chunk response returned by the server and converts it into the value passed to the `onChunk` callback (see `options.onChunk`). The function may also yield an optional chunkSuffix which will be not be passed to the `onChunk` callback but will instead be supplied as the `previousChunkSuffix` value the next time the `chunkParser` is invoked. +The chunk parser converts the supplied Uint8Array of bytes into structured data which will be supplied to the `onChunk` callback. If no `chunkParser` function is supplied the `defaultChunkParser` will be used which expects the data to be JSON literals delimited by newline (`\\n`) characters. -If the `chunkParser` throws an exception, the chunk will be discarded and the error that was raised will be passed to the `onChunk` callback augmented with a `rawChunk` property consisting of the textual chunk for logging / recovery. +See [Writing a Custom Chunk Parser](#writing-a-custom-chunk-parser) below for more details on how to implement this interface. -If no `chunkParser` is supplied the `defaultChunkParser` will be used which expects the chunks returned by the server to consist of one or more `\n` delimited lines of JSON object literals which are parsed into an Array. 
- -`chunkParser` will be called with `isFinalChunk` as `true` when the response has completed and there was a non-empty `chunkSuffix` from the last chunk. The `rawChunk` will be an empty string and the `previousChunkSuffix` will be the last returned `chunkSuffix`. +If the `chunkParser` throws an exception, the chunk will be discarded and the error that was raised will be passed to the `onChunk` callback augmented with a `chunkBytes` property that contains the byte Array supplied to the parser and a `parserState` property which contains the state that was supplied (see below). #### onChunk (optional) A function which implements the following interface: @@ -94,6 +92,68 @@ A function which implements the following interface: ({ url, headers, method, body, credentials, onComplete, onRawChunk }) => undefined ``` -The underlying function to use to make the request, see the provided implementations if you wish to provide a custom extension. +The underlying function used to make the request, see the provided implementations if you wish to provide a custom extension. Note that you must supply a Uint8Array to the `onRawChunk` callback. If no value is supplied the `chunkedRequest.transportFactory` function will be invoked to determine which transport method to use. The default `transportFactory` will attempt to select the best available method for the current platform; but you can override this method for substituting a test-double or custom implementation. + + +## Writing a Custom Chunk Parser +The `chunkParser` takes a 'chunk' of bytes in the form of a `Uint8Array` which were provided by the remote server and then converts it into the value passed to the `onChunk` callback (see `options.onChunk`). 
In its simplest form the `chunkParser` acts as a passthru; the following example converts the supplied bytes into a string: + +```js +chunkedRequest({ + chunkParser(bytes) { + const str = utf8BytesToString(bytes); + return [ str ]; + }, + onChunk(err, str) { + console.log(`Chunk received: ${str}`); + } +}); +``` + + +Chunk Parsers will typically be dealing with structured data (eg: JSON literals) where a message can only be parsed if it is well formed (ie: a complete JSON literal). Because of the nature of chunked transfer, the server may end up flushing a chunk of data to the browser that contains an incomplete data structure. The example below illustrates this where the first chunk from the server (Chunk 1) has an incomplete JSON literal which is subsequently completed by the following chunk (Chunk 2). + +``` +Server (Chunk 1)> { "name": "Jonny" }\n{ "name": "Frank" }\n{ "na +Server (Chunk 2)> me": "Bob" } +``` + +A naive chunk parser implementation would attempt to parse the JSON literals contained in each chunk like so: + +```js +chunkParser(bytes) { + const jsonLiterals = utf8BytesToString(bytes).split("\n"); + // This will not work; Array index 2 (`{ "na`) is an incomplete JSON + // literal and will cause a SyntaxError from JSON.parse + return [ jsonLiterals.map(v => JSON.parse(v)) ]; +} +``` + +Instead, the chunkParser should make use of the `state` object to retain any incomplete messages so they can be processed in the next pass: + +```js +chunkParser(bytes, state = {}) { + const jsonLiterals = utf8BytesToString(bytes).split("\n"); + + // Does the state object contain any data that was not parsed + // in a previous pass (see below). + if (state.trailer) { + // Glue the data back together for a (potentially) complete literal. + jsonLiterals[0] = `${state.trailer}${jsonLiterals[0]}`; + } + + // Check to see if the last literal parsed from this chunk ended with a + // message delimiter. 
+ if (jsonLiterals[jsonLiterals.length-1] !== "\n") { + // move the last entry into the parser's state as it's incomplete; we + // can process it on the next pass. + state.trailer = jsonLiterals.pop(); + } + + return [ jsonLiterals.map(v => JSON.parse(v)), state ]; +} +``` + +Finally, stateful chunk parsers must observe the third argument, `flush`. This flag will be true when the server has closed the connection indicating that there will be no further data. The chunkParser must process any remaining data in the state object at this point. \ No newline at end of file diff --git a/karma.conf.js b/karma.conf.js index f08bffe..d2c9d84 100644 --- a/karma.conf.js +++ b/karma.conf.js @@ -78,6 +78,7 @@ module.exports = function(config) { proxies: { '/chunked-response': 'http://localhost:2001/chunked-response', + '/chunked-utf8-response': 'http://localhost:2001/chunked-utf8-response', '/split-chunked-response': 'http://localhost:2001/split-chunked-response', '/error-response': 'http://localhost:2001/error-response', '/echo-response': 'http://localhost:2001/echo-response' diff --git a/package.json b/package.json index 77760fb..0a966dd 100644 --- a/package.json +++ b/package.json @@ -5,7 +5,14 @@ "jsnext:main": "src/index.js", "repository": "https://github.com/jonnyreeves/chunked-request", "license": "MIT", - "keywords": [ "request", "chunked", "transfer", "comet", "xhr", "fetch" ], + "keywords": [ + "request", + "chunked", + "transfer", + "comet", + "xhr", + "fetch" + ], "scripts": { "prepublish": "npm run clean && npm run build:lib", "clean": "rm -rf build/*", @@ -17,19 +24,22 @@ "release": "./release.sh ${npm_package_version}" }, "devDependencies": { - "babel-cli": "^6.6.5", - "babel-preset-es2015": "^6.6.0", - "babelify": "^7.2.0", - "browserify": "^13.0.0", - "cookie": "^0.2.3", - "eslint": "^2.4.0", + "babel-cli": "^6.11.4", + "babel-preset-es2015": "^6.13.2", + "babelify": "^7.3.0", + "browserify": "^13.1.0", + "cookie": "^0.3.1", + "eslint": "^3.3.1", 
"jasmine": "^2.4.1", "jasmine-core": "^2.4.1", - "karma": "^0.13.22", - "karma-chrome-launcher": "^0.2.2", - "karma-jasmine": "^0.3.8", - "karma-sauce-launcher": "^0.3.1", - "lodash": "^4.6.1", + "karma": "^1.2.0", + "karma-chrome-launcher": "^1.0.1", + "karma-jasmine": "^1.0.2", + "karma-sauce-launcher": "^1.0.0", + "lodash": "^4.15.0", "url": "^0.11.0" + }, + "dependencies": { + "utf-8": "^1.0.0" } } diff --git a/src/defaultChunkParser.js b/src/defaultChunkParser.js index c6f63c3..7594ef1 100644 --- a/src/defaultChunkParser.js +++ b/src/defaultChunkParser.js @@ -1,28 +1,33 @@ +import { getStringFromBytes } from 'utf-8'; + const entryDelimiter = '\n'; // The defaultChunkParser expects the response from the server to consist of new-line // delimited JSON, eg: // -// { "chunk": "#1", "data": "Hello" } +// { "chunk": "#1", "data": "Hello" }\n // { "chunk": "#2", "data": "World" } // // It will correctly handle the case where a chunk is emitted by the server across // delimiter boundaries. -export default function defaultChunkParser(rawChunk, prevChunkSuffix = '', isFinalChunk = false) { - let chunkSuffix; - - const rawChunks = `${prevChunkSuffix}${rawChunk}` - .split(entryDelimiter); +export default function defaultChunkParser(bytes, state = {}, flush = false) { + const chunkStr = getStringFromBytes(bytes, 0, undefined, true); + const jsonLiterals = chunkStr.split(entryDelimiter); + if (state.trailer) { + jsonLiterals[0] = `${state.trailer}${jsonLiterals[0]}`; + } - if (!isFinalChunk && !hasSuffix(rawChunk, entryDelimiter)) { - chunkSuffix = rawChunks.pop(); + // Is this a complete message? If not; push the trailing (incomplete) string + // into the state. 
+ if (!flush && !hasSuffix(chunkStr, entryDelimiter)) { + state.trailer = jsonLiterals.pop(); } - const processedChunks = rawChunks + const jsonObjects = jsonLiterals .filter(v => v.trim() !== '') .map(v => JSON.parse(v)); - return [ processedChunks, chunkSuffix ]; + return [ jsonObjects, state ]; } function hasSuffix(s, suffix) { diff --git a/src/impl/fetch.js b/src/impl/fetch.js index ad1d818..3d4eaed 100644 --- a/src/impl/fetch.js +++ b/src/impl/fetch.js @@ -3,7 +3,6 @@ import { isObject } from '../util'; export const READABLE_BYTE_STREAM = 'readable-byte-stream'; export default function fetchRequest(options) { - const decoder = new TextDecoder(); const { onRawChunk, onRawComplete, method, body, credentials } = options; const headers = marshallHeaders(options.headers); @@ -17,7 +16,7 @@ export default function fetchRequest(options) { raw: res }); } - onRawChunk(decoder.decode(result.value)); + onRawChunk(result.value); return pump(reader, res); }); } diff --git a/src/impl/mozXhr.js b/src/impl/mozXhr.js index 3b1fb86..c46dc55 100644 --- a/src/impl/mozXhr.js +++ b/src/impl/mozXhr.js @@ -4,14 +4,7 @@ export default function mozXhrRequest(options) { const xhr = new XMLHttpRequest(); function onProgressEvent() { - const view = new Uint8Array(xhr.response); - let len = view.length; - - const rawString = new Array(len); - while(len--) { - rawString[len] = String.fromCharCode(view[len]); - } - options.onRawChunk(rawString.join('')); + options.onRawChunk(new Uint8Array(xhr.response)); } function onLoadEvent() { diff --git a/src/impl/xhr.js b/src/impl/xhr.js index 6db125e..b4b1365 100644 --- a/src/impl/xhr.js +++ b/src/impl/xhr.js @@ -1,3 +1,5 @@ +import { uint8ArrayFromString } from '../util'; + export const XHR = 'xhr'; export default function xhrRequest(options) { @@ -5,9 +7,9 @@ export default function xhrRequest(options) { let index = 0; function onProgressEvent() { - const rawChunk = xhr.responseText.substr(index); + const rawText = xhr.responseText.substr(index); 
index = xhr.responseText.length; - options.onRawChunk(rawChunk); + options.onRawChunk(uint8ArrayFromString(rawText)); } function onLoadEvent() { diff --git a/src/index.js b/src/index.js index 07a89bb..afc2611 100644 --- a/src/index.js +++ b/src/index.js @@ -20,31 +20,32 @@ export default function chunkedRequest(options) { chunkParser = defaultChunkParser } = options; - let prevChunkSuffix = ""; + // parserState can be utilised by the chunkParser to hold on to state; the + // defaultChunkParser uses it to keep track of any trailing text after the last + // delimiter in the chunk. There is no contract for parserState. + let parserState; - function processRawChunk(rawChunk, isFinalChunk = false) { + function processRawChunk(chunkBytes, flush = false) { let parsedChunks = null; let parseError = null; - let suffix = ""; try { - [ parsedChunks, suffix ] = chunkParser(rawChunk, prevChunkSuffix, isFinalChunk); - prevChunkSuffix = suffix || ""; + [ parsedChunks, parserState ] = chunkParser(chunkBytes, parserState, flush); } catch (e) { parseError = e; - parseError.rawChunk = rawChunk; - parseError.prevChunkSuffix = prevChunkSuffix; + parseError.chunkBytes = chunkBytes; + parseError.parserState = parserState; } finally { - if (parseError || (parsedChunks !== null && parsedChunks.length > 0)) { + if (parseError || (parsedChunks && parsedChunks.length > 0)) { onChunk(parseError, parsedChunks); } } } function processRawComplete(rawComplete) { - if (prevChunkSuffix != "") { - // Call the parser with isFinalChunk=true to flush the prevChunkSuffix - processRawChunk("", true); + if (parserState) { + // Flush the parser to process any remaining state. 
+ processRawChunk(new Uint8Array(), true); } onComplete(rawComplete); } diff --git a/src/util.js b/src/util.js index c5dd590..ee630c1 100644 --- a/src/util.js +++ b/src/util.js @@ -1,7 +1,17 @@ +import { getBytesForCharCode, setBytesFromString } from 'utf-8'; + export function isObject(value) { return !!value && typeof value === 'object'; } export function noop() { /* No operation */ +} + +export function uint8ArrayFromString(str) { + let size = 0; + for (let i = 0, len = str.length; i < len; i++) { + size += getBytesForCharCode(str.charCodeAt(i)) + } + return setBytesFromString(str, new Uint8Array(size), 0, size, true); } \ No newline at end of file diff --git a/test/integ/chunked-request.spec.js b/test/integ/chunked-request.spec.js index bf6a8fe..e0c0d24 100644 --- a/test/integ/chunked-request.spec.js +++ b/test/integ/chunked-request.spec.js @@ -1,6 +1,7 @@ import chunkedRequest from '../../src/index'; import isEqual from 'lodash/isEqual'; import isObject from 'lodash/isObject'; +import { getStringFromBytes } from 'utf-8'; // These integration tests run through Karma; check `karma.conf.js` for // configuration. 
Note that the dev-server which provides the `/chunked-response` @@ -24,7 +25,27 @@ describe('chunked-request', () => { url: `/chunked-response?numChunks=1&entriesPerChunk=1&delimitLast=1`, onChunk: (err, chunk) => receivedChunks.push(err || chunk), onComplete - }) + }); + }); + + it('should parse utf8 responses', done => { + const receivedChunks = []; + + const onComplete = () => { + const chunkErrors = receivedChunks.filter(v => v instanceof Error); + + expect(receivedChunks.length).toBe(1, 'receivedChunks'); + expect(chunkErrors.length).toBe(0, 'of which errors'); + expect(isEqual(receivedChunks, [ [ {message: "𝌆"} ] ])).toBe(true, 'parsed chunks'); + + done(); + }; + + chunkedRequest({ + url: `/chunked-utf8-response`, + onChunk: (err, chunk) => receivedChunks.push(err || chunk), + onComplete + }); }); it('should parse a response that consists of two chunks and ends with a delimiter', done => { @@ -111,15 +132,20 @@ describe('chunked-request', () => { const chunkErrors = receivedChunks.filter(v => v instanceof Error); expect(chunkErrors.length).toBe(1, 'one errors caught'); expect(chunkErrors[0].message).toBe('expected'); - expect(chunkErrors[0].rawChunk).toBe(`{ "chunk": "#1", "data": "#0" }\n`); + + const rawChunkStr = getStringFromBytes(chunkErrors[0].chunkBytes); + expect(rawChunkStr).toBe(`{ "chunk": "#1", "data": "#0" }\n`); done(); }; chunkedRequest({ url: `/chunked-response?numChunks=1&entriesPerChunk=1&delimitLast=1`, - chunkParser: () => { - throw new Error("expected"); + chunkParser: (chunkBytes, state, flush) => { + if (!flush) { + throw new Error("expected"); + } + return []; }, onChunk: (err, chunk) => { receivedChunks.push(err || chunk) diff --git a/test/server/index.js b/test/server/index.js index 92e6304..2c9882d 100644 --- a/test/server/index.js +++ b/test/server/index.js @@ -66,6 +66,13 @@ function serveSplitChunkedResponse(req, res) { }, chunkIntervalMs); } +function serveChunkedUtf8Response(req, res) { + res.setHeader('Content-Type', 
'text/html; charset=UTF-8'); + res.setHeader('Transfer-Encoding', 'chunked'); + res.write(JSON.stringify({ "message": "𝌆" }) + "\n"); + res.end(); +} + function serveChunkedResponse(req, res) { const query = url.parse(req.url, true).query; const numChunks = parseInt(query.numChunks, 10) || 4; @@ -108,6 +115,8 @@ function handler(req, res) { switch (req.parsedUrl.pathname) { case '/chunked-response': return serveChunkedResponse(req, res); + case '/chunked-utf8-response': + return serveChunkedUtf8Response(req, res); case '/split-chunked-response': return serveSplitChunkedResponse(req, res); case '/echo-response':