From 50e55f63f4b9265bfa65cafb642cc7ba27b0eeca Mon Sep 17 00:00:00 2001 From: Natalie Weizenbaum Date: Tue, 6 Dec 2016 13:44:02 -0800 Subject: [PATCH] Add a Codec for the chunked transfer coding. (#8) --- CHANGELOG.md | 7 + lib/http_parser.dart | 1 + lib/src/chunked_coding.dart | 39 +++ lib/src/chunked_coding/decoder.dart | 212 ++++++++++++++++ lib/src/chunked_coding/encoder.dart | 72 ++++++ pubspec.yaml | 4 +- test/chunked_coding_test.dart | 364 ++++++++++++++++++++++++++++ 7 files changed, 698 insertions(+), 1 deletion(-) create mode 100644 lib/src/chunked_coding.dart create mode 100644 lib/src/chunked_coding/decoder.dart create mode 100644 lib/src/chunked_coding/encoder.dart create mode 100644 test/chunked_coding_test.dart diff --git a/CHANGELOG.md b/CHANGELOG.md index 9802eb1..c12d2cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## 3.1.0 + +* Add `chunkedCoding`, a `Codec` that supports encoding and decoding the + [chunked transfer coding][]. + +[chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 + ## 3.0.2 * Support `string_scanner` 1.0.0. diff --git a/lib/http_parser.dart b/lib/http_parser.dart index 684c0b5..77b20c7 100644 --- a/lib/http_parser.dart +++ b/lib/http_parser.dart @@ -4,5 +4,6 @@ export 'src/authentication_challenge.dart'; export 'src/case_insensitive_map.dart'; +export 'src/chunked_coding.dart'; export 'src/http_date.dart'; export 'src/media_type.dart'; diff --git a/lib/src/chunked_coding.dart b/lib/src/chunked_coding.dart new file mode 100644 index 0000000..8d2326c --- /dev/null +++ b/lib/src/chunked_coding.dart @@ -0,0 +1,39 @@ +// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'dart:convert'; + +import 'chunked_coding/encoder.dart'; +import 'chunked_coding/decoder.dart'; + +export 'chunked_coding/encoder.dart' hide chunkedCodingEncoder; +export 'chunked_coding/decoder.dart' hide chunkedCodingDecoder; + +/// The canonical instance of [ChunkedCodingCodec]. +const chunkedCoding = const ChunkedCodingCodec._(); + +/// A codec that encodes and decodes the [chunked transfer coding][]. +/// +/// [chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1 +/// +/// The [encoder] creates a *single* chunked message for each call to +/// [ChunkedCodingEncoder.convert] or +/// [ChunkedCodingEncoder.startChunkedConversion]. This means that it will +/// always add an end-of-message footer once conversion has finished. It +/// doesn't support generating chunk extensions or trailing headers. +/// +/// Similarly, the [decoder] decodes a *single* chunked message into a stream of +/// byte arrays that must be concatenated to get the full list (like most Dart +/// byte streams). It doesn't support decoding a stream that contains multiple +/// chunked messages, nor does it support a stream that contains chunked data +/// mixed with other types of data. +/// +/// Currently, [decoder] will fail to parse chunk extensions and trailing +/// headers. It may be updated to silently ignore them in the future. +class ChunkedCodingCodec extends Codec<List<int>, List<int>> { + ChunkedCodingEncoder get encoder => chunkedCodingEncoder; + ChunkedCodingDecoder get decoder => chunkedCodingDecoder; + + const ChunkedCodingCodec._(); +} diff --git a/lib/src/chunked_coding/decoder.dart b/lib/src/chunked_coding/decoder.dart new file mode 100644 index 0000000..e2a27fc --- /dev/null +++ b/lib/src/chunked_coding/decoder.dart @@ -0,0 +1,212 @@ +// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. 
+ +import 'dart:convert'; +import 'dart:math' as math; +import 'dart:typed_data'; + +import 'package:charcode/ascii.dart'; +import 'package:typed_data/typed_data.dart'; + +/// The canonical instance of [ChunkedCodingDecoder]. +const chunkedCodingDecoder = const ChunkedCodingDecoder._(); + +/// A converter that decodes chunked byte arrays, stripping the size tags. +class ChunkedCodingDecoder extends Converter<List<int>, List<int>> { + const ChunkedCodingDecoder._(); + + List<int> convert(List<int> bytes) { + var sink = new _Sink(null); + var output = sink._decode(bytes, 0, bytes.length); + if (sink._state == _State.end) return output; + + throw new FormatException( + "Input ended unexpectedly.", bytes, bytes.length); + } + + ByteConversionSink startChunkedConversion(Sink<List<int>> sink) => + new _Sink(sink); +} + +/// A conversion sink for the chunked transfer encoding. +class _Sink extends ByteConversionSinkBase { + /// The underlying sink to which decoded byte arrays will be passed. + final Sink<List<int>> _sink; + + /// The current state of the sink's parsing. + var _state = _State.boundary; + + /// The size of the chunk being parsed, or `null` if the size hasn't been + /// parsed yet. + int _size; + + _Sink(this._sink); + + void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false); + + void addSlice(List<int> chunk, int start, int end, bool isLast) { + RangeError.checkValidRange(start, end, chunk.length); + var output = _decode(chunk, start, end); + if (output.isNotEmpty) _sink.add(output); + if (isLast) _close(chunk, end); + } + + void close() => _close(); + + /// Like [close], but includes [chunk] and [index] in the [FormatException] if + /// one is thrown. + void _close([List<int> chunk, int index]) { + if (_state != _State.end) { + throw new FormatException("Input ended unexpectedly.", chunk, index); + } + + _sink.close(); + } + + /// Decodes the data in [bytes] from [start] to [end]. + Uint8List _decode(List<int> bytes, int start, int end) { + /// Throws a [FormatException] if `bytes[start] != $char`. 
Uses [name] to + /// describe the character in the exception text. + assertCurrentChar(int char, String name) { + if (bytes[start] != char) { + throw new FormatException("Expected $name.", bytes, start); + } + } + + var buffer = new Uint8Buffer(); + while (start != end) { + switch (_state) { + case _State.boundary: + _size = _digitForByte(bytes, start); + _state = _State.size; + start++; + break; + + case _State.size: + if (bytes[start] == $cr) { + _state = _State.beforeLF; + } else { + // Shift four bits left since a single hex digit contains four bits + // of information. + _size = (_size << 4) + _digitForByte(bytes, start); + } + start++; + break; + + case _State.beforeLF: + assertCurrentChar($lf, "LF"); + _state = _size == 0 ? _State.endBeforeCR : _State.body; + start++; + break; + + case _State.body: + var chunkEnd = math.min(end, start + _size); + buffer.addAll(bytes, start, chunkEnd); + _size -= chunkEnd - start; + start = chunkEnd; + if (_size == 0) _state = _State.boundary; + break; + + case _State.endBeforeCR: + assertCurrentChar($cr, "CR"); + _state = _State.endBeforeLF; + start++; + break; + + case _State.endBeforeLF: + assertCurrentChar($lf, "LF"); + _state = _State.end; + start++; + break; + + case _State.end: + throw new FormatException("Expected no more data.", bytes, start); + } + } + return buffer.buffer.asUint8List(0, buffer.length); + } + + /// Returns the hex digit (0 through 15) corresponding to the byte at index + /// [index] in [bytes]. + /// + /// If the given byte isn't a hexadecimal ASCII character, throws a + /// [FormatException]. + int _digitForByte(List<int> bytes, int index) { + // If the byte is a numeral, get its value. XOR works because 0 in ASCII is + // `0b110000` and the other numerals come after it in ascending order and + // take up at most four bits. + // + // We check for digits first because it ensures there's only a single branch + // for 10 out of 16 of the expected cases. 
We don't count the `digit >= 0` + // check because branch prediction will always work on it for valid data. + var byte = bytes[index]; + var digit = $0 ^ byte; + if (digit <= 9) { + if (digit >= 0) return digit; + } else { + // If the byte is an uppercase letter, convert it to lowercase. This works + // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less + // than lowercase letters, so if we ensure that that bit is 1 we ensure that + // the letter is lowercase. + var letter = 0x20 | byte; + if ($a <= letter && letter <= $f) return letter - $a + 10; + } + + throw new FormatException( + "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.", + bytes, index); + } +} + +/// An enumeration of states that [_Sink] can exist in when decoding a chunked +/// message. +/// +/// The states carry no data; the pending chunk size is kept in [_Sink._size]. +class _State { + /// The parser has fully parsed one chunk and is expecting the header for the + /// next chunk. + /// + /// Transitions to [size]. + static const boundary = const _State._("boundary"); + + /// The parser has parsed at least one digit of the chunk size header, but has + /// not yet parsed the `CR LF` sequence that indicates the end of that header. + /// + /// Transitions to [beforeLF]. + static const size = const _State._("size"); + + /// The parser has parsed the chunk size header and the CR character after it, + /// but not the LF. + /// + /// Transitions to [body] or [endBeforeCR]. + static const beforeLF = const _State._("before LF"); + + /// The parser has parsed a chunk header and possibly some of the body, but + /// still needs to consume more bytes. + /// + /// Transitions to [boundary]. + static const body = const _State._("body"); + + /// The parser has parsed the final empty chunk but not the CR LF sequence + /// that follows it. + /// + /// Transitions to [endBeforeLF]. 
+ static const endBeforeCR = const _State._("end before CR"); + + /// The parser has parsed the final empty chunk and the CR that follows it, + /// but not the LF after that. + /// + /// Transitions to [end]. + static const endBeforeLF = const _State._("end before LF"); + + /// The parser has parsed the final empty chunk as well as the CR LF that + /// follows, and expects no more data. + static const end = const _State._("end"); + + final String _name; + + const _State._(this._name); + + String toString() => _name; +} diff --git a/lib/src/chunked_coding/encoder.dart b/lib/src/chunked_coding/encoder.dart new file mode 100644 index 0000000..c724700 --- /dev/null +++ b/lib/src/chunked_coding/encoder.dart @@ -0,0 +1,72 @@ +// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:convert'; +import 'dart:typed_data'; + +import 'package:charcode/ascii.dart'; + +/// The canonical instance of [ChunkedCodingEncoder]. +const chunkedCodingEncoder = const ChunkedCodingEncoder._(); + +/// The chunk indicating that the chunked message has finished. +final _doneChunk = new Uint8List.fromList([$0, $cr, $lf, $cr, $lf]); + +/// A converter that encodes byte arrays into chunks with size tags. +class ChunkedCodingEncoder extends Converter<List<int>, List<int>> { + const ChunkedCodingEncoder._(); + + List<int> convert(List<int> bytes) => + _convert(bytes, 0, bytes.length, isLast: true); + + ByteConversionSink startChunkedConversion(Sink<List<int>> sink) => + new _Sink(sink); +} + +/// A conversion sink for the chunked transfer encoding. +class _Sink extends ByteConversionSinkBase { + /// The underlying sink to which encoded byte arrays will be passed. 
+ final Sink<List<int>> _sink; + + _Sink(this._sink); + + void add(List<int> chunk) { + _sink.add(_convert(chunk, 0, chunk.length)); + } + + void addSlice(List<int> chunk, int start, int end, bool isLast) { + RangeError.checkValidRange(start, end, chunk.length); + _sink.add(_convert(chunk, start, end, isLast: isLast)); + if (isLast) _sink.close(); + } + + void close() { + _sink.add(_doneChunk); + _sink.close(); + } +} + +/// Returns a new list containing a chunked transfer encoding header followed by +/// the slice of [bytes] from [start] to [end]. +/// +/// If [isLast] is `true`, this adds the footer that indicates that the chunked +/// message is complete. +List<int> _convert(List<int> bytes, int start, int end, {bool isLast: false}) { + if (end == start) return isLast ? _doneChunk : const []; + + var size = end - start; + var sizeInHex = size.toRadixString(16); + var footerSize = isLast ? _doneChunk.length : 0; + + // Add 2 for the CRLF sequence that follows the size header. + var list = new Uint8List(sizeInHex.length + 2 + size + footerSize); + list.setRange(0, sizeInHex.length, sizeInHex.codeUnits); + list[sizeInHex.length] = $cr; + list[sizeInHex.length + 1] = $lf; + list.setRange(sizeInHex.length + 2, list.length - footerSize, bytes, start); + if (isLast) { + list.setRange(list.length - footerSize, list.length, _doneChunk); + } + return list; +} diff --git a/pubspec.yaml b/pubspec.yaml index 69ffe2c..d88dec9 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -1,13 +1,15 @@ name: http_parser -version: 3.0.3 +version: 3.1.0 author: "Dart Team " homepage: https://github.com/dart-lang/http_parser description: > A platform-independent package for parsing and serializing HTTP formats. 
dependencies: + charcode: "^1.1.0" collection: ">=0.9.1 <2.0.0" source_span: "^1.0.0" string_scanner: ">=0.0.0 <2.0.0" + typed_data: "^1.1.0" dev_dependencies: test: "^0.12.0" environment: diff --git a/test/chunked_coding_test.dart b/test/chunked_coding_test.dart new file mode 100644 index 0000000..29fce25 --- /dev/null +++ b/test/chunked_coding_test.dart @@ -0,0 +1,364 @@ +// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file +// for details. All rights reserved. Use of this source code is governed by a +// BSD-style license that can be found in the LICENSE file. + +import 'dart:async'; +import 'dart:convert'; + +import 'package:http_parser/http_parser.dart'; + +import 'package:charcode/charcode.dart'; +import 'package:test/test.dart'; + +void main() { + group("encoder", () { + test("adds a header to the chunk of bytes", () { + expect(chunkedCoding.encode([1, 2, 3]), + equals([$3, $cr, $lf, 1, 2, 3, $0, $cr, $lf, $cr, $lf])); + }); + + test("uses hex for chunk size", () { + var data = new Iterable.generate(0xA7).toList(); + expect(chunkedCoding.encode(data), + equals([$a, $7, $cr, $lf] + ..addAll(data) + ..addAll([$0, $cr, $lf, $cr, $lf]))); + }); + + test("just generates a footer for an empty input", () { + expect(chunkedCoding.encode([]), equals([$0, $cr, $lf, $cr, $lf])); + }); + + group("with chunked conversion", () { + List<List<int>> results; + ByteConversionSink sink; + setUp(() { + results = []; + var controller = new StreamController<List<int>>(sync: true); + controller.stream.listen(results.add); + sink = chunkedCoding.encoder.startChunkedConversion(controller.sink); + }); + + test("adds headers to each chunk of bytes", () { + sink.add([1, 2, 3, 4]); + expect(results, equals([[$4, $cr, $lf, 1, 2, 3, 4]])); + + sink.add([5, 6, 7]); + expect(results, equals([ + [$4, $cr, $lf, 1, 2, 3, 4], + [$3, $cr, $lf, 5, 6, 7], + ])); + + sink.close(); + expect(results, equals([ + [$4, $cr, $lf, 1, 2, 3, 4], + [$3, $cr, $lf, 5, 6, 7], + [$0, $cr, $lf, $cr, $lf], + 
])); + }); + + test("handles empty chunks", () { + sink.add([]); + expect(results, equals([[]])); + + sink.add([1, 2, 3]); + expect(results, equals([[], [$3, $cr, $lf, 1, 2, 3]])); + + sink.add([]); + expect(results, equals([[], [$3, $cr, $lf, 1, 2, 3], []])); + + sink.close(); + expect(results, equals([ + [], + [$3, $cr, $lf, 1, 2, 3], + [], + [$0, $cr, $lf, $cr, $lf], + ])); + }); + + group("addSlice()", () { + test("adds bytes from the specified slice", () { + sink.addSlice([1, 2, 3, 4, 5], 1, 4, false); + expect(results, equals([[$3, $cr, $lf, 2, 3, 4]])); + }); + + test("doesn't add a header if the slice is empty", () { + sink.addSlice([1, 2, 3, 4, 5], 1, 1, false); + expect(results, equals([[]])); + }); + + test("adds a footer if isLast is true", () { + sink.addSlice([1, 2, 3, 4, 5], 1, 4, true); + expect(results, + equals([[$3, $cr, $lf, 2, 3, 4, $0, $cr, $lf, $cr, $lf]])); + + // Setting isLast should close the sink. + expect(() => sink.add([]), throwsStateError); + }); + + group("disallows", () { + test("start < 0", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], -1, 4, false), + throwsRangeError); + }); + + test("start > end", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], 3, 2, false), + throwsRangeError); + }); + + test("end > length", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], 1, 10, false), + throwsRangeError); + }); + }); + }); + }); + }); + + group("decoder", () { + test("parses chunked data", () { + expect(chunkedCoding.decode([ + $3, $cr, $lf, 1, 2, 3, + $4, $cr, $lf, 4, 5, 6, 7, + $0, $cr, $lf, $cr, $lf, + ]), equals([1, 2, 3, 4, 5, 6, 7])); + }); + + test("parses hex size", () { + var data = new Iterable.generate(0xA7).toList(); + expect( + chunkedCoding.decode([$a, $7, $cr, $lf] + ..addAll(data) + ..addAll([$0, 
$cr, $lf, $cr, $lf])), + equals(data)); + }); + + test("parses capital hex size", () { + var data = new Iterable.generate(0xA7).toList(); + expect( + chunkedCoding.decode([$A, $7, $cr, $lf] + ..addAll(data) + ..addAll([$0, $cr, $lf, $cr, $lf])), + equals(data)); + }); + + test("parses an empty message", () { + expect(chunkedCoding.decode([$0, $cr, $lf, $cr, $lf]), isEmpty); + }); + + group("disallows a message", () { + test("that ends without any input", () { + expect(() => chunkedCoding.decode([]), throwsFormatException); + }); + + test("that ends after the size", () { + expect(() => chunkedCoding.decode([$a]), throwsFormatException); + }); + + test("that ends after CR", () { + expect(() => chunkedCoding.decode([$a, $cr]), throwsFormatException); + }); + + test("that ends after LF", () { + expect(() => chunkedCoding.decode([$a, $cr, $lf]), + throwsFormatException); + }); + + test("that ends after insufficient bytes", () { + expect(() => chunkedCoding.decode([$a, $cr, $lf, 1, 2, 3]), + throwsFormatException); + }); + + test("that ends at a chunk boundary", () { + expect(() => chunkedCoding.decode([$1, $cr, $lf, 1]), + throwsFormatException); + }); + + test("that ends after the empty chunk", () { + expect(() => chunkedCoding.decode([$0, $cr, $lf]), + throwsFormatException); + }); + + test("that ends after the closing CR", () { + expect(() => chunkedCoding.decode([$0, $cr, $lf, $cr]), + throwsFormatException); + }); + + test("with a chunk without a size", () { + expect(() => chunkedCoding.decode([$cr, $lf, $0, $cr, $lf, $cr, $lf]), + throwsFormatException); + }); + + test("with a chunk with a non-hex size", () { + expect( + () => chunkedCoding.decode([$q, $cr, $lf, $0, $cr, $lf, $cr, $lf]), + throwsFormatException); + }); + }); + + group("with chunked conversion", () { + List<List<int>> results; + ByteConversionSink sink; + setUp(() { + results = []; + var controller = new StreamController<List<int>>(sync: true); + controller.stream.listen(results.add); + sink = chunkedCoding.decoder.startChunkedConversion(controller.sink); + }); + + test("decodes each chunk of bytes", () { + sink.add([$4, $cr, $lf, 1, 2, 3, 4]); + expect(results, equals([[1, 2, 3, 4]])); + + sink.add([$3, $cr, $lf, 5, 6, 7]); + 
expect(results, equals([[1, 2, 3, 4], [5, 6, 7]])); + + sink.add([$0, $cr, $lf, $cr, $lf]); + sink.close(); + expect(results, equals([[1, 2, 3, 4], [5, 6, 7]])); + }); + + test("handles empty chunks", () { + sink.add([]); + expect(results, isEmpty); + + sink.add([$3, $cr, $lf, 1, 2, 3]); + expect(results, equals([[1, 2, 3]])); + + sink.add([]); + expect(results, equals([[1, 2, 3]])); + + sink.add([$0, $cr, $lf, $cr, $lf]); + sink.close(); + expect(results, equals([[1, 2, 3]])); + }); + + test("throws if the sink is closed before the message is done", () { + sink.add([$3, $cr, $lf, 1, 2, 3]); + expect(() => sink.close(), throwsFormatException); + }); + + group("preserves state when a byte array ends", () { + test("within chunk size", () { + sink.add([$a]); + expect(results, isEmpty); + + var data = new Iterable.generate(0xA7).toList(); + sink.add([$7, $cr, $lf]..addAll(data)); + expect(results, equals([data])); + }); + + test("after chunk size", () { + sink.add([$3]); + expect(results, isEmpty); + + sink.add([$cr, $lf, 1, 2, 3]); + expect(results, equals([[1, 2, 3]])); + }); + + test("after CR", () { + sink.add([$3, $cr]); + expect(results, isEmpty); + + sink.add([$lf, 1, 2, 3]); + expect(results, equals([[1, 2, 3]])); + }); + + test("after LF", () { + sink.add([$3, $cr, $lf]); + expect(results, isEmpty); + + sink.add([1, 2, 3]); + expect(results, equals([[1, 2, 3]])); + }); + + test("after some bytes", () { + sink.add([$3, $cr, $lf, 1, 2]); + expect(results, equals([[1, 2]])); + + sink.add([3]); + expect(results, equals([[1, 2], [3]])); + }); + + test("after empty chunk size", () { + sink.add([$0]); + expect(results, isEmpty); + + sink.add([$cr, $lf, $cr, $lf]); + expect(results, isEmpty); + + sink.close(); + expect(results, isEmpty); + }); + + test("after first empty chunk CR", () { + sink.add([$0, $cr]); + expect(results, isEmpty); + + sink.add([$lf, $cr, $lf]); + expect(results, isEmpty); + + sink.close(); + expect(results, isEmpty); + }); + + test("after first 
empty chunk LF", () { + sink.add([$0, $cr, $lf]); + expect(results, isEmpty); + + sink.add([$cr, $lf]); + expect(results, isEmpty); + + sink.close(); + expect(results, isEmpty); + }); + + test("after second empty chunk CR", () { + sink.add([$0, $cr, $lf, $cr]); + expect(results, isEmpty); + + sink.add([$lf]); + expect(results, isEmpty); + + sink.close(); + expect(results, isEmpty); + }); + }); + + group("addSlice()", () { + test("adds bytes from the specified slice", () { + sink.addSlice([1, $3, $cr, $lf, 2, 3, 4, 5], 1, 7, false); + expect(results, equals([[2, 3, 4]])); + }); + + test("doesn't decode if the slice is empty", () { + sink.addSlice([1, 2, 3, 4, 5], 1, 1, false); + expect(results, isEmpty); + }); + + test("closes the sink if isLast is true", () { + sink.addSlice([1, $0, $cr, $lf, $cr, $lf, 7], 1, 6, true); + expect(results, isEmpty); + }); + + group("disallows", () { + test("start < 0", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], -1, 4, false), + throwsRangeError); + }); + + test("start > end", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], 3, 2, false), + throwsRangeError); + }); + + test("end > length", () { + expect(() => sink.addSlice([1, 2, 3, 4, 5], 1, 10, false), + throwsRangeError); + }); + }); + }); + }); + }); +}