Add a Codec for the chunked transfer coding. (#8)

dart-archive · Dec 6, 2016 · 50e55f6 · 50e55f6
1 parent 7f0467d
commit 50e55f6
Show file tree

Hide file tree

Showing 7 changed files with 698 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## 3.1.0
+
+* Add `chunkedCoding`, a `Codec` that supports encoding and decoding the
+  [chunked transfer coding][].
+
+[chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
+
 ## 3.0.2
 
 * Support `string_scanner` 1.0.0.

diff --git a/lib/http_parser.dart b/lib/http_parser.dart
@@ -4,5 +4,6 @@
 
 export 'src/authentication_challenge.dart';
 export 'src/case_insensitive_map.dart';
+export 'src/chunked_coding.dart';
 export 'src/http_date.dart';
 export 'src/media_type.dart';
diff --git a/lib/src/chunked_coding.dart b/lib/src/chunked_coding.dart
@@ -0,0 +1,39 @@
+// Copyright (c) 2016, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+
+import 'chunked_coding/encoder.dart';
+import 'chunked_coding/decoder.dart';
+
+export 'chunked_coding/encoder.dart' hide chunkedCodingEncoder;
+export 'chunked_coding/decoder.dart' hide chunkedCodingDecoder;
+
+/// The canonical instance of [ChunkedCodingCodec].
+const chunkedCoding = const ChunkedCodingCodec._();
+
+/// A [Codec] for the HTTP [chunked transfer coding][].
+///
+/// [chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
+///
+/// Each call to [ChunkedCodingEncoder.convert] or
+/// [ChunkedCodingEncoder.startChunkedConversion] on the [encoder] produces
+/// exactly *one* chunked message, so an end-of-message footer is always
+/// emitted once conversion has finished. Chunk extensions and trailing headers
+/// are never generated.
+///
+/// Likewise, the [decoder] consumes exactly *one* chunked message and emits a
+/// stream of byte arrays that must be concatenated to obtain the full payload
+/// (as with most Dart byte streams). Streams that contain several chunked
+/// messages, or chunked data mixed with other kinds of data, are not
+/// supported.
+///
+/// At present the [decoder] fails on chunk extensions and trailing headers; a
+/// future version may silently ignore them instead.
+class ChunkedCodingCodec extends Codec<List<int>, List<int>> {
+  const ChunkedCodingCodec._();
+
+  /// The shared encoder instance, [chunkedCodingEncoder].
+  ChunkedCodingEncoder get encoder {
+    return chunkedCodingEncoder;
+  }
+
+  /// The shared decoder instance, [chunkedCodingDecoder].
+  ChunkedCodingDecoder get decoder {
+    return chunkedCodingDecoder;
+  }
+}
diff --git a/lib/src/chunked_coding/decoder.dart b/lib/src/chunked_coding/decoder.dart
@@ -0,0 +1,212 @@
+// Copyright (c) 2016, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:math' as math;
+import 'dart:typed_data';
+
+import 'package:charcode/ascii.dart';
+import 'package:typed_data/typed_data.dart';
+
+/// The canonical instance of [ChunkedCodingDecoder].
+const chunkedCodingDecoder = const ChunkedCodingDecoder._();
+
+/// A converter that strips the chunked transfer coding framing from a byte
+/// array, yielding the payload bytes carried by its chunks.
+class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
+  const ChunkedCodingDecoder._();
+
+  /// Decodes [bytes] as one complete chunked message.
+  ///
+  /// Throws a [FormatException] if [bytes] is malformed or stops before the
+  /// end of the message has been reached.
+  List<int> convert(List<int> bytes) {
+    // No downstream sink is needed here: `_decode` hands the payload back
+    // directly instead of forwarding it.
+    var decoder = new _Sink(null);
+    var payload = decoder._decode(bytes, 0, bytes.length);
+    if (decoder._state != _State.end) {
+      throw new FormatException(
+          "Input ended unexpectedly.", bytes, bytes.length);
+    }
+    return payload;
+  }
+
+  /// Returns a sink that decodes the byte arrays added to it and forwards the
+  /// decoded bytes to [sink].
+  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _Sink(sink);
+  }
+}
+
+/// A conversion sink for the chunked transfer encoding.
+///
+/// Input is run through a small state machine (see [_State]) whose state
+/// persists between calls, so a chunked message may be split across any number
+/// of [add] / [addSlice] calls at arbitrary byte offsets.
+class _Sink extends ByteConversionSinkBase {
+  /// The underlying sink to which decoded byte arrays will be passed.
+  final Sink<List<int>> _sink;
+
+  /// The current state of the sink's parsing.
+  var _state = _State.boundary;
+
+  /// While a size header is being parsed, the value of the hex digits read so
+  /// far; while a chunk body is being parsed, the number of body bytes still
+  /// expected. `null` until the first size digit has been read.
+  int _size;
+
+  _Sink(this._sink);
+
+  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);
+
+  /// Decodes the slice of [chunk] from [start] to [end] and forwards any
+  /// decoded bytes to the underlying sink.
+  ///
+  /// If [isLast] is `true`, this also closes the sink, which throws a
+  /// [FormatException] if the chunked message is incomplete.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+    var output = _decode(chunk, start, end);
+    if (output.isNotEmpty) _sink.add(output);
+    if (isLast) _close(chunk, end);
+  }
+
+  void close() => _close();
+
+  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
+  /// one is thrown.
+  void _close([List<int> chunk, int index]) {
+    if (_state != _State.end) {
+      throw new FormatException("Input ended unexpectedly.", chunk, index);
+    }
+
+    _sink.close();
+  }
+
+  /// Decodes the data in [bytes] from [start] to [end].
+  ///
+  /// Returns the chunk body bytes found in that range (possibly none) and
+  /// updates [_state] and [_size] so that a later call can resume where this
+  /// one stopped. Throws a [FormatException] on malformed input.
+  Uint8List _decode(List<int> bytes, int start, int end) {
+    /// Throws a [FormatException] if `bytes[start] != $char`. Uses [name] to
+    /// describe the character in the exception text.
+    void assertCurrentChar(int char, String name) {
+      if (bytes[start] != char) {
+        throw new FormatException("Expected $name.", bytes, start);
+      }
+    }
+
+    var buffer = new Uint8Buffer();
+    while (start != end) {
+      switch (_state) {
+        case _State.boundary:
+          _size = _digitForByte(bytes, start);
+          _state = _State.size;
+          start++;
+          break;
+
+        case _State.size:
+          if (bytes[start] == $cr) {
+            _state = _State.beforeLF;
+          } else {
+            // Shift four bits left since a single hex digit contains four bits
+            // of information.
+            _size = (_size << 4) + _digitForByte(bytes, start);
+          }
+          start++;
+          break;
+
+        case _State.beforeLF:
+          assertCurrentChar($lf, "LF");
+          // A zero-sized chunk is the last chunk of the message.
+          _state = _size == 0 ? _State.endBeforeCR : _State.body;
+          start++;
+          break;
+
+        case _State.body:
+          // Consume as much of the body as this slice provides; the remainder
+          // (tracked in [_size]) is picked up by a later call.
+          var chunkEnd = math.min(end, start + _size);
+          buffer.addAll(bytes, start, chunkEnd);
+          _size -= chunkEnd - start;
+          start = chunkEnd;
+          if (_size == 0) _state = _State.bodyBeforeCR;
+          break;
+
+        case _State.bodyBeforeCR:
+          if (bytes[start] == $cr) {
+            _state = _State.bodyBeforeLF;
+            start++;
+          } else {
+            // The chunked coding grammar puts a CR LF after the chunk data,
+            // but this decoder historically expected the next size header
+            // immediately. Keep accepting that form for backward
+            // compatibility: leave the byte unconsumed and let the boundary
+            // state parse it as a size digit.
+            _state = _State.boundary;
+          }
+          break;
+
+        case _State.bodyBeforeLF:
+          assertCurrentChar($lf, "LF");
+          _state = _State.boundary;
+          start++;
+          break;
+
+        case _State.endBeforeCR:
+          assertCurrentChar($cr, "CR");
+          _state = _State.endBeforeLF;
+          start++;
+          break;
+
+        case _State.endBeforeLF:
+          assertCurrentChar($lf, "LF");
+          _state = _State.end;
+          start++;
+          break;
+
+        case _State.end:
+          throw new FormatException("Expected no more data.", bytes, start);
+      }
+    }
+    return buffer.buffer.asUint8List(0, buffer.length);
+  }
+
+  /// Returns the hex digit (0 through 15) corresponding to the byte at index
+  /// [index] in [bytes].
+  ///
+  /// If the given byte isn't a hexadecimal ASCII character, throws a
+  /// [FormatException].
+  int _digitForByte(List<int> bytes, int index) {
+    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
+    // `0b110000` and the other numerals come after it in ascending order and
+    // take up at most four bits.
+    //
+    // We check for digits first because it ensures there's only a single branch
+    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
+    // check because branch prediction will always work on it for valid data.
+    var byte = bytes[index];
+    var digit = $0 ^ byte;
+    if (digit <= 9) {
+      if (digit >= 0) return digit;
+    } else {
+      // If the byte is an uppercase letter, convert it to lowercase. This works
+      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
+      // than lowercase letters, so if we ensure that that bit is 1 we ensure that
+      // the letter is lowercase.
+      var letter = 0x20 | byte;
+      if ($a <= letter && letter <= $f) return letter - $a + 10;
+    }
+
+    throw new FormatException(
+        "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.",
+        bytes, index);
+  }
+}
+
+/// An enumeration of states that [_Sink] can exist in when decoding a chunked
+/// message.
+class _State {
+  /// The parser has fully parsed one chunk and is expecting the header for the
+  /// next chunk. This is also the initial state.
+  ///
+  /// Transitions to [size].
+  static const boundary = const _State._("boundary");
+
+  /// The parser has parsed at least one digit of the chunk size header, but has
+  /// not yet parsed the `CR LF` sequence that indicates the end of that header.
+  ///
+  /// Transitions to [beforeLF].
+  static const size = const _State._("size");
+
+  /// The parser has parsed the chunk size header and the CR character after it,
+  /// but not the LF.
+  ///
+  /// Transitions to [body] or [endBeforeCR].
+  static const beforeLF = const _State._("before LF");
+
+  /// The parser has parsed a chunk header and possibly some of the body, but
+  /// still needs to consume more bytes.
+  ///
+  /// Transitions to [bodyBeforeCR].
+  static const body = const _State._("body");
+
+  /// The parser has parsed a complete chunk body but not the CR LF sequence
+  /// that follows it.
+  ///
+  /// Transitions to [bodyBeforeLF], or directly to [boundary] if the next byte
+  /// is not a CR.
+  static const bodyBeforeCR = const _State._("body before CR");
+
+  /// The parser has parsed a complete chunk body and the CR that follows it,
+  /// but not the LF after that.
+  ///
+  /// Transitions to [boundary].
+  static const bodyBeforeLF = const _State._("body before LF");
+
+  /// The parser has parsed the final empty chunk but not the CR LF sequence
+  /// that follows it.
+  ///
+  /// Transitions to [endBeforeLF].
+  static const endBeforeCR = const _State._("end before CR");
+
+  /// The parser has parsed the final empty chunk and the CR that follows it,
+  /// but not the LF after that.
+  ///
+  /// Transitions to [end].
+  static const endBeforeLF = const _State._("end before LF");
+
+  /// The parser has parsed the final empty chunk as well as the CR LF that
+  /// follows, and expects no more data.
+  static const end = const _State._("end");
+
+  /// The human-readable name returned by [toString].
+  final String _name;
+
+  const _State._(this._name);
+
+  String toString() => _name;
+}
diff --git a/lib/src/chunked_coding/encoder.dart b/lib/src/chunked_coding/encoder.dart
@@ -0,0 +1,72 @@
+// Copyright (c) 2016, the Dart project authors.  Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:typed_data';
+
+import 'package:charcode/ascii.dart';
+
+/// The canonical instance of [ChunkedCodingEncoder].
+const chunkedCodingEncoder = const ChunkedCodingEncoder._();
+
+/// The terminating chunk (`0 CR LF CR LF`) that marks the end of a chunked
+/// message.
+final _doneChunk = new Uint8List.fromList([$0, $cr, $lf, $cr, $lf]);
+
+/// A converter that wraps byte arrays in chunks, each prefixed with its size.
+class ChunkedCodingEncoder extends Converter<List<int>, List<int>> {
+  const ChunkedCodingEncoder._();
+
+  /// Encodes [bytes] as one complete chunked message, including the
+  /// end-of-message footer.
+  List<int> convert(List<int> bytes) {
+    return _convert(bytes, 0, bytes.length, isLast: true);
+  }
+
+  /// Returns a sink that encodes each byte array added to it as a chunk and
+  /// forwards the result to [sink].
+  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
+    return new _Sink(sink);
+  }
+}
+
+/// Wraps another sink, encoding everything written to it with the chunked
+/// transfer encoding.
+class _Sink extends ByteConversionSinkBase {
+  /// The destination that receives the encoded byte arrays.
+  final Sink<List<int>> _sink;
+
+  _Sink(this._sink);
+
+  /// Encodes all of [chunk] as a single chunk and forwards it.
+  ///
+  /// An empty [chunk] results in an empty list being forwarded.
+  void add(List<int> chunk) => _sink.add(_convert(chunk, 0, chunk.length));
+
+  /// Encodes the slice of [chunk] from [start] to [end] as a single chunk and
+  /// forwards it.
+  ///
+  /// If [isLast] is `true`, the end-of-message footer is included in the same
+  /// output and the underlying sink is closed afterwards.
+  void addSlice(List<int> chunk, int start, int end, bool isLast) {
+    RangeError.checkValidRange(start, end, chunk.length);
+    var encoded = _convert(chunk, start, end, isLast: isLast);
+    _sink.add(encoded);
+    if (!isLast) return;
+    _sink.close();
+  }
+
+  /// Emits the end-of-message footer and closes the underlying sink.
+  void close() {
+    _sink
+      ..add(_doneChunk)
+      ..close();
+  }
+}
+
+/// Returns a list containing a chunked transfer encoding size header followed
+/// by the slice of [bytes] from [start] to [end].
+///
+/// An empty slice produces no chunk at all: the result is an empty list, or
+/// just the footer when [isLast] is `true`.
+///
+/// If [isLast] is `true`, the footer that indicates that the chunked message
+/// is complete is appended after the data.
+///
+/// NOTE(review): RFC 7230 section 4.1 defines a chunk as
+/// `chunk-size CRLF chunk-data CRLF`, but no CRLF is written after the data
+/// here. Confirm whether that is intentional; changing the output format has
+/// to be coordinated with the decoder in this package.
+List<int> _convert(List<int> bytes, int start, int end, {bool isLast: false}) {
+  var length = end - start;
+  if (length == 0) return isLast ? _doneChunk : const [];
+
+  // The size header is the slice length in lowercase hexadecimal ASCII.
+  var header = length.toRadixString(16).codeUnits;
+
+  // The data starts after the header and the CRLF sequence that follows it.
+  var bodyStart = header.length + 2;
+  var bodyEnd = bodyStart + length;
+
+  var result = new Uint8List(bodyEnd + (isLast ? _doneChunk.length : 0));
+  result.setRange(0, header.length, header);
+  result[header.length] = $cr;
+  result[header.length + 1] = $lf;
+  result.setRange(bodyStart, bodyEnd, bytes, start);
+  if (isLast) result.setRange(bodyEnd, result.length, _doneChunk);
+  return result;
+}
diff --git a/pubspec.yaml b/pubspec.yaml
@@ -1,13 +1,15 @@
 name: http_parser
-version: 3.0.3
+version: 3.1.0
 author: "Dart Team <misc@dartlang.org>"
 homepage: https://github.com/dart-lang/http_parser
 description: >
   A platform-independent package for parsing and serializing HTTP formats.
 dependencies:
+  charcode: "^1.1.0"
   collection: ">=0.9.1 <2.0.0"
   source_span: "^1.0.0"
   string_scanner: ">=0.0.0 <2.0.0"
+  typed_data: "^1.1.0"
 dev_dependencies:
   test: "^0.12.0"
 environment: