Skip to content
This repository has been archived by the owner on Oct 22, 2024. It is now read-only.

Commit

Permalink
Add a Codec for the chunked transfer coding. (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
nex3 authored Dec 6, 2016
1 parent 7f0467d commit 50e55f6
Show file tree
Hide file tree
Showing 7 changed files with 698 additions and 1 deletion.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## 3.1.0

* Add `chunkedCoding`, a `Codec` that supports encoding and decoding the
[chunked transfer coding][].

[chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1

## 3.0.2

* Support `string_scanner` 1.0.0.
Expand Down
1 change: 1 addition & 0 deletions lib/http_parser.dart
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@

export 'src/authentication_challenge.dart';
export 'src/case_insensitive_map.dart';
export 'src/chunked_coding.dart';
export 'src/http_date.dart';
export 'src/media_type.dart';
39 changes: 39 additions & 0 deletions lib/src/chunked_coding.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';

import 'chunked_coding/encoder.dart';
import 'chunked_coding/decoder.dart';

export 'chunked_coding/encoder.dart' hide chunkedCodingEncoder;
export 'chunked_coding/decoder.dart' hide chunkedCodingDecoder;

/// The canonical instance of [ChunkedCodingCodec].
///
/// The codec's constructor is private, so this constant is the way to obtain
/// an instance.
const chunkedCoding = const ChunkedCodingCodec._();

/// A [Codec] for the HTTP [chunked transfer coding][].
///
/// [chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
///
/// Encoding: every call to [ChunkedCodingEncoder.convert] or
/// [ChunkedCodingEncoder.startChunkedConversion] produces exactly *one*
/// chunked message, so the end-of-message footer is always written when the
/// conversion finishes. Chunk extensions and trailing headers are never
/// generated.
///
/// Decoding: the [decoder] consumes exactly *one* chunked message and emits
/// its payload as a series of byte arrays which, as with most Dart byte
/// streams, have to be concatenated to recover the complete body. Input that
/// contains several chunked messages, or chunked data interleaved with
/// anything else, is not supported.
///
/// At present the [decoder] rejects chunk extensions and trailing headers
/// with a [FormatException]; a future version may silently skip them instead.
class ChunkedCodingCodec extends Codec<List<int>, List<int>> {
  const ChunkedCodingCodec._();

  /// The shared encoder instance.
  ChunkedCodingEncoder get encoder {
    return chunkedCodingEncoder;
  }

  /// The shared decoder instance.
  ChunkedCodingDecoder get decoder {
    return chunkedCodingDecoder;
  }
}
212 changes: 212 additions & 0 deletions lib/src/chunked_coding/decoder.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:math' as math;
import 'dart:typed_data';

import 'package:charcode/ascii.dart';
import 'package:typed_data/typed_data.dart';

/// The canonical instance of [ChunkedCodingDecoder].
///
/// The decoder's constructor is private, so every user of the decoder shares
/// this single constant.
const chunkedCodingDecoder = const ChunkedCodingDecoder._();

/// A converter that strips the chunked transfer coding framing from a byte
/// array, yielding the payload bytes.
class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingDecoder._();

  /// Decodes [bytes] as one complete chunked message.
  ///
  /// Throws a [FormatException] if [bytes] is malformed or stops before the
  /// terminating empty chunk has been fully read.
  List<int> convert(List<int> bytes) {
    // No downstream sink is needed: the parser is driven directly and its
    // output returned, so `null` is passed for the wrapped sink.
    var parser = new _Sink(null);
    var decoded = parser._decode(bytes, 0, bytes.length);

    if (parser._state != _State.end) {
      throw new FormatException(
          "Input ended unexpectedly.", bytes, bytes.length);
    }
    return decoded;
  }

  /// Returns a sink that decodes incrementally and forwards payload bytes to
  /// [sink].
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}

/// A conversion sink that decodes the chunked transfer encoding.
///
/// The sink is a small state machine (see [_State]) that can be fed input in
/// arbitrary slices; parsing state is carried across calls in [_state] and
/// [_size].
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which decoded byte arrays will be passed.
  ///
  /// This is `null` when the parser is driven directly through [_decode], as
  /// [ChunkedCodingDecoder.convert] does.
  final Sink<List<int>> _sink;

  /// The current state of the sink's parsing.
  var _state = _State.boundary;

  /// The number of body bytes still expected for the chunk being parsed.
  ///
  /// Starts out `null`. It is set from the first size digit in the boundary
  /// state, extended by further digits, and then counted down as body bytes
  /// are consumed.
  int _size;

  _Sink(this._sink);

  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  /// Decodes `chunk[start..end)` and forwards any decoded payload bytes.
  ///
  /// If [isLast] is `true` the sink is closed afterwards, which throws a
  /// [FormatException] if the message is incomplete.
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);
    var output = _decode(chunk, start, end);
    // Slices that contain only framing bytes produce no payload; don't emit
    // empty events for them.
    if (output.isNotEmpty) _sink.add(output);
    if (isLast) _close(chunk, end);
  }

  void close() => _close();

  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
  /// one is thrown.
  void _close([List<int> chunk, int index]) {
    if (_state != _State.end) {
      throw new FormatException("Input ended unexpectedly.", chunk, index);
    }

    _sink.close();
  }

  /// Decodes the data in [bytes] from [start] to [end].
  ///
  /// Returns the payload bytes found in that range (possibly none). Throws a
  /// [FormatException] on malformed input or on any data after the end of the
  /// message.
  Uint8List _decode(List<int> bytes, int start, int end) {
    /// Throws a [FormatException] if `bytes[start] != char`. Uses [name] to
    /// describe the expected character in the exception text.
    void assertCurrentChar(int char, String name) {
      if (bytes[start] != char) {
        throw new FormatException("Expected $name.", bytes, start);
      }
    }

    var buffer = new Uint8Buffer();
    while (start != end) {
      switch (_state) {
        case _State.boundary:
          // The first byte of a chunk header must be a hex digit.
          _size = _digitForByte(bytes, start);
          _state = _State.size;
          start++;
          break;

        case _State.size:
          if (bytes[start] == $cr) {
            _state = _State.beforeLF;
          } else {
            // Shift four bits left since a single hex digit contains four bits
            // of information.
            _size = (_size << 4) + _digitForByte(bytes, start);
          }
          start++;
          break;

        case _State.beforeLF:
          assertCurrentChar($lf, "LF");
          // A zero-size chunk marks the end of the message.
          _state = _size == 0 ? _State.endBeforeCR : _State.body;
          start++;
          break;

        case _State.body:
          // Consume as much of the chunk body as this slice contains; the
          // remainder (if any) is picked up by the next call.
          var chunkEnd = math.min(end, start + _size);
          buffer.addAll(bytes, start, chunkEnd);
          _size -= chunkEnd - start;
          start = chunkEnd;
          // NOTE(review): RFC 2616 section 3.6.1 defines a chunk as
          // `size CRLF data CRLF`, but no CRLF is consumed after the data
          // here. Any change must be made together with the encoder, which
          // has to emit the matching bytes -- confirm before touching.
          if (_size == 0) _state = _State.boundary;
          break;

        case _State.endBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.endBeforeLF;
          start++;
          break;

        case _State.endBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.end;
          start++;
          break;

        case _State.end:
          throw new FormatException("Expected no more data.", bytes, start);
      }
    }
    // Expose only the bytes that were actually written to the buffer.
    return buffer.buffer.asUint8List(0, buffer.length);
  }

  /// Returns the hex digit (0 through 15) corresponding to the byte at
  /// [index] in [bytes].
  ///
  /// If the given byte isn't a hexadecimal ASCII character, throws a
  /// [FormatException].
  int _digitForByte(List<int> bytes, int index) {
    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
    // `0b110000` and the other numerals come after it in ascending order and
    // take up at most four bits.
    //
    // We check for digits first because it ensures there's only a single branch
    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
    // check because branch prediction will always work on it for valid data.
    var byte = bytes[index];
    var digit = $0 ^ byte;
    if (digit <= 9) {
      if (digit >= 0) return digit;
    } else {
      // If the byte is an uppercase letter, convert it to lowercase. This works
      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
      // than lowercase letters, so if we ensure that that bit is 1 we ensure
      // that the letter is lowercase.
      var letter = 0x20 | byte;
      if ($a <= letter && letter <= $f) return letter - $a + 10;
    }

    throw new FormatException(
        "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.",
        bytes,
        index);
  }
}

/// An enumeration of states that [_Sink] can exist in when decoding a chunked
/// message.
///
/// Each state is a canonical `const` instance; [toString] returns its name.
class _State {
  /// The parser has fully parsed one chunk and is expecting the header for the
  /// next chunk.
  ///
  /// Transitions to [size].
  static const boundary = const _State._("boundary");

  /// The parser has parsed at least one digit of the chunk size header, but has
  /// not yet parsed the `CR LF` sequence that indicates the end of that header.
  ///
  /// Transitions to [beforeLF].
  static const size = const _State._("size");

  /// The parser has parsed the chunk size header and the CR character after it,
  /// but not the LF.
  ///
  /// Transitions to [body] or [endBeforeCR].
  static const beforeLF = const _State._("before LF");

  /// The parser has parsed a chunk header and possibly some of the body, but
  /// still needs to consume more bytes.
  ///
  /// Transitions to [boundary].
  static const body = const _State._("body");

  /// The parser has parsed the final empty chunk but not the CR LF sequence
  /// that follows it.
  ///
  /// Transitions to [endBeforeLF].
  static const endBeforeCR = const _State._("end before CR");

  /// The parser has parsed the final empty chunk and the CR that follows it,
  /// but not the LF after that.
  ///
  /// Transitions to [end].
  static const endBeforeLF = const _State._("end before LF");

  /// The parser has parsed the final empty chunk as well as the CR LF that
  /// follows, and expects no more data.
  static const end = const _State._("end");

  /// The human-readable name of this state, returned by [toString].
  final String _name;

  const _State._(this._name);

  String toString() => _name;
}
72 changes: 72 additions & 0 deletions lib/src/chunked_coding/encoder.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:typed_data';

import 'package:charcode/ascii.dart';

/// The canonical instance of [ChunkedCodingEncoder].
///
/// The encoder's constructor is private, so every user of the encoder shares
/// this single constant.
const chunkedCodingEncoder = const ChunkedCodingEncoder._();

/// The chunk indicating that the chunked message has finished.
///
/// The bytes are ASCII `0`, CR, LF, CR, LF: a chunk-size line of zero
/// followed by an empty line.
final _doneChunk = new Uint8List.fromList([$0, $cr, $lf, $cr, $lf]);

/// A converter that wraps byte arrays in chunked transfer coding framing: a
/// hexadecimal size header in front of each chunk and an end-of-message
/// footer after the last one.
class ChunkedCodingEncoder extends Converter<List<int>, List<int>> {
  const ChunkedCodingEncoder._();

  /// Encodes all of [bytes] as one complete chunked message, footer included.
  List<int> convert(List<int> bytes) {
    return _convert(bytes, 0, bytes.length, isLast: true);
  }

  /// Returns a sink that encodes each added byte array as its own chunk and
  /// forwards the result to [sink].
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}

/// A conversion sink that applies the chunked transfer encoding.
///
/// Each byte array handed to the sink is encoded as a separate chunk; closing
/// the sink writes the end-of-message footer.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which encoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  _Sink(this._sink);

  /// Encodes all of [chunk] as one chunk and forwards it.
  void add(List<int> chunk) => _sink.add(_convert(chunk, 0, chunk.length));

  /// Encodes `chunk[start..end)` as one chunk and forwards it.
  ///
  /// When [isLast] is `true` the footer is appended to the same output array
  /// and the underlying sink is closed.
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);

    var encoded = _convert(chunk, start, end, isLast: isLast);
    _sink.add(encoded);

    if (!isLast) return;
    _sink.close();
  }

  /// Writes the end-of-message footer and closes the underlying sink.
  void close() {
    _sink
      ..add(_doneChunk)
      ..close();
  }
}

/// Returns a list containing a chunked transfer encoding size header followed
/// by the slice of [bytes] from [start] to [end].
///
/// If [isLast] is `true`, this also appends the footer that indicates that
/// the chunked message is complete.
///
/// An empty slice produces no chunk at all: the result is just the footer
/// when [isLast] is `true`, and an empty list otherwise.
///
/// NOTE(review): RFC 2616 section 3.6.1 defines a chunk as
/// `size CRLF data CRLF`; no CRLF is written after the data here. The decoder
/// has to agree with whatever is emitted, so confirm both sides before
/// changing this.
List<int> _convert(List<int> bytes, int start, int end, {bool isLast: false}) {
  var bodyLength = end - start;
  if (bodyLength == 0) {
    if (isLast) return _doneChunk;
    return const [];
  }

  // The size header is the body length in hexadecimal, as ASCII code units.
  var header = bodyLength.toRadixString(16).codeUnits;

  // Layout of the output: header, CR LF, body, then (optionally) the footer.
  var bodyStart = header.length + 2;
  var bodyEnd = bodyStart + bodyLength;
  var totalLength = isLast ? bodyEnd + _doneChunk.length : bodyEnd;

  var result = new Uint8List(totalLength);
  result.setRange(0, header.length, header);
  result[header.length] = $cr;
  result[header.length + 1] = $lf;
  result.setRange(bodyStart, bodyEnd, bytes, start);
  if (isLast) result.setRange(bodyEnd, totalLength, _doneChunk);

  return result;
}
4 changes: 3 additions & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
name: http_parser
version: 3.0.3
version: 3.1.0
author: "Dart Team <[email protected]>"
homepage: https://github.com/dart-lang/http_parser
description: >
A platform-independent package for parsing and serializing HTTP formats.
dependencies:
charcode: "^1.1.0"
collection: ">=0.9.1 <2.0.0"
source_span: "^1.0.0"
string_scanner: ">=0.0.0 <2.0.0"
typed_data: "^1.1.0"
dev_dependencies:
test: "^0.12.0"
environment:
Expand Down
Loading

0 comments on commit 50e55f6

Please sign in to comment.