This repository has been archived by the owner on Oct 22, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a Codec for the chunked transfer coding. (#8)
- Loading branch information
Showing
7 changed files
with
698 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
import 'dart:convert'; | ||
|
||
import 'chunked_coding/encoder.dart'; | ||
import 'chunked_coding/decoder.dart'; | ||
|
||
export 'chunked_coding/encoder.dart' hide chunkedCodingEncoder; | ||
export 'chunked_coding/decoder.dart' hide chunkedCodingDecoder; | ||
|
||
/// The canonical instance of [ChunkedCodingCodec].
const chunkedCoding = const ChunkedCodingCodec._();

/// A codec that encodes and decodes the [chunked transfer coding][].
///
/// [chunked transfer coding]: https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.6.1
///
/// The [encoder] creates a *single* chunked message for each call to
/// [ChunkedCodingEncoder.convert] or
/// [ChunkedCodingEncoder.startChunkedConversion]. This means that it will
/// always add an end-of-message footer once conversion has finished. It
/// doesn't support generating chunk extensions or trailing headers.
///
/// Similarly, the [decoder] decodes a *single* chunked message into a stream of
/// byte arrays that must be concatenated to get the full list (like most Dart
/// byte streams). It doesn't support decoding a stream that contains multiple
/// chunked messages, nor does it support a stream that contains chunked data
/// mixed with other types of data.
///
/// Currently, [decoder] will fail to parse chunk extensions and trailing
/// headers. It may be updated to silently ignore them in the future.
class ChunkedCodingCodec extends Codec<List<int>, List<int>> {
  /// Private so that [chunkedCoding] is the only instance.
  const ChunkedCodingCodec._();

  /// The shared encoder instance, [chunkedCodingEncoder].
  @override
  ChunkedCodingEncoder get encoder => chunkedCodingEncoder;

  /// The shared decoder instance, [chunkedCodingDecoder].
  @override
  ChunkedCodingDecoder get decoder => chunkedCodingDecoder;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
import 'dart:convert'; | ||
import 'dart:math' as math; | ||
import 'dart:typed_data'; | ||
|
||
import 'package:charcode/ascii.dart'; | ||
import 'package:typed_data/typed_data.dart'; | ||
|
||
/// The canonical instance of [ChunkedCodingDecoder].
const chunkedCodingDecoder = const ChunkedCodingDecoder._();

/// A converter that decodes a chunked-coded message back into the bytes it
/// carries.
class ChunkedCodingDecoder extends Converter<List<int>, List<int>> {
  /// Private so that [chunkedCodingDecoder] is the only instance.
  const ChunkedCodingDecoder._();

  /// Decodes [bytes], which must contain exactly one complete chunked message.
  ///
  /// Throws a [FormatException] if [bytes] is malformed or stops before the
  /// end of the message has been reached.
  @override
  List<int> convert(List<int> bytes) {
    // No downstream sink is needed: the decoded bytes are read straight from
    // the return value of `_decode`.
    var decoder = new _Sink(null);
    var decoded = decoder._decode(bytes, 0, bytes.length);

    if (decoder._state != _State.end) {
      throw new FormatException(
          "Input ended unexpectedly.", bytes, bytes.length);
    }
    return decoded;
  }

  /// Returns a sink that decodes the data added to it incrementally and
  /// forwards the decoded bytes to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}
|
||
/// A conversion sink that decodes the chunked transfer coding.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which decoded byte arrays will be passed.
  ///
  /// This is `null` when the sink is only used for a one-shot
  /// [ChunkedCodingDecoder.convert] call, which reads the result of [_decode]
  /// directly and never calls [add] or [close].
  final Sink<List<int>> _sink;

  /// The current state of the sink's parsing.
  var _state = _State.boundary;

  /// The size of the chunk being parsed, or `null` if no size digit has been
  /// parsed yet.
  ///
  /// While the size header is being read this accumulates the hex digits seen
  /// so far; while the body is being read it counts the bytes still expected.
  int _size;

  _Sink(this._sink);

  @override
  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  @override
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);
    var output = _decode(chunk, start, end);
    if (output.isNotEmpty) _sink.add(output);
    if (isLast) _close(chunk, end);
  }

  @override
  void close() => _close();

  /// Like [close], but includes [chunk] and [index] in the [FormatException] if
  /// one is thrown.
  ///
  /// Throws if the end of the chunked message hasn't been reached yet.
  void _close([List<int> chunk, int index]) {
    if (_state != _State.end) {
      throw new FormatException("Input ended unexpectedly.", chunk, index);
    }

    _sink.close();
  }

  /// Decodes the data in [bytes] from [start] to [end].
  ///
  /// Returns the body bytes found in that range. Parsing state is kept in
  /// [_state] and [_size], so a message may be split across several calls at
  /// any byte position.
  ///
  /// Throws a [FormatException] if the data is not a well-formed chunked
  /// message, or if any data follows the end of the message.
  Uint8List _decode(List<int> bytes, int start, int end) {
    /// Throws a [FormatException] if `bytes[start] != $char`. Uses [name] to
    /// describe the character in the exception text.
    assertCurrentChar(int char, String name) {
      if (bytes[start] != char) {
        throw new FormatException("Expected $name.", bytes, start);
      }
    }

    var buffer = new Uint8Buffer();
    while (start != end) {
      switch (_state) {
        case _State.boundary:
          // The first size digit replaces whatever was left in [_size].
          _size = _digitForByte(bytes, start);
          _state = _State.size;
          start++;
          break;

        case _State.size:
          if (bytes[start] == $cr) {
            _state = _State.sizeBeforeLF;
          } else {
            // Shift four bits left since a single hex digit contains four bits
            // of information.
            _size = (_size << 4) + _digitForByte(bytes, start);
          }
          start++;
          break;

        case _State.sizeBeforeLF:
          assertCurrentChar($lf, "LF");
          // A zero-sized chunk marks the end of the message.
          _state = _size == 0 ? _State.endBeforeCR : _State.body;
          start++;
          break;

        case _State.body:
          // Consume as much of the chunk as this slice contains.
          var chunkEnd = math.min(end, start + _size);
          buffer.addAll(bytes, start, chunkEnd);
          _size -= chunkEnd - start;
          start = chunkEnd;
          // The chunk data is always terminated by its own CR LF.
          if (_size == 0) _state = _State.bodyBeforeCR;
          break;

        case _State.bodyBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.bodyBeforeLF;
          start++;
          break;

        case _State.bodyBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.boundary;
          start++;
          break;

        case _State.endBeforeCR:
          assertCurrentChar($cr, "CR");
          _state = _State.endBeforeLF;
          start++;
          break;

        case _State.endBeforeLF:
          assertCurrentChar($lf, "LF");
          _state = _State.end;
          start++;
          break;

        case _State.end:
          throw new FormatException("Expected no more data.", bytes, start);
      }
    }
    return buffer.buffer.asUint8List(0, buffer.length);
  }

  /// Returns the hex digit (0 through 15) corresponding to the byte at
  /// [index] in [bytes].
  ///
  /// If the given byte isn't a hexadecimal ASCII character, throws a
  /// [FormatException].
  int _digitForByte(List<int> bytes, int index) {
    // If the byte is a numeral, get its value. XOR works because 0 in ASCII is
    // `0b110000` and the other numerals come after it in ascending order and
    // take up at most four bits.
    //
    // We check for digits first because it ensures there's only a single branch
    // for 10 out of 16 of the expected cases. We don't count the `digit >= 0`
    // check because branch prediction will always work on it for valid data.
    var byte = bytes[index];
    var digit = $0 ^ byte;
    if (digit <= 9) {
      if (digit >= 0) return digit;
    } else {
      // If the byte is an uppercase letter, convert it to lowercase. This works
      // because uppercase letters in ASCII are exactly `0b100000 = 0x20` less
      // than lowercase letters, so if we ensure that that bit is 1 we ensure
      // that the letter is lowercase.
      var letter = 0x20 | byte;
      if ($a <= letter && letter <= $f) return letter - $a + 10;
    }

    throw new FormatException(
        "Invalid hexadecimal byte 0x${byte.toRadixString(16).toUpperCase()}.",
        bytes,
        index);
  }
}

/// An enumeration of states that [_Sink] can exist in when decoding a chunked
/// message.
class _State {
  /// The parser is expecting the first digit of a chunk size header, either at
  /// the very start of the message or after a complete chunk.
  ///
  /// Transitions to [size].
  static const boundary = const _State._("boundary");

  /// The parser has parsed at least one digit of the chunk size header, but has
  /// not yet parsed the `CR LF` sequence that indicates the end of that header.
  ///
  /// Transitions to [sizeBeforeLF].
  static const size = const _State._("size");

  /// The parser has parsed the chunk size header and the CR character after it,
  /// but not the LF.
  ///
  /// Transitions to [body] or [endBeforeCR].
  static const sizeBeforeLF = const _State._("size before LF");

  /// The parser has parsed a chunk header and possibly some of the body, but
  /// still needs to consume more bytes.
  ///
  /// Transitions to [bodyBeforeCR].
  static const body = const _State._("body");

  /// The parser has parsed all the bytes in a chunk body but not the CR LF
  /// sequence that follows it.
  ///
  /// Transitions to [bodyBeforeLF].
  static const bodyBeforeCR = const _State._("body before CR");

  /// The parser has parsed all the bytes in a chunk body and the CR that
  /// follows it, but not the LF after that.
  ///
  /// Transitions to [boundary].
  static const bodyBeforeLF = const _State._("body before LF");

  /// The parser has parsed the final empty chunk but not the CR LF sequence
  /// that follows it.
  ///
  /// Transitions to [endBeforeLF].
  static const endBeforeCR = const _State._("end before CR");

  /// The parser has parsed the final empty chunk and the CR that follows it,
  /// but not the LF after that.
  ///
  /// Transitions to [end].
  static const endBeforeLF = const _State._("end before LF");

  /// The parser has parsed the final empty chunk as well as the CR LF that
  /// follows, and expects no more data.
  static const end = const _State._("end");

  /// The human-readable name returned by [toString].
  final String _name;

  const _State._(this._name);

  @override
  String toString() => _name;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
// Copyright (c) 2016, the Dart project authors. Please see the AUTHORS file | ||
// for details. All rights reserved. Use of this source code is governed by a | ||
// BSD-style license that can be found in the LICENSE file. | ||
|
||
import 'dart:convert'; | ||
import 'dart:typed_data'; | ||
|
||
import 'package:charcode/ascii.dart'; | ||
|
||
/// The canonical instance of [ChunkedCodingEncoder].
const chunkedCodingEncoder = const ChunkedCodingEncoder._();

/// The chunk indicating that the chunked message has finished.
///
/// This is a zero size header followed by two CR LF sequences.
final _doneChunk =
    new Uint8List.fromList(const <int>[$0, $cr, $lf, $cr, $lf]);

/// A converter that encodes byte arrays into chunks with size tags.
class ChunkedCodingEncoder extends Converter<List<int>, List<int>> {
  /// Private so that [chunkedCodingEncoder] is the only instance.
  const ChunkedCodingEncoder._();

  /// Encodes all of [bytes] as one complete chunked message, including the
  /// end-of-message footer.
  @override
  List<int> convert(List<int> bytes) {
    return _convert(bytes, 0, bytes.length, isLast: true);
  }

  /// Returns a sink that encodes each list added to it and forwards the
  /// result to [sink].
  @override
  ByteConversionSink startChunkedConversion(Sink<List<int>> sink) {
    return new _Sink(sink);
  }
}
|
||
/// A conversion sink that encodes data with the chunked transfer coding.
class _Sink extends ByteConversionSinkBase {
  /// The underlying sink to which encoded byte arrays will be passed.
  final Sink<List<int>> _sink;

  _Sink(this._sink);

  /// Encodes all of [chunk] and passes the result to the underlying sink.
  @override
  void add(List<int> chunk) => addSlice(chunk, 0, chunk.length, false);

  /// Encodes [chunk] from [start] to [end] and passes the result to the
  /// underlying sink.
  ///
  /// If [isLast] is `true`, the end-of-message footer is included in the
  /// encoded output and the underlying sink is closed afterwards.
  @override
  void addSlice(List<int> chunk, int start, int end, bool isLast) {
    RangeError.checkValidRange(start, end, chunk.length);

    var encoded = _convert(chunk, start, end, isLast: isLast);
    _sink.add(encoded);

    if (!isLast) return;
    _sink.close();
  }

  /// Emits the end-of-message footer and closes the underlying sink.
  @override
  void close() {
    _sink
      ..add(_doneChunk)
      ..close();
  }
}
|
||
/// Returns a new list containing a chunked transfer coding size header, the
/// slice of [bytes] from [start] to [end], and the CR LF sequence that
/// terminates the chunk.
///
/// If [isLast] is `true`, this adds the footer that indicates that the chunked
/// message is complete.
///
/// If the slice is empty, no chunk is emitted for it: the result is just the
/// footer when [isLast] is `true` and an empty list otherwise. An empty chunk
/// can't be written because a zero size header is what marks the end of the
/// message.
List<int> _convert(List<int> bytes, int start, int end, {bool isLast: false}) {
  if (end == start) return isLast ? _doneChunk : const [];

  var size = end - start;
  var sizeInHex = size.toRadixString(16);
  var footerSize = isLast ? _doneChunk.length : 0;

  // Add 4 for the two CRLF sequences: one follows the size header and one
  // follows the chunk data.
  var list = new Uint8List(sizeInHex.length + 4 + size + footerSize);
  list.setRange(0, sizeInHex.length, sizeInHex.codeUnits);

  var cursor = sizeInHex.length;
  list[cursor++] = $cr;
  list[cursor++] = $lf;

  list.setRange(cursor, cursor + size, bytes, start);
  cursor += size;

  // The chunk data must be terminated by its own CRLF; without it the output
  // is not a valid chunked message.
  list[cursor++] = $cr;
  list[cursor++] = $lf;

  if (isLast) {
    list.setRange(list.length - footerSize, list.length, _doneChunk);
  }
  return list;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,15 @@ | ||
name: http_parser | ||
version: 3.0.3 | ||
version: 3.1.0 | ||
author: "Dart Team <[email protected]>" | ||
homepage: https://github.com/dart-lang/http_parser | ||
description: > | ||
A platform-independent package for parsing and serializing HTTP formats. | ||
dependencies: | ||
charcode: "^1.1.0" | ||
collection: ">=0.9.1 <2.0.0" | ||
source_span: "^1.0.0" | ||
string_scanner: ">=0.0.0 <2.0.0" | ||
typed_data: "^1.1.0" | ||
dev_dependencies: | ||
test: "^0.12.0" | ||
environment: | ||
|
Oops, something went wrong.