Skip to content
This repository has been archived by the owner on Oct 17, 2024. It is now read-only.

Commit

Permalink
Add chunked decoding support to CodePage (#91)
Browse files Browse the repository at this point in the history
  • Loading branch information
brianquinlan authored Nov 6, 2023
1 parent f24afa7 commit 3503170
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ jobs:
matrix:
# Add macos-latest and/or windows-latest if relevant for this package.
os: [ubuntu-latest]
sdk: [2.19.0, dev]
sdk: [3.0.0, dev]
steps:
- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
- uses: dart-lang/setup-dart@b64355ae6ca0b5d484f0106a033dd1388965d06d
Expand Down
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
## 3.1.2-dev

- Require Dart 2.19
- Require Dart 3.0
- Add chunked decoding support (`startChunkedConversion`) for `CodePage`
encodings.

## 3.1.1

Expand Down
27 changes: 27 additions & 0 deletions lib/src/codepage.dart
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,25 @@ CodePageDecoder _createDecoder(String characters) {
return _NonBmpCodePageDecoder._(result);
}

/// A [ByteConversionSink] that decodes every incoming chunk on its own and
/// forwards the resulting text to a wrapped [Sink].
///
/// Intended for decoders in which each input byte can be decoded
/// independently of its neighbours, so nothing has to be carried over from one
/// chunk to the next.
class _CodePageDecoderSink extends ByteConversionSink {
  /// Converter applied to each chunk handed to [add].
  final Converter<List<int>, String> _converter;

  /// Destination that receives the decoded text.
  final Sink<String> _target;

  _CodePageDecoderSink(this._target, this._converter);

  /// Decodes all of [chunk] and passes the decoded string to the output sink.
  @override
  void add(List<int> chunk) {
    final decoded = _converter.convert(chunk);
    _target.add(decoded);
  }

  /// Closes the output sink; this sink holds no buffered input of its own.
  @override
  void close() => _target.close();
}

/// Code page with non-BMP characters.
class _NonBmpCodePageDecoder extends Converter<List<int>, String>
implements CodePageDecoder {
Expand Down Expand Up @@ -326,6 +345,10 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String>
}
return String.fromCharCodes(buffer);
}

/// Starts a chunked conversion backed by this decoder.
///
/// Each chunk added to the returned sink is decoded with this converter and
/// the resulting text is added to [sink]; closing the returned sink closes
/// [sink].
@override
Sink<List<int>> startChunkedConversion(Sink<String> sink) {
  return _CodePageDecoderSink(sink, this);
}
}

class _BmpCodePageDecoder extends Converter<List<int>, String>
Expand Down Expand Up @@ -360,6 +383,10 @@ class _BmpCodePageDecoder extends Converter<List<int>, String>
return String.fromCharCodes(codeUnits);
}

/// Starts a chunked conversion backed by this decoder.
///
/// Each chunk added to the returned sink is decoded with this converter and
/// the resulting text is added to [sink]; closing the returned sink closes
/// [sink].
@override
Sink<List<int>> startChunkedConversion(Sink<String> sink) {
  return _CodePageDecoderSink(sink, this);
}

String _convertAllowInvalid(List<int> bytes) {
var count = bytes.length;
var codeUnits = Uint16List(count);
Expand Down
2 changes: 1 addition & 1 deletion pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ description: >-
repository: https://github.com/dart-lang/convert

environment:
sdk: '>=2.19.0 <3.0.0'
sdk: '^3.0.0'

dependencies:
typed_data: ^1.3.0
Expand Down
131 changes: 105 additions & 26 deletions test/codepage_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:core';
import 'dart:typed_data';

import 'package:convert/convert.dart';
Expand All @@ -25,24 +27,52 @@ void main() {
latinThai,
latinArabic
]) {
test('${cp.name} codepage', () {
// All ASCII compatible.
for (var byte = 0x20; byte < 0x7f; byte++) {
expect(cp[byte], byte);
}
// Maps both directions.
for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
if (char != 0xFFFD) {
var string = String.fromCharCode(char);
expect(cp.encode(string), [byte]);
expect(cp.decode([byte]), string);
group('${cp.name} codepage', () {
test('ascii compatible', () {
for (var byte = 0x20; byte < 0x7f; byte++) {
expect(cp[byte], byte);
}
}
expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>()));
// Decode works like operator[].
expect(cp.decode(bytes, allowInvalid: true),
String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]]));
});

test('bidirectional mapping', () {
// Maps both directions.
for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
if (char != 0xFFFD) {
var string = String.fromCharCode(char);
expect(cp.encode(string), [byte]);
expect(cp.decode([byte]), string);
}
}
});

test('decode invalid characters not allowed', () {
expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>()));
});

test('decode invalid characters allowed', () {
// Decode works like operator[].
expect(cp.decode(bytes, allowInvalid: true),
String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]]));
});

// Feeds every decodable byte of the code page through the chunked decoder,
// one byte per chunk, and checks that the text delivered to the output sink
// equals the characters looked up directly with `cp[byte]`.
test('chunked conversion', () {
// Assigned by the callback below; it is read only after `inputSink.close()`.
late final String decodedString;
final outputSink = StringConversionSink.withCallback(
(accumulated) => decodedString = accumulated);
final inputSink = cp.decoder.startChunkedConversion(outputSink);
final expected = StringBuffer();

for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
// Bytes that map to U+FFFD are skipped, as in the bidirectional mapping
// check, so only decodable bytes reach the decoder.
if (char != 0xFFFD) {
inputSink.add([byte]);
expected.writeCharCode(char);
}
}
inputSink.close();
expect(decodedString, expected.toString());
});
});
}
test('latin-2 roundtrip', () {
Expand All @@ -62,14 +92,63 @@ void main() {
expect(decoded, latin2text);
});

test('Custom code page', () {
var cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}");
var result = cp.encode('BADCAFE');
expect(result, [1, 0, 3, 2, 0, 5, 4]);
expect(() => cp.encode('GAD'), throwsFormatException);
expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]);
expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE');
expect(() => cp.decode([6, 1, 255]), throwsFormatException);
expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}');
// Tests against a hand-built code page: bytes 0-5 map to 'A'-'F' and the
// remaining 250 entries are U+FFFD.
group('Custom code page', () {
late final cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}");

test('simple encode', () {
var result = cp.encode('BADCAFE');
expect(result, [1, 0, 3, 2, 0, 5, 4]);
});

// 'G' is not one of the six characters in the code page.
test('unencodable character', () {
expect(() => cp.encode('GAD'), throwsFormatException);
});

// With `invalidCharacter` supplied, the unencodable 'G' becomes byte 0x3F.
test('unencodable character with invalidCharacter', () {
expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]);
});

test('simple decode', () {
expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE');
});

// Bytes 6 and 255 fall in the U+FFFD part of the table.
test('undecodable byte', () {
expect(() => cp.decode([6, 1, 255]), throwsFormatException);
});

test('undecodable byte with allowInvalid', () {
expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}');
});

// Adds three single-byte chunks and expects the concatenated decoded text.
test('chunked conversion', () {
// Assigned by the callback below; it is read only after `close()`.
late final String decodedString;
final outputSink = StringConversionSink.withCallback(
(accumulated) => decodedString = accumulated);
final inputSink = cp.decoder.startChunkedConversion(outputSink);

inputSink
..add([1])
..add([0])
..add([3])
..close();
expect(decodedString, 'BAD');
});

// Checks that the sink returned by the decoder is a ByteConversionSink and
// that `addSlice` decodes only the requested sub-ranges of `bytes`.
test('chunked conversion - byte conversion sink', () {
late final String decodedString;
final outputSink = StringConversionSink.withCallback(
(accumulated) => decodedString = accumulated);
final bytes = [1, 0, 3, 2, 0, 5, 4];

final inputSink = cp.decoder.startChunkedConversion(outputSink);
expect(inputSink, isA<ByteConversionSink>());

// Slices [1, 3) -> bytes 0, 3 ('AD'); [4, 5) -> byte 0 ('A'); [6, 6) is
// empty. The last call passes `true` for isLast and there is no explicit
// close(), so the test relies on that call to close the sink.
(inputSink as ByteConversionSink)
..addSlice(bytes, 1, 3, false)
..addSlice(bytes, 4, 5, false)
..addSlice(bytes, 6, 6, true);

expect(decodedString, 'ADA');
});
});
}

0 comments on commit 3503170

Please sign in to comment.