Skip to content
This repository has been archived by the owner on Oct 17, 2024. It is now read-only.

Commit

Permalink
Add chunked decoding support to CodePage
Browse files Browse the repository at this point in the history
Add chunked decoding support (`startChunkedConversion`) for `CodePage` encodings
  • Loading branch information
brianquinlan committed Nov 3, 2023
1 parent f24afa7 commit c88fb2b
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 26 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
## 3.1.2-dev

- Require Dart 2.19
- Add chunked decoding support (`startChunkedConversion`) for `CodePage`
encodings.

## 3.1.1

Expand Down
27 changes: 27 additions & 0 deletions lib/src/codepage.dart
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,25 @@ CodePageDecoder _createDecoder(String characters) {
return _NonBmpCodePageDecoder._(result);
}

/// A byte sink that decodes every chunk on its own and forwards the text.
///
/// Each chunk passed to [add] is run through the conversion function supplied
/// at construction, and the resulting string is added to the wrapped output
/// sink. No state is carried over between chunks, so this is only suitable
/// for decoders in which each input byte can be considered independently.
class _CodePageSink implements Sink<List<int>> {
  /// Receives the decoded string for every chunk.
  final Sink<String> _target;

  /// Turns one chunk of bytes into its string form.
  final String Function(List<int> input) _decode;

  _CodePageSink(this._target, this._decode);

  /// Decodes [data] and adds the result to the output sink.
  @override
  void add(List<int> data) => _target.add(_decode(data));

  /// Closes the output sink.
  @override
  void close() => _target.close();
}

/// Code page with non-BMP characters.
class _NonBmpCodePageDecoder extends Converter<List<int>, String>
implements CodePageDecoder {
Expand Down Expand Up @@ -326,6 +345,10 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String>
}
return String.fromCharCodes(buffer);
}

/// Starts a chunked conversion that writes decoded text to [sink].
///
/// Every chunk of bytes added to the returned sink is decoded with [convert]
/// and the resulting string is added to [sink]. Closing the returned sink
/// closes [sink].
@override
Sink<List<int>> startChunkedConversion(Sink<String> sink) {
  return _CodePageSink(sink, convert);
}
}

class _BmpCodePageDecoder extends Converter<List<int>, String>
Expand Down Expand Up @@ -360,6 +383,10 @@ class _BmpCodePageDecoder extends Converter<List<int>, String>
return String.fromCharCodes(codeUnits);
}

/// Starts a chunked conversion that writes decoded text to [sink].
///
/// Every chunk of bytes added to the returned sink is decoded with [convert]
/// and the resulting string is added to [sink]. Closing the returned sink
/// closes [sink].
@override
Sink<List<int>> startChunkedConversion(Sink<String> sink) {
  return _CodePageSink(sink, convert);
}

String _convertAllowInvalid(List<int> bytes) {
var count = bytes.length;
var codeUnits = Uint16List(count);
Expand Down
116 changes: 90 additions & 26 deletions test/codepage_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.

import 'dart:convert';
import 'dart:core';
import 'dart:typed_data';

import 'package:convert/convert.dart';
Expand All @@ -25,24 +27,52 @@ void main() {
latinThai,
latinArabic
]) {
test('${cp.name} codepage', () {
// All ASCII compatible.
for (var byte = 0x20; byte < 0x7f; byte++) {
expect(cp[byte], byte);
}
// Maps both directions.
for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
if (char != 0xFFFD) {
var string = String.fromCharCode(char);
expect(cp.encode(string), [byte]);
expect(cp.decode([byte]), string);
group('${cp.name} codepage', () {
test('ascii compatible', () {
for (var byte = 0x20; byte < 0x7f; byte++) {
expect(cp[byte], byte);
}
}
expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>()));
// Decode works like operator[].
expect(cp.decode(bytes, allowInvalid: true),
String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]]));
});

test('bidirectional mapping', () {
// Maps both directions.
for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
if (char != 0xFFFD) {
var string = String.fromCharCode(char);
expect(cp.encode(string), [byte]);
expect(cp.decode([byte]), string);
}
}
});

test('decode invalid characters not allowed', () {
expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>()));
});

test('decode invalid characters allowed', () {
// Decode works like operator[].
expect(cp.decode(bytes, allowInvalid: true),
String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]]));
});

test('chunked conversion', () {
late final String decodedString;
final outputSink = StringConversionSink.withCallback(
(accumulated) => decodedString = accumulated);
final inputSink = cp.decoder.startChunkedConversion(outputSink);
final expected = StringBuffer();

for (var byte = 0; byte < 256; byte++) {
var char = cp[byte];
if (char != 0xFFFD) {
inputSink.add([byte]);
expected.writeCharCode(char);
}
}
inputSink.close();
expect(decodedString, expected.toString());
});
});
}
test('latin-2 roundtrip', () {
Expand All @@ -62,14 +92,48 @@ void main() {
expect(decoded, latin2text);
});

test('Custom code page', () {
var cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}");
var result = cp.encode('BADCAFE');
expect(result, [1, 0, 3, 2, 0, 5, 4]);
expect(() => cp.encode('GAD'), throwsFormatException);
expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]);
expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE');
expect(() => cp.decode([6, 1, 255]), throwsFormatException);
expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}');
group('Custom code page', () {
late final CodePage cp;

setUpAll(() => cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}"));

test('simple encode', () {
var result = cp.encode('BADCAFE');
expect(result, [1, 0, 3, 2, 0, 5, 4]);
});

test('unencodable character', () {
expect(() => cp.encode('GAD'), throwsFormatException);
});

test('unencodable character with invalidCharacter', () {
expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]);
});

test('simple decode', () {
expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE');
});

test('undecode byte', () {
expect(() => cp.decode([6, 1, 255]), throwsFormatException);
});

test('undecode byte with allowInvalid', () {
expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}');
});

test('chunked conversion', () {
late final String decodedString;
final outputSink = StringConversionSink.withCallback(
(accumulated) => decodedString = accumulated);
final inputSink = cp.decoder.startChunkedConversion(outputSink);
inputSink
..add([1])
..add([0])
..add([3]);

inputSink.close();
expect(decodedString, 'BAD');
});
});
}

0 comments on commit c88fb2b

Please sign in to comment.