From c88fb2b8e299460e695911b485f10f5c6a229e49 Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Fri, 3 Nov 2023 16:20:12 -0700 Subject: [PATCH] Add chunked decoding support to CodePage Add chunked decoding support (`startChunkedConversion`) for `CodePage` encodings --- CHANGELOG.md | 2 + lib/src/codepage.dart | 27 ++++++++++ test/codepage_test.dart | 116 +++++++++++++++++++++++++++++++--------- 3 files changed, 119 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 415ade5..180f3dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## 3.1.2-dev - Require Dart 2.19 +- Add chunked decoding support (`startChunkedConversion`) for `CodePage` + encodings. ## 3.1.1 diff --git a/lib/src/codepage.dart b/lib/src/codepage.dart index 50941b4..e541297 100644 --- a/lib/src/codepage.dart +++ b/lib/src/codepage.dart @@ -277,6 +277,25 @@ CodePageDecoder _createDecoder(String characters) { return _NonBmpCodePageDecoder._(result); } +/// An input Sink for decoders where each input byte can be considered +/// independently. +class _CodePageSink implements Sink<List<int>> { + final Sink<String> _output; + final String Function(List<int> input) _convert; + + _CodePageSink(this._output, this._convert); + + @override + void add(List<int> chunk) { + _output.add(_convert(chunk)); + } + + @override + void close() { + _output.close(); + } +} + /// Code page with non-BMP characters.
class _NonBmpCodePageDecoder extends Converter<List<int>, String> implements CodePageDecoder { @@ -326,6 +345,10 @@ class _NonBmpCodePageDecoder extends Converter<List<int>, String> } return String.fromCharCodes(buffer); } + + @override + Sink<List<int>> startChunkedConversion(Sink<String> sink) => + _CodePageSink(sink, convert); } class _BmpCodePageDecoder extends Converter<List<int>, String> @@ -360,6 +383,10 @@ class _BmpCodePageDecoder extends Converter<List<int>, String> return String.fromCharCodes(codeUnits); } + @override + Sink<List<int>> startChunkedConversion(Sink<String> sink) => + _CodePageSink(sink, convert); + String _convertAllowInvalid(List<int> bytes) { var count = bytes.length; var codeUnits = Uint16List(count); diff --git a/test/codepage_test.dart b/test/codepage_test.dart index c0fa45f..e729dc0 100644 --- a/test/codepage_test.dart +++ b/test/codepage_test.dart @@ -2,6 +2,8 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'dart:convert'; +import 'dart:core'; import 'dart:typed_data'; import 'package:convert/convert.dart'; @@ -25,24 +27,52 @@ void main() { latinThai, latinArabic ]) { - test('${cp.name} codepage', () { - // All ASCII compatible. - for (var byte = 0x20; byte < 0x7f; byte++) { - expect(cp[byte], byte); - } - // Maps both directions. - for (var byte = 0; byte < 256; byte++) { - var char = cp[byte]; - if (char != 0xFFFD) { - var string = String.fromCharCode(char); - expect(cp.encode(string), [byte]); - expect(cp.decode([byte]), string); + group('${cp.name} codepage', () { + test('ascii compatible', () { + for (var byte = 0x20; byte < 0x7f; byte++) { + expect(cp[byte], byte); } - } - expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>())); - // Decode works like operator[]. - expect(cp.decode(bytes, allowInvalid: true), - String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]])); + }); + + test('bidirectional mapping', () { + // Maps both directions.
+ for (var byte = 0; byte < 256; byte++) { + var char = cp[byte]; + if (char != 0xFFFD) { + var string = String.fromCharCode(char); + expect(cp.encode(string), [byte]); + expect(cp.decode([byte]), string); + } + } + }); + + test('decode invalid characters not allowed', () { + expect(() => cp.decode([0xfffd]), throwsA(isA<FormatException>())); + }); + + test('decode invalid characters allowed', () { + // Decode works like operator[]. + expect(cp.decode(bytes, allowInvalid: true), + String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]])); + }); + + test('chunked conversion', () { + late final String decodedString; + final outputSink = StringConversionSink.withCallback( + (accumulated) => decodedString = accumulated); + final inputSink = cp.decoder.startChunkedConversion(outputSink); + final expected = StringBuffer(); + + for (var byte = 0; byte < 256; byte++) { + var char = cp[byte]; + if (char != 0xFFFD) { + inputSink.add([byte]); + expected.writeCharCode(char); + } + } + inputSink.close(); + expect(decodedString, expected.toString()); + }); }); } test('latin-2 roundtrip', () { @@ -62,14 +92,48 @@ void main() { expect(decoded, latin2text); }); - test('Custom code page', () { - var cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}"); - var result = cp.encode('BADCAFE'); - expect(result, [1, 0, 3, 2, 0, 5, 4]); - expect(() => cp.encode('GAD'), throwsFormatException); - expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]); - expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE'); - expect(() => cp.decode([6, 1, 255]), throwsFormatException); - expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}'); + group('Custom code page', () { + late final CodePage cp; + + setUpAll(() => cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}")); + + test('simple encode', () { + var result = cp.encode('BADCAFE'); + expect(result, [1, 0, 3, 2, 0, 5, 4]); + }); + + test('unencodable character', () { + expect(() => cp.encode('GAD'), throwsFormatException); + }); + +
test('unencodable character with invalidCharacter', () { + expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]); + }); + + test('simple decode', () { + expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE'); + }); + + test('undecode byte', () { + expect(() => cp.decode([6, 1, 255]), throwsFormatException); + }); + + test('undecode byte with allowInvalid', () { + expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}'); + }); + + test('chunked conversion', () { + late final String decodedString; + final outputSink = StringConversionSink.withCallback( + (accumulated) => decodedString = accumulated); + final inputSink = cp.decoder.startChunkedConversion(outputSink); + inputSink + ..add([1]) + ..add([0]) + ..add([3]); + + inputSink.close(); + expect(decodedString, 'BAD'); + }); }); }