Skip to content

Commit

Permalink
Added additional comment encoding tests
Browse files Browse the repository at this point in the history
  • Loading branch information
edwardneal committed Jul 28, 2024
1 parent bbe41a2 commit 869d311
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 7 deletions.
13 changes: 13 additions & 0 deletions src/libraries/Common/tests/System/IO/Compression/ZipTestHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Xunit;

Expand Down Expand Up @@ -499,5 +500,17 @@ public static IEnumerable<object[]> Latin1Comment_Data()
yield return e;
}
}

// Builds test data for comments that are encoded with Latin1 but decoded with UTF8.
// Every row produced by Latin1Comment_Data() is extended with a third element: the expected
// comment (element 1) converted to Latin1 bytes and then decoded from those bytes as UTF8.
// Returns: originalComment, expectedComment, transcoded expectedComment
public static IEnumerable<object[]> MismatchingEncodingComment_Data()
{
    foreach (object[] e in Latin1Comment_Data())
    {
        // Direct cast instead of `as`: an element of the wrong type fails right here with an
        // InvalidCastException rather than turning into null and failing inside GetBytes.
        string expectedComment = (string)e[1];
        byte[] expectedBytes = Encoding.Latin1.GetBytes(expectedComment);

        yield return new object[] { e[0], expectedComment, Encoding.UTF8.GetString(expectedBytes) };
    }
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,30 +41,50 @@ public static void Create_Comment_Utf8EntryName_Latin1Encoding(string originalCo

// Creates the archive with Encoding.UTF8 and reads it back with Encoding.Latin1, using Utf8FileName as the
// entry name. The same expectedComment is asserted both when the entry is created and when it is read back.
[Theory]
[MemberData(nameof(Utf8Comment_Data))]
public static void Create_Comment_Utf8EntryName_Varying_Encodings(string originalComment, string expectedComment)
public static void Create_Comment_Utf8EntryName_Utf8Encoding_Prioritised(string originalComment, string expectedComment)
// UTF8 encoding bit is set in the general-purpose bit flags. The verification encoding of Latin1 should be ignored
=> Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, Encoding.UTF8, Encoding.Latin1);
=> Create_Comment_EntryName_Encoding_Internal(Utf8FileName, originalComment, expectedComment, expectedComment, Encoding.UTF8, Encoding.Latin1);

// Writes an archive with Latin1 and reads it back with an explicitly supplied UTF8 encoding.
// Because the archive is created with Latin1, the UTF8 encoding bit in the general-purpose bit flags
// should not be set. Decoding with UTF8 is then expected to give a different, incorrectly decoded result:
// the test data contains code points in the range 0xC0..0xFF, which Latin1 encodes in one byte and UTF8
// encodes in two bytes. Hence the separate pre-write and post-write expectations.
[Theory]
[MemberData(nameof(MismatchingEncodingComment_Data))]
public static void Create_Comment_AsciiEntryName_Utf8Decoding_Invalid(string originalComment, string expectedPreWriteComment, string expectedPostWriteComment)
{
    Encoding writeEncoding = Encoding.Latin1;
    Encoding readEncoding = Encoding.UTF8;

    Create_Comment_EntryName_Encoding_Internal(
        AsciiFileName,
        originalComment,
        expectedPreWriteComment,
        expectedPostWriteComment,
        writeEncoding,
        readEncoding);
}

// Writes an archive with Latin1 and reads it back without supplying an encoding (null).
// Creating the archive with Latin1 should leave the UTF8 encoding bit in the general-purpose bit flags
// unset. However, not specifying an encoding for the read (or specifying null) should result in UTF8 being
// used anyway. The result is expected to be decoded incorrectly, since the test data contains code points
// in the range 0xC0..0xFF (one byte in Latin1, two bytes in UTF8).
[Theory]
[MemberData(nameof(MismatchingEncodingComment_Data))]
public static void Create_Comment_AsciiEntryName_DefaultDecoding_Utf8(string originalComment, string expectedPreWriteComment, string expectedPostWriteComment)
{
    Create_Comment_EntryName_Encoding_Internal(
        AsciiFileName,
        originalComment,
        expectedPreWriteComment,
        expectedPostWriteComment,
        Encoding.Latin1,
        null); // no verification encoding is supplied for the read
}

// Convenience overload for the common case: the archive is created and verified with the same encoding,
// and the same expectedComment applies when the entry is created and when it is read back.
private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment, string expectedComment, Encoding encoding)
=> Create_Comment_EntryName_Encoding_Internal(entryName, originalComment, expectedComment, encoding, encoding);
=> Create_Comment_EntryName_Encoding_Internal(entryName, originalComment, expectedComment, expectedComment, encoding, encoding);

// Creates a single-entry archive in a MemoryStream using creationEncoding, sets the entry's comment to
// originalComment and asserts the value read back from the entry while the archive is still open.
// The same stream is then reopened in Read mode with the verification encoding, and the name and comment
// of every entry in the archive are asserted.
private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment, string expectedComment, Encoding creationEncoding, Encoding verificationEencoding)
private static void Create_Comment_EntryName_Encoding_Internal(string entryName, string originalComment,
string expectedPreWriteComment, string expectedPostWriteComment,
Encoding creationEncoding, Encoding verificationEncoding)
{
using var ms = new MemoryStream();

// leaveOpen: true, because the same MemoryStream is reopened for reading below.
using (var zip = new ZipArchive(ms, ZipArchiveMode.Create, leaveOpen: true, creationEncoding))
{
// NoCompression is requested for the entry; no content is written, only the comment is set.
ZipArchiveEntry entry = zip.CreateEntry(entryName, CompressionLevel.NoCompression);
entry.Comment = originalComment;
Assert.Equal(expectedComment, entry.Comment);
// The expected pre-write and post-write comment can be different when testing encodings which vary between operations.
Assert.Equal(expectedPreWriteComment, entry.Comment);
}

// Reopen the stream that was just written, this time decoding with the verification encoding.
using (var zip = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: false, verificationEencoding))
using (var zip = new ZipArchive(ms, ZipArchiveMode.Read, leaveOpen: false, verificationEncoding))
{
foreach (ZipArchiveEntry entry in zip.Entries)
{
// The entry name is expected to be unchanged; only the comment expectation has a separate post-write value.
Assert.Equal(entryName, entry.Name);
Assert.Equal(expectedComment, entry.Comment);
Assert.Equal(expectedPostWriteComment, entry.Comment);
}
}
}
Expand Down

0 comments on commit 869d311

Please sign in to comment.