diff --git a/src/ImageSharp/Common/Helpers/HexConverter.cs b/src/ImageSharp/Common/Helpers/HexConverter.cs
new file mode 100644
index 0000000000..c55e9bbd9d
--- /dev/null
+++ b/src/ImageSharp/Common/Helpers/HexConverter.cs
@@ -0,0 +1,94 @@
+// Copyright (c) Six Labors.
+// Licensed under the Apache License, Version 2.0.
+
+using System;
+using System.Runtime.CompilerServices;
+
+namespace SixLabors.ImageSharp.Common.Helpers
+{
+    internal static class HexConverter
+    {
+        /// <summary>
+        /// Parses a hexadecimal string into a byte array without allocations. Throws on non-hexadecimal character.
+        /// Adapted from https://source.dot.net/#System.Private.CoreLib/Convert.cs,c9e4fbeaca708991.
+        /// </summary>
+        /// <param name="chars">The hexadecimal string to parse.</param>
+        /// <param name="bytes">The destination for the parsed bytes. Must be at least <paramref name="chars"/>.Length / 2 bytes long.</param>
+        /// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
+        /// <exception cref="ArgumentException">The input has an odd length or a non-hexadecimal character, or the destination is too small.</exception>
+        public static int HexStringToBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
+        {
+            if ((chars.Length % 2) != 0)
+            {
+                throw new ArgumentException("Input string length must be a multiple of 2", nameof(chars));
+            }
+
+            if (bytes.Length < (chars.Length / 2))
+            {
+                throw new ArgumentException("Output span must be at least half the length of the input string", nameof(bytes));
+            }
+            else
+            {
+                // Slightly better performance in the loop below, allows us to skip a bounds check
+                // while still supporting output buffers that are larger than necessary
+                bytes = bytes.Slice(0, chars.Length / 2);
+            }
+
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            static int FromChar(int c)
+            {
+                // Map from an ASCII char to its hex value, e.g. arr['b'] == 11. 0xFF means it's not a hex digit.
+                // This doesn't actually allocate.
+                ReadOnlySpan<byte> charToHexLookup = new byte[]
+                {
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 15
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 31
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 47
+                    0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 63
+                    0xFF, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 79
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 95
+                    0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 111
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 127
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 143
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 159
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 175
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 191
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 207
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 223
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 239
+                    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 255
+                };
+
+                return c >= charToHexLookup.Length ? 0xFF : charToHexLookup[c];
+            }
+
+            // See https://source.dot.net/#System.Private.CoreLib/HexConverter.cs,4681d45a0aa0b361
+            int i = 0;
+            int j = 0;
+            int byteLo = 0;
+            int byteHi = 0;
+            while (j < bytes.Length)
+            {
+                byteLo = FromChar(chars[i + 1]);
+                byteHi = FromChar(chars[i]);
+
+                // byteHi hasn't been shifted to the high half yet, so the only way the bitwise or produces this pattern
+                // is if either byteHi or byteLo was not a hex character.
+                if ((byteLo | byteHi) == 0xFF)
+                {
+                    break;
+                }
+
+                bytes[j++] = (byte)((byteHi << 4) | byteLo);
+                i += 2;
+            }
+
+            if ((byteLo | byteHi) == 0xFF)
+            {
+                throw new ArgumentException("Input string contained non-hexadecimal characters", nameof(chars));
+            }
+
+            return j;
+        }
+    }
+}
diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
index f81cbca211..c9f0ce3755 100644
--- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
@@ -11,6 +11,7 @@
 using System.Runtime.InteropServices;
 using System.Text;
 using System.Threading;
+using SixLabors.ImageSharp.Common.Helpers;
 using SixLabors.ImageSharp.Compression.Zlib;
 using SixLabors.ImageSharp.Formats.Png.Chunks;
 using SixLabors.ImageSharp.Formats.Png.Filters;
@@ -187,10 +188,10 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
                                 this.AssignTransparentMarkers(alpha, pngMetadata);
                                 break;
                             case PngChunkType.Text:
-                                this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
+                                this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
                                 break;
                             case PngChunkType.CompressedText:
-                                this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
+                                this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
                                 break;
                             case PngChunkType.InternationalText:
                                 this.ReadInternationalTextChunk(pngMetadata, chunk.Data.GetSpan());
@@ -200,7 +201,7 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
                                 {
                                     byte[] exifData = new byte[chunk.Length];
                                     chunk.Data.GetSpan().CopyTo(exifData);
-                                    metadata.ExifProfile = new ExifProfile(exifData);
+                                    this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
                                 }
 
                                 break;
@@ -297,7 +298,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
                                     break;
                                 }
 
-                                this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
+                                this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
                                 break;
                             case PngChunkType.CompressedText:
                                 if (this.colorMetadataOnly)
@@ -306,7 +307,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
                                     break;
                                 }
 
-                                this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
+                                this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
                                 break;
                             case PngChunkType.InternationalText:
                                 if (this.colorMetadataOnly)
@@ -328,7 +329,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
                                 {
                                     byte[] exifData = new byte[chunk.Length];
                                     chunk.Data.GetSpan().CopyTo(exifData);
-                                    metadata.ExifProfile = new ExifProfile(exifData);
+                                    this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
                                 }
 
                                 break;
@@ -967,9 +968,10 @@ private void ReadHeaderChunk(PngMetadata pngMetadata, ReadOnlySpan<byte> data)
         /// <summary>
         /// Reads a text chunk containing image properties from the data.
         /// </summary>
+        /// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
         /// <param name="metadata">The metadata to decode to.</param>
         /// <param name="data">The <see cref="T:Span"/> containing the data.</param>
-        private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
+        private void ReadTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
         {
             if (this.ignoreMetadata)
             {
@@ -992,15 +994,19 @@ private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
 
             string value = PngConstants.Encoding.GetString(data.Slice(zeroIndex + 1));
 
-            metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
+            if (!this.TryReadTextChunkMetadata(baseMetadata, name, value))
+            {
+                metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
+            }
         }
 
         /// <summary>
         /// Reads the compressed text chunk. Contains a uncompressed keyword and a compressed text string.
         /// </summary>
+        /// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
         /// <param name="metadata">The metadata to decode to.</param>
         /// <param name="data">The <see cref="T:Span"/> containing the data.</param>
-        private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
+        private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
         {
             if (this.ignoreMetadata)
             {
@@ -1028,12 +1034,209 @@ private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> da
 
             ReadOnlySpan<byte> compressedData = data.Slice(zeroIndex + 2);
 
-            if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed))
+            if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed) &&
+                !this.TryReadTextChunkMetadata(baseMetadata, name, uncompressed))
             {
                 metadata.TextData.Add(new PngTextData(name, uncompressed, string.Empty, string.Empty));
             }
         }
 
+        /// <summary>
+        /// Checks if the given text chunk is actually storing parsable metadata.
+        /// </summary>
+        /// <param name="baseMetadata">The <see cref="ImageMetadata"/> object to store the parsed metadata in.</param>
+        /// <param name="chunkName">The name of the text chunk.</param>
+        /// <param name="chunkText">The contents of the text chunk.</param>
+        /// <returns>True if metadata was successfully parsed from the text chunk. False if the
+        /// text chunk was not identified as metadata, and should be stored in the metadata
+        /// object unmodified.</returns>
+        private bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText)
+        {
+            if (chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase) &&
+                this.TryReadLegacyExifTextChunk(baseMetadata, chunkText))
+            {
+                // Successfully parsed legacy exif data from text
+                return true;
+            }
+
+            // TODO: "Raw profile type iptc", potentially others?
+
+            // No special chunk data identified
+            return false;
+        }
+
+        /// <summary>
+        /// Reads exif data encoded into a text chunk with the name "raw profile type exif".
+        /// This method was used by ImageMagick, exiftool, exiv2, digiKam, etc, before the
+        /// 2017 update to png that allowed a true exif chunk.
+        /// </summary>
+        /// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded exif tags into.</param>
+        /// <param name="data">The contents of the "raw profile type exif" text chunk.</param>
+        /// <returns>True if an exif profile was decoded and stored; false if the text is not a well-formed legacy exif profile.</returns>
+        private bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string data)
+        {
+            ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();
+
+            if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan.Slice(0, 4), "exif".AsSpan()))
+            {
+                // "exif" identifier is missing from the beginning of the text chunk
+                return false;
+            }
+
+            // Skip to the data length, which is terminated by a line break
+            dataSpan = dataSpan.Slice(4).TrimStart();
+            int dataLengthEnd = dataSpan.IndexOf('\n');
+            if (dataLengthEnd < 0 || !TryParseInt32(dataSpan.Slice(0, dataLengthEnd), out int dataLength))
+            {
+                // The length field is missing or is not a number
+                return false;
+            }
+
+            // Skip to the hex-encoded data
+            dataSpan = dataSpan.Slice(dataLengthEnd).Trim();
+
+            // Sequence of bytes for the exif header ("Exif" ASCII and two zero bytes).
+            // This doesn't actually allocate.
+            ReadOnlySpan<byte> exifHeader = new byte[] { 0x45, 0x78, 0x69, 0x66, 0x00, 0x00 };
+
+            if (dataLength < exifHeader.Length)
+            {
+                // Not enough room for the required exif header, this data couldn't possibly be valid
+                return false;
+            }
+
+            if (dataLength > dataSpan.Length / 2)
+            {
+                // Every byte needs two hex characters, so the declared length cannot be satisfied.
+                // Rejecting it here also prevents an untrusted length from driving a huge allocation.
+                return false;
+            }
+
+            // Parse the hex-encoded data into the byte array we are going to hand off to ExifProfile
+            byte[] exifBlob = new byte[dataLength - exifHeader.Length];
+
+            try
+            {
+                // Check for the presence of the exif header in the hex-encoded binary data
+                byte[] tempExifBuf = exifBlob;
+                if (exifBlob.Length < exifHeader.Length)
+                {
+                    // The blob is too small to hold the header, so validate it in a scratch buffer instead
+                    tempExifBuf = new byte[exifHeader.Length];
+                }
+
+                HexConverter.HexStringToBytes(dataSpan.Slice(0, exifHeader.Length * 2), tempExifBuf);
+                if (!tempExifBuf.AsSpan().Slice(0, exifHeader.Length).SequenceEqual(exifHeader))
+                {
+                    // Exif header in the hex data is not valid
+                    return false;
+                }
+
+                // Skip over the exif header we just tested
+                dataSpan = dataSpan.Slice(exifHeader.Length * 2);
+                dataLength -= exifHeader.Length;
+
+                // Load the hex-encoded data, one line at a time
+                for (int i = 0; i < dataLength;)
+                {
+                    if (dataSpan.IsEmpty)
+                    {
+                        // Ran out of text before reaching the declared length. Without this check the
+                        // loop would never advance and the decoder would hang on a truncated chunk.
+                        return false;
+                    }
+
+                    ReadOnlySpan<char> lineSpan = dataSpan;
+
+                    int newlineIndex = dataSpan.IndexOf('\n');
+                    if (newlineIndex != -1)
+                    {
+                        lineSpan = dataSpan.Slice(0, newlineIndex);
+                        dataSpan = dataSpan.Slice(newlineIndex + 1);
+                    }
+                    else
+                    {
+                        // Final line without a trailing line break: everything has now been consumed
+                        dataSpan = default;
+                    }
+
+                    i += HexConverter.HexStringToBytes(lineSpan, exifBlob.AsSpan().Slice(i));
+                }
+            }
+            catch (ArgumentException)
+            {
+                // Malformed hex text, or more data than the declared length allows
+                return false;
+            }
+
+            this.MergeOrSetExifProfile(metadata, new ExifProfile(exifBlob), replaceExistingKeys: false);
+            return true;
+        }
+
+        /// <summary>
+        /// Compares two ReadOnlySpan&lt;char&gt;s in a case-insensitive method.
+        /// This is only needed because older frameworks are missing the extension method.
+        /// </summary>
+        /// <param name="span1">The first span to compare.</param>
+        /// <param name="span2">The second span to compare.</param>
+        /// <returns>True if the spans were identical, false otherwise.</returns>
+        private static bool StringEqualsInsensitive(ReadOnlySpan<char> span1, ReadOnlySpan<char> span2)
+        {
+#pragma warning disable IDE0022 // Use expression body for methods
+#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
+            return span1.Equals(span2, StringComparison.OrdinalIgnoreCase);
+#else
+            return span1.ToString().Equals(span2.ToString(), StringComparison.OrdinalIgnoreCase);
+#endif
+#pragma warning restore IDE0022 // Use expression body for methods
+        }
+
+        /// <summary>
+        /// int.TryParse() a ReadOnlySpan&lt;char&gt; using the invariant culture, with a fallback for older frameworks.
+        /// </summary>
+        /// <param name="span">The characters to parse.</param>
+        /// <param name="value">The parsed <see cref="int"/>, or zero if parsing failed.</param>
+        /// <returns>True if <paramref name="span"/> was a valid <see cref="int"/>, false otherwise.</returns>
+        private static bool TryParseInt32(ReadOnlySpan<char> span, out int value)
+        {
+#pragma warning disable IDE0022 // Use expression body for methods
+#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
+            return int.TryParse(span, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value);
+#else
+            return int.TryParse(span.ToString(), System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out value);
+#endif
+#pragma warning restore IDE0022 // Use expression body for methods
+        }
+
+        /// <summary>
+        /// Sets the <see cref="ExifProfile"/> in <paramref name="metadata"/> to <paramref name="newProfile"/>,
+        /// or copies exif tags if <paramref name="metadata"/> already contains an <see cref="ExifProfile"/>.
+        /// </summary>
+        /// <param name="metadata">The <see cref="ImageMetadata"/> to store the exif data in.</param>
+        /// <param name="newProfile">The <see cref="ExifProfile"/> to copy exif tags from.</param>
+        /// <param name="replaceExistingKeys">If <paramref name="metadata"/> already contains an <see cref="ExifProfile"/>,
+        /// controls whether existing exif tags in <paramref name="metadata"/> will be overwritten with any conflicting
+        /// tags from <paramref name="newProfile"/>.</param>
+        private void MergeOrSetExifProfile(ImageMetadata metadata, ExifProfile newProfile, bool replaceExistingKeys)
+        {
+            if (metadata.ExifProfile is null)
+            {
+                // No exif metadata was loaded yet, so just assign it
+                metadata.ExifProfile = newProfile;
+            }
+            else
+            {
+                // Try to merge existing keys with the ones from the new profile
+                foreach (IExifValue newKey in newProfile.Values)
+                {
+                    if (replaceExistingKeys || metadata.ExifProfile.GetValueInternal(newKey.Tag) is null)
+                    {
+                        metadata.ExifProfile.SetValueInternal(newKey.Tag, newKey.GetValue());
+                    }
+                }
+            }
+        }
+
         /// <summary>
         /// Reads a iTXt chunk, which contains international text data. It contains:
         /// - A uncompressed keyword.
diff --git a/tests/ImageSharp.Tests/Formats/Png/PngMetadataTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngMetadataTests.cs
index b4307af5d1..8db1d1aaf2 100644
--- a/tests/ImageSharp.Tests/Formats/Png/PngMetadataTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Png/PngMetadataTests.cs
@@ -1,6 +1,7 @@
 // Copyright (c) Six Labors.
 // Licensed under the Apache License, Version 2.0.
 
+using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Linq;
@@ -289,5 +290,34 @@ private static void VerifyTextDataIsPresent(PngMetadata meta)
             Assert.Contains(meta.TextData, m => m.Keyword is "NoLang" && m.Value is "this text chunk is missing a language tag");
             Assert.Contains(meta.TextData, m => m.Keyword is "NoTranslatedKeyword" && m.Value is "dieser chunk hat kein übersetztes Schlüßelwort");
         }
+
+        [Theory]
+        [InlineData(TestImages.Png.Issue1875)]
+        public void Identify_ReadsLegacyExifData(string imagePath)
+        {
+            var testFile = TestFile.Create(imagePath);
+            using (var stream = new MemoryStream(testFile.Bytes, false))
+            {
+                IImageInfo imageInfo = Image.Identify(stream);
+                Assert.NotNull(imageInfo);
+                Assert.NotNull(imageInfo.Metadata.ExifProfile);
+
+                PngMetadata meta = imageInfo.Metadata.GetFormatMetadata(PngFormat.Instance);
+                Assert.DoesNotContain(meta.TextData, t => t.Keyword.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase));
+
+                ExifProfile exif = imageInfo.Metadata.ExifProfile;
+                Assert.Equal(0, exif.InvalidTags.Count);
+                Assert.Equal(3, exif.Values.Count);
+
+                Assert.Equal(
+                    "A colorful tiling of blue, red, yellow, and green 4x4 pixel blocks.",
+                    exif.GetValue(ExifTag.ImageDescription).Value);
+                Assert.Equal(
+                    "Duplicated from basn3p02.png, then image metadata modified with exiv2",
+                    exif.GetValue(ExifTag.ImageHistory).Value);
+
+                Assert.Equal(42, (int)exif.GetValue(ExifTag.ImageNumber).Value);
+            }
+        }
     }
 }
diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs
index b87ffc7420..fc63e5b1fc 100644
--- a/tests/ImageSharp.Tests/TestImages.cs
+++ b/tests/ImageSharp.Tests/TestImages.cs
@@ -117,6 +117,9 @@ public static class Png
             // Issue 1765: https://github.com/SixLabors/ImageSharp/issues/1765
             public const string Issue1765_Net6DeflateStreamRead = "Png/issues/Issue_1765_Net6DeflateStreamRead.png";
 
+            // Discussion 1875: https://github.com/SixLabors/ImageSharp/discussions/1875
+            public const string Issue1875 = "Png/raw-profile-type-exif.png";
+
             public static class Bad
             {
                 public const string MissingDataChunk = "Png/xdtn0g01.png";
diff --git a/tests/Images/Input/Png/raw-profile-type-exif.png b/tests/Images/Input/Png/raw-profile-type-exif.png
new file mode 100644
index 0000000000..efd9b35aaa
--- /dev/null
+++ b/tests/Images/Input/Png/raw-profile-type-exif.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2259b08fd0c4681ecd068244df358b486f5eca1fcd18edbc7d9207eeef3ca5ed
+size 392