Skip to content

Commit

Permalink
Merge pull request #1877 from jubilant-enigma/je/nonstandard-png-exif
Browse files Browse the repository at this point in the history
Added support for loading exif data from PNG "Raw profile type exif" text chunk
  • Loading branch information
JimBobSquarePants authored Jan 4, 2022
2 parents f7b332a + 3c421bb commit e20410c
Show file tree
Hide file tree
Showing 5 changed files with 323 additions and 10 deletions.
98 changes: 98 additions & 0 deletions src/ImageSharp/Common/Helpers/HexConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Common.Helpers
{
internal static class HexConverter
{
    /// <summary>
    /// Parses a hexadecimal string into a caller-supplied byte span.
    /// Adapted from https://source.dot.net/#System.Private.CoreLib/Convert.cs,c9e4fbeaca708991.
    /// </summary>
    /// <param name="chars">The hexadecimal string to parse. Both upper and lower case digits are accepted.</param>
    /// <param name="bytes">The destination for the parsed bytes. Must be at least <paramref name="chars"/>.Length / 2 bytes long.</param>
    /// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="chars"/> has an odd length or contains a non-hexadecimal character,
    /// or <paramref name="bytes"/> is too small to hold the decoded data.
    /// Bytes decoded before an invalid character is found are still written to <paramref name="bytes"/>.
    /// </exception>
    public static int HexStringToBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
    {
        if ((chars.Length % 2) != 0)
        {
            throw new ArgumentException("Input string length must be a multiple of 2", nameof(chars));
        }

        // Compare against chars.Length / 2 rather than bytes.Length * 2 so that very large
        // output spans cannot overflow the multiplication and be rejected by mistake.
        int byteCount = chars.Length / 2;
        if (bytes.Length < byteCount)
        {
            throw new ArgumentException("Output span must be at least half the length of the input string", nameof(bytes));
        }

        // Slightly better performance in the loop below, allows us to skip a bounds check
        // while still supporting output buffers that are larger than necessary.
        bytes = bytes.Slice(0, byteCount);

        // See https://source.dot.net/#System.Private.CoreLib/HexConverter.cs,4681d45a0aa0b361
        int written = 0;
        for (int i = 0; written < bytes.Length; i += 2)
        {
            // Read the second character of the pair first, as the reference implementation does.
            int byteLo = FromChar(chars[i + 1]);
            int byteHi = FromChar(chars[i]);

            // byteHi hasn't been shifted to the high half yet, so the only way the bitwise or produces this pattern
            // is if either byteHi or byteLo was not a hex character.
            if ((byteLo | byteHi) == 0xFF)
            {
                throw new ArgumentException("Input string contained non-hexadecimal characters", nameof(chars));
            }

            bytes[written++] = (byte)((byteHi << 4) | byteLo);
        }

        return written;
    }

    /// <summary>
    /// Maps a character code to the value of the hexadecimal digit it represents.
    /// </summary>
    /// <param name="c">The character code to map.</param>
    /// <returns>A value from 0 to 15 for a hexadecimal digit; 0xFF for anything else.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int FromChar(int c)
    {
        // Map from an ASCII char to its hex value, e.g. arr['b'] == 11. 0xFF means it's not a hex digit.
        // This doesn't actually allocate.
        ReadOnlySpan<byte> charToHexLookup = new byte[]
        {
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 15
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 31
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 47
            0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 63
            0xFF, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 79
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 95
            0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 111
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 127
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 143
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 159
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 175
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 191
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 207
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 223
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 239
            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 255
        };

        // Anything beyond the table (non-Latin-1 characters) cannot be a hex digit.
        return c >= charToHexLookup.Length ? 0xFF : charToHexLookup[c];
    }
}
}
199 changes: 189 additions & 10 deletions src/ImageSharp/Formats/Png/PngDecoderCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Compression.Zlib;
using SixLabors.ImageSharp.Formats.Png.Chunks;
using SixLabors.ImageSharp.Formats.Png.Filters;
Expand Down Expand Up @@ -187,10 +188,10 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
this.AssignTransparentMarkers(alpha, pngMetadata);
break;
case PngChunkType.Text:
this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.CompressedText:
this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.InternationalText:
this.ReadInternationalTextChunk(pngMetadata, chunk.Data.GetSpan());
Expand All @@ -200,7 +201,7 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
{
byte[] exifData = new byte[chunk.Length];
chunk.Data.GetSpan().CopyTo(exifData);
metadata.ExifProfile = new ExifProfile(exifData);
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
}

break;
Expand Down Expand Up @@ -297,7 +298,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
break;
}

this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.CompressedText:
if (this.colorMetadataOnly)
Expand All @@ -306,7 +307,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
break;
}

this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.InternationalText:
if (this.colorMetadataOnly)
Expand All @@ -328,7 +329,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
{
byte[] exifData = new byte[chunk.Length];
chunk.Data.GetSpan().CopyTo(exifData);
metadata.ExifProfile = new ExifProfile(exifData);
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
}

break;
Expand Down Expand Up @@ -967,9 +968,10 @@ private void ReadHeaderChunk(PngMetadata pngMetadata, ReadOnlySpan<byte> data)
/// <summary>
/// Reads a text chunk containing image properties from the data.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
/// <param name="metadata">The metadata to decode to.</param>
/// <param name="data">The <see cref="T:Span"/> containing the data.</param>
private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
private void ReadTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
{
if (this.ignoreMetadata)
{
Expand All @@ -992,15 +994,19 @@ private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)

string value = PngConstants.Encoding.GetString(data.Slice(zeroIndex + 1));

metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
if (!this.TryReadTextChunkMetadata(baseMetadata, name, value))
{
metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
}
}

/// <summary>
/// Reads the compressed text chunk. Contains a uncompressed keyword and a compressed text string.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
/// <param name="metadata">The metadata to decode to.</param>
/// <param name="data">The <see cref="T:Span"/> containing the data.</param>
private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
{
if (this.ignoreMetadata)
{
Expand Down Expand Up @@ -1028,12 +1034,185 @@ private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> da

ReadOnlySpan<byte> compressedData = data.Slice(zeroIndex + 2);

if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed))
if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed) &&
!this.TryReadTextChunkMetadata(baseMetadata, name, uncompressed))
{
metadata.TextData.Add(new PngTextData(name, uncompressed, string.Empty, string.Empty));
}
}

/// <summary>
/// Determines whether a text chunk carries metadata that can be parsed into
/// <paramref name="baseMetadata"/> instead of being kept as plain text.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object that receives any parsed metadata.</param>
/// <param name="chunkName">The keyword of the text chunk.</param>
/// <param name="chunkText">The text payload of the chunk.</param>
/// <returns>True when the chunk was recognised and parsed as metadata. False when it was not,
/// in which case the caller should store the text chunk unmodified.</returns>
private bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText)
{
    // TODO: "Raw profile type iptc", potentially others?
    if (!chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase))
    {
        // Not a keyword we know how to interpret.
        return false;
    }

    // The keyword matches; the chunk only counts as metadata if the legacy exif payload actually parses.
    return this.TryReadLegacyExifTextChunk(baseMetadata, chunkText);
}

/// <summary>
/// Reads exif data encoded into a text chunk with the name "raw profile type exif".
/// This method was used by ImageMagick, exiftool, exiv2, digiKam, etc, before the
/// 2017 update to png that allowed a true exif chunk.
/// The text is expected to hold the word "exif", then the decimal byte count on its own line,
/// then the hex-encoded bytes (starting with the "Exif\0\0" header) split over one or more lines.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded exif tags into.</param>
/// <param name="data">The contents of the "raw profile type exif" text chunk.</param>
/// <returns>True if the text was parsed and the exif tags were stored in <paramref name="metadata"/>;
/// false if the text is malformed or truncated, in which case <paramref name="metadata"/> is left untouched.</returns>
private bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string data)
{
    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    // Guard the slice: a chunk shorter than the identifier cannot be valid and must not throw.
    if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan.Slice(0, 4), "exif".AsSpan()))
    {
        // "exif" identifier is missing from the beginning of the text chunk
        return false;
    }

    // Skip to the data length, which must be terminated by a newline
    dataSpan = dataSpan.Slice(4).TrimStart();
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        return false;
    }

    // Sequence of bytes for the exif header ("Exif" ASCII and two zero bytes).
    // This doesn't actually allocate.
    ReadOnlySpan<byte> exifHeader = new byte[] { 0x45, 0x78, 0x69, 0x66, 0x00, 0x00 };

    byte[] exifBlob;

    try
    {
        // Parsing happens inside the try so that a non-numeric or overflowing length is
        // reported as "not exif" instead of aborting the whole decode.
        int dataLength = ParseInt32(dataSpan.Slice(0, dataLengthEnd));

        // Skip to the hex-encoded data
        dataSpan = dataSpan.Slice(dataLengthEnd).Trim();

        if (dataLength < exifHeader.Length)
        {
            // Not enough room for the required exif header, this data couldn't possibly be valid
            return false;
        }

        if (dataLength > dataSpan.Length / 2)
        {
            // Every byte needs two hex characters, so the declared length cannot be satisfied by
            // the remaining text. Rejecting here also avoids allocating for a bogus length.
            return false;
        }

        // Parse the hex-encoded data into the byte array we are going to hand off to ExifProfile
        exifBlob = new byte[dataLength - exifHeader.Length];

        // Check for the presence of the exif header in the hex-encoded binary data.
        // The blob is reused as scratch space when it is big enough; it gets overwritten below.
        byte[] tempExifBuf = exifBlob;
        if (exifBlob.Length < exifHeader.Length)
        {
            tempExifBuf = new byte[exifHeader.Length];
        }

        HexConverter.HexStringToBytes(dataSpan.Slice(0, exifHeader.Length * 2), tempExifBuf);
        if (!tempExifBuf.AsSpan().Slice(0, exifHeader.Length).SequenceEqual(exifHeader))
        {
            // Exif header in the hex data is not valid
            return false;
        }

        // Skip over the exif header we just tested
        dataSpan = dataSpan.Slice(exifHeader.Length * 2);
        dataLength -= exifHeader.Length;

        // Load the hex-encoded data, one line at a time
        int written = 0;
        while (written < dataLength)
        {
            int newlineIndex = dataSpan.IndexOf('\n');
            ReadOnlySpan<char> lineSpan = newlineIndex < 0 ? dataSpan : dataSpan.Slice(0, newlineIndex);

            written += HexConverter.HexStringToBytes(lineSpan, exifBlob.AsSpan().Slice(written));

            if (newlineIndex < 0)
            {
                // That was the final line. Stop here: there is nothing left to advance to, and
                // looping again would re-read the same text (or spin forever on an empty span).
                break;
            }

            dataSpan = dataSpan.Slice(newlineIndex + 1);
        }

        if (written < dataLength)
        {
            // The text ended before the declared number of bytes was read
            return false;
        }
    }
    catch
    {
        // Best effort: any parse failure (bad length, non-hex characters, oversized line)
        // means this chunk is not usable exif data, so the caller keeps it as plain text.
        return false;
    }

    this.MergeOrSetExifProfile(metadata, new ExifProfile(exifBlob), replaceExistingKeys: false);
    return true;
}

/// <summary>
/// Tests two character spans for equality using an ordinal, case-insensitive comparison.
/// The helper exists because older target frameworks lack the span-based overload.
/// </summary>
/// <param name="span1">The first <see cref="ReadOnlySpan{T}"/> to compare.</param>
/// <param name="span2">The second <see cref="ReadOnlySpan{T}"/> to compare.</param>
/// <returns>True if the spans are equal ignoring case, false otherwise.</returns>
private static bool StringEqualsInsensitive(ReadOnlySpan<char> span1, ReadOnlySpan<char> span2)
{
#pragma warning disable IDE0022 // Use expression body for methods
#if !(NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER)
    // No span overload available: fall back to comparing materialised strings.
    string left = span1.ToString();
    string right = span2.ToString();
    return left.Equals(right, StringComparison.OrdinalIgnoreCase);
#else
    return MemoryExtensions.Equals(span1, span2, StringComparison.OrdinalIgnoreCase);
#endif
#pragma warning restore IDE0022 // Use expression body for methods
}

/// <summary>
/// Parses a span of characters as a 32-bit signed integer, with a fallback for older frameworks
/// that lack the span-based overload. Parsing is culture-invariant because the input is
/// machine-written file data, not user-facing text.
/// </summary>
/// <param name="span">The characters to parse. Leading and trailing white space is permitted.</param>
/// <returns>The parsed <see cref="int"/>.</returns>
/// <exception cref="FormatException"><paramref name="span"/> is not a valid integer.</exception>
/// <exception cref="OverflowException"><paramref name="span"/> is outside the range of <see cref="int"/>.</exception>
private static int ParseInt32(ReadOnlySpan<char> span)
{
#pragma warning disable IDE0022 // Use expression body for methods
#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
    return int.Parse(span, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture);
#else
    return int.Parse(span.ToString(), System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture);
#endif
#pragma warning restore IDE0022 // Use expression body for methods
}

/// <summary>
/// Stores <paramref name="newProfile"/> as the <see cref="ExifProfile"/> of <paramref name="metadata"/>
/// when none is present yet; otherwise copies its exif tags into the existing profile.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the exif data in.</param>
/// <param name="newProfile">The <see cref="ExifProfile"/> to copy exif tags from.</param>
/// <param name="replaceExistingKeys">Only relevant when <paramref name="metadata"/> already has an
/// <see cref="ExifProfile"/>: when true, tags from <paramref name="newProfile"/> overwrite conflicting
/// tags in the existing profile; when false, existing tags are kept and only missing ones are added.</param>
private void MergeOrSetExifProfile(ImageMetadata metadata, ExifProfile newProfile, bool replaceExistingKeys)
{
    ExifProfile current = metadata.ExifProfile;
    if (current is null)
    {
        // Nothing loaded so far: adopt the new profile wholesale.
        metadata.ExifProfile = newProfile;
        return;
    }

    // A profile already exists, so merge tag by tag.
    foreach (IExifValue incoming in newProfile.Values)
    {
        // The lookup is skipped entirely when overwriting is requested.
        bool shouldWrite = replaceExistingKeys || current.GetValueInternal(incoming.Tag) is null;
        if (!shouldWrite)
        {
            continue;
        }

        current.SetValueInternal(incoming.Tag, incoming.GetValue());
    }
}

/// <summary>
/// Reads a iTXt chunk, which contains international text data. It contains:
/// - A uncompressed keyword.
Expand Down
Loading

0 comments on commit e20410c

Please sign in to comment.