Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for loading exif data from PNG "Raw profile type exif" text chunk #1877

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
2c12c78
Added support for loading exif data from pre-2017 pngs from the "raw …
jubilant-enigma Dec 5, 2021
d5ddc46
Fixed an incomplete comment
jubilant-enigma Dec 5, 2021
c8a191d
Update src/ImageSharp/Formats/Png/PngDecoderCore.cs
jubilant-enigma Dec 6, 2021
7f4c9cd
Update src/ImageSharp/Formats/Png/PngDecoderCore.cs
jubilant-enigma Dec 6, 2021
8b9c334
Moved the ExifHeader property to after the constructor to satisfy Sty…
jubilant-enigma Dec 6, 2021
c2b906e
Removed unnecessary temporary allocations.
jubilant-enigma Dec 6, 2021
c8e2902
Moved legacy exif data loading test from PngDecoderTests to PngMetada…
jubilant-enigma Dec 6, 2021
0c65e13
Don't save the exif text chunk if it is successfully parsed
jubilant-enigma Dec 7, 2021
bf3035f
Don't include unnecessary parameters for helper functions that are on…
jubilant-enigma Dec 7, 2021
2a7ec5d
Moved ExifHeader to a local variable since it's only used in one func…
jubilant-enigma Dec 28, 2021
82e664a
New, faster HexStringToBytes implementation based off the reference s…
jubilant-enigma Dec 28, 2021
0409d96
Merge branch 'is_master/master' into je/nonstandard-png-exif
jubilant-enigma Dec 28, 2021
4c0df9f
Merge branch 'master' into je/nonstandard-png-exif
JimBobSquarePants Jan 3, 2022
76261ff
Update shared-infrastructure
JimBobSquarePants Jan 3, 2022
6dba6cf
Moved HexStringToBytes into a SixLabors.ImageSharp.Common.Helpers.Hex…
jubilant-enigma Jan 3, 2022
bdb69d1
Allow reading legacy exif data from uncompressed text chunks as well.
jubilant-enigma Jan 3, 2022
47cd2a4
Merge branch 'je/nonstandard-png-exif' of https://github.com/jubilant…
jubilant-enigma Jan 3, 2022
6cdc595
Merge with remote
jubilant-enigma Jan 3, 2022
7e7ea93
Fixed comments.
jubilant-enigma Jan 3, 2022
95318b1
Update shared-infrastructure
JimBobSquarePants Jan 4, 2022
3c421bb
Merge remote-tracking branch 'upstream/master' into je/nonstandard-pn…
JimBobSquarePants Jan 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions src/ImageSharp/Common/Helpers/HexConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright (c) Six Labors.
// Licensed under the Apache License, Version 2.0.

using System;
using System.Runtime.CompilerServices;

namespace SixLabors.ImageSharp.Common.Helpers
{
internal static class HexConverter
{
    /// <summary>
    /// Parses a hexadecimal string into a byte array without allocations. Throws on non-hexadecimal character.
    /// Adapted from https://source.dot.net/#System.Private.CoreLib/Convert.cs,c9e4fbeaca708991.
    /// </summary>
    /// <param name="chars">The hexadecimal string to parse. Both upper and lower case digits are accepted.</param>
    /// <param name="bytes">The destination for the parsed bytes. Must be at least <paramref name="chars"/>.Length / 2 bytes long.</param>
    /// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="chars"/> has an odd length or contains a non-hexadecimal character,
    /// or <paramref name="bytes"/> is too short to hold the decoded data.
    /// </exception>
    public static int HexStringToBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
    {
        if ((chars.Length % 2) != 0)
        {
            throw new ArgumentException("Input string length must be a multiple of 2", nameof(chars));
        }

        // Compare against half the input length instead of doubling the output length:
        // doubling overflows for output spans longer than int.MaxValue / 2 and would reject them.
        int byteCount = chars.Length / 2;
        if (bytes.Length < byteCount)
        {
            throw new ArgumentException("Output span must be at least half the length of the input string", nameof(bytes));
        }

        // Slightly better performance in the loop below, allows us to skip a bounds check
        // while still supporting output buffers that are larger than necessary
        bytes = bytes.Slice(0, byteCount);

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static int FromChar(int c)
        {
            // Map from an ASCII char to its hex value, e.g. arr['b'] == 11. 0xFF means it's not a hex digit.
            // This doesn't actually allocate.
            ReadOnlySpan<byte> charToHexLookup = new byte[]
            {
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 15
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 31
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 47
                0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 63
                0xFF, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 79
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 95
                0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 111
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 127
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 143
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 159
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 175
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 191
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 207
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 223
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 239
                0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 255
            };

            // Characters outside the table (anything above U+00FF) are never hex digits.
            return c >= charToHexLookup.Length ? 0xFF : charToHexLookup[c];
        }

        // See https://source.dot.net/#System.Private.CoreLib/HexConverter.cs,4681d45a0aa0b361
        for (int j = 0; j < bytes.Length; j++)
        {
            int byteHi = FromChar(chars[j * 2]);
            int byteLo = FromChar(chars[(j * 2) + 1]);

            // Valid nibbles are 0..15, so the bitwise or can only produce 0xFF
            // if either byteHi or byteLo was not a hex character.
            if ((byteLo | byteHi) == 0xFF)
            {
                throw new ArgumentException("Input string contained non-hexadecimal characters", nameof(chars));
            }

            bytes[j] = (byte)((byteHi << 4) | byteLo);
        }

        return bytes.Length;
    }
}
}
199 changes: 189 additions & 10 deletions src/ImageSharp/Formats/Png/PngDecoderCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Compression.Zlib;
using SixLabors.ImageSharp.Formats.Png.Chunks;
using SixLabors.ImageSharp.Formats.Png.Filters;
Expand Down Expand Up @@ -187,10 +188,10 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
this.AssignTransparentMarkers(alpha, pngMetadata);
break;
case PngChunkType.Text:
this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.CompressedText:
this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.InternationalText:
this.ReadInternationalTextChunk(pngMetadata, chunk.Data.GetSpan());
Expand All @@ -200,7 +201,7 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken
{
byte[] exifData = new byte[chunk.Length];
chunk.Data.GetSpan().CopyTo(exifData);
metadata.ExifProfile = new ExifProfile(exifData);
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
}

break;
Expand Down Expand Up @@ -297,7 +298,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
break;
}

this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.CompressedText:
if (this.colorMetadataOnly)
Expand All @@ -306,7 +307,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
break;
}

this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan());
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan());
break;
case PngChunkType.InternationalText:
if (this.colorMetadataOnly)
Expand All @@ -328,7 +329,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella
{
byte[] exifData = new byte[chunk.Length];
chunk.Data.GetSpan().CopyTo(exifData);
metadata.ExifProfile = new ExifProfile(exifData);
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true);
}

break;
Expand Down Expand Up @@ -967,9 +968,10 @@ private void ReadHeaderChunk(PngMetadata pngMetadata, ReadOnlySpan<byte> data)
/// <summary>
/// Reads a text chunk containing image properties from the data.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
/// <param name="metadata">The metadata to decode to.</param>
/// <param name="data">The <see cref="T:Span"/> containing the data.</param>
private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
private void ReadTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
{
if (this.ignoreMetadata)
{
Expand All @@ -992,15 +994,19 @@ private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)

string value = PngConstants.Encoding.GetString(data.Slice(zeroIndex + 1));

metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
if (!this.TryReadTextChunkMetadata(baseMetadata, name, value))
{
metadata.TextData.Add(new PngTextData(name, value, string.Empty, string.Empty));
}
}

/// <summary>
/// Reads the compressed text chunk. Contains an uncompressed keyword and a compressed text string.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param>
/// <param name="metadata">The metadata to decode to.</param>
/// <param name="data">The <see cref="T:Span"/> containing the data.</param>
private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data)
private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data)
{
if (this.ignoreMetadata)
{
Expand Down Expand Up @@ -1028,12 +1034,185 @@ private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> da

ReadOnlySpan<byte> compressedData = data.Slice(zeroIndex + 2);

if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed))
if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed) &&
!this.TryReadTextChunkMetadata(baseMetadata, name, uncompressed))
{
metadata.TextData.Add(new PngTextData(name, uncompressed, string.Empty, string.Empty));
}
}

/// <summary>
/// Determines whether a text chunk carries metadata that can be parsed into
/// <paramref name="baseMetadata"/> rather than being kept as plain text.
/// </summary>
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object that receives any parsed metadata.</param>
/// <param name="chunkName">The keyword of the text chunk.</param>
/// <param name="chunkText">The text payload of the chunk.</param>
/// <returns>True when the chunk was recognized and parsed as metadata. False when it was not
/// recognized (or failed to parse), in which case the caller should store the text chunk
/// unmodified.</returns>
private bool TryReadTextChunkMetadata(ImageMetadata baseMetadata, string chunkName, string chunkText)
{
    bool isLegacyExifChunk = chunkName.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase);
    if (!isLegacyExifChunk)
    {
        // TODO: "Raw profile type iptc", potentially others?

        // No special chunk data identified
        return false;
    }

    // The chunk only counts as metadata if the legacy exif payload actually parses.
    return this.TryReadLegacyExifTextChunk(baseMetadata, chunkText);
}

/// <summary>
/// Reads exif data encoded into a text chunk with the name "raw profile type exif".
/// This method was used by ImageMagick, exiftool, exiv2, digiKam, etc, before the
/// 2017 update to png that allowed a true exif chunk.
/// </summary>
/// <remarks>
/// The expected layout is the word "exif", the decimal byte count on its own line, then the
/// hex-encoded payload (starting with the "Exif\0\0" header) split across one or more lines.
/// Malformed input makes this method return false instead of throwing.
/// </remarks>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded exif tags into.</param>
/// <param name="data">The contents of the "raw profile type exif" text chunk.</param>
/// <returns>True if the exif data was decoded and merged into <paramref name="metadata"/>, false
/// if the text is not a well-formed legacy exif profile.</returns>
private bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string data)
{
    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    if (dataSpan.Length < 4 || !StringEqualsInsensitive(dataSpan.Slice(0, 4), "exif".AsSpan()))
    {
        // "exif" identifier is missing from the beginning of the text chunk
        return false;
    }

    // Skip to the data length
    dataSpan = dataSpan.Slice(4).TrimStart();
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        // There is no line break after the length, so there cannot be any payload either
        return false;
    }

    // Hex encoding of the exif header ("Exif" ASCII and two zero bytes). It consists of decimal
    // digits only, so an ordinal comparison of the text is equivalent to decoding and comparing bytes.
    ReadOnlySpan<char> exifHeaderHex = "457869660000".AsSpan();
    int exifHeaderLength = exifHeaderHex.Length / 2;

    byte[] exifBlob;
    try
    {
        // Throws FormatException/OverflowException on a malformed length, handled below
        int dataLength = ParseInt32(dataSpan.Slice(0, dataLengthEnd));

        // Skip to the hex-encoded data
        dataSpan = dataSpan.Slice(dataLengthEnd).Trim();

        if (dataLength < exifHeaderLength)
        {
            // Not enough room for the required exif header, this data couldn't possibly be valid
            return false;
        }

        if (dataLength > dataSpan.Length / 2)
        {
            // Every byte needs two hex characters; the declared length exceeds what the chunk can
            // hold. Reject before allocating a buffer sized by an untrusted value.
            return false;
        }

        // Check for the presence of the exif header in the hex-encoded binary data
        if (!dataSpan.Slice(0, exifHeaderHex.Length).SequenceEqual(exifHeaderHex))
        {
            // Exif header in the hex data is not valid
            return false;
        }

        // Skip over the exif header we just tested
        dataSpan = dataSpan.Slice(exifHeaderHex.Length);

        // Parse the hex-encoded data into the byte array we are going to hand off to ExifProfile
        exifBlob = new byte[dataLength - exifHeaderLength];

        // Load the hex-encoded data, one line at a time
        int written = 0;
        while (written < exifBlob.Length)
        {
            if (dataSpan.IsEmpty)
            {
                // Ran out of input before the declared length was reached. Without this check an
                // empty remainder would decode zero bytes forever and the loop would never end.
                return false;
            }

            int newlineIndex = dataSpan.IndexOf('\n');
            ReadOnlySpan<char> lineSpan = newlineIndex < 0 ? dataSpan : dataSpan.Slice(0, newlineIndex);

            // Throws ArgumentException if the line is not valid hex or overflows the remaining space
            written += HexConverter.HexStringToBytes(lineSpan, exifBlob.AsSpan(written));

            // The last line has no terminator: everything has been consumed at that point
            dataSpan = newlineIndex < 0 ? default : dataSpan.Slice(newlineIndex + 1);
        }
    }
    catch (Exception ex) when (ex is ArgumentException || ex is FormatException || ex is OverflowException)
    {
        // The length field or the hex payload was malformed
        return false;
    }

    this.MergeOrSetExifProfile(metadata, new ExifProfile(exifBlob), replaceExistingKeys: false);
    return true;
}

/// <summary>
/// Performs an ordinal, case-insensitive equality comparison of two character spans.
/// This helper exists because older target frameworks lack the span-based extension method.
/// </summary>
/// <param name="span1">The first <see cref="ReadOnlySpan{T}"/> to compare.</param>
/// <param name="span2">The second <see cref="ReadOnlySpan{T}"/> to compare.</param>
/// <returns>True if the spans are equal when ignoring case, false otherwise.</returns>
private static bool StringEqualsInsensitive(ReadOnlySpan<char> span1, ReadOnlySpan<char> span2)
{
    bool areEqual;
#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
    areEqual = MemoryExtensions.Equals(span1, span2, StringComparison.OrdinalIgnoreCase);
#else
    // Fallback for older frameworks: materialize both spans as strings and compare those.
    areEqual = string.Equals(span1.ToString(), span2.ToString(), StringComparison.OrdinalIgnoreCase);
#endif
    return areEqual;
}

/// <summary>
/// int.Parse() a ReadOnlySpan&lt;char&gt;, with a fallback for older frameworks.
/// The text is always interpreted with the invariant culture, because it comes from
/// file data and must not depend on the culture of the current thread.
/// </summary>
/// <param name="span">The text representation of the <see cref="int"/> to parse.
/// Leading and trailing white space and a leading sign are permitted.</param>
/// <returns>The parsed <see cref="int"/>.</returns>
/// <exception cref="FormatException"><paramref name="span"/> is not a valid integer.</exception>
/// <exception cref="OverflowException"><paramref name="span"/> is outside the range of <see cref="int"/>.</exception>
private static int ParseInt32(ReadOnlySpan<char> span)
{
#pragma warning disable IDE0022 // Use expression body for methods
#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
    return int.Parse(span, provider: System.Globalization.CultureInfo.InvariantCulture);
#else
    return int.Parse(span.ToString(), System.Globalization.CultureInfo.InvariantCulture);
#endif
#pragma warning restore IDE0022 // Use expression body for methods
}

/// <summary>
/// Stores <paramref name="newProfile"/> as the <see cref="ExifProfile"/> of <paramref name="metadata"/>
/// when none is present yet; otherwise copies the exif tags of <paramref name="newProfile"/> into the
/// profile that <paramref name="metadata"/> already holds.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the exif data in.</param>
/// <param name="newProfile">The <see cref="ExifProfile"/> to copy exif tags from.</param>
/// <param name="replaceExistingKeys">Only relevant when <paramref name="metadata"/> already contains an
/// <see cref="ExifProfile"/>: when true, tags that exist in both profiles are overwritten with the value
/// from <paramref name="newProfile"/>; when false, the existing values are kept.</param>
private void MergeOrSetExifProfile(ImageMetadata metadata, ExifProfile newProfile, bool replaceExistingKeys)
{
    if (metadata.ExifProfile is null)
    {
        // No exif metadata was loaded yet, so just assign it
        metadata.ExifProfile = newProfile;
        return;
    }

    // Try to merge existing keys with the ones from the new profile
    foreach (IExifValue incoming in newProfile.Values)
    {
        bool shouldWrite = replaceExistingKeys || metadata.ExifProfile.GetValueInternal(incoming.Tag) is null;
        if (!shouldWrite)
        {
            // The tag is already present and must be preserved
            continue;
        }

        metadata.ExifProfile.SetValueInternal(incoming.Tag, incoming.GetValue());
    }
}

/// <summary>
/// Reads an iTXt chunk, which contains international text data. It contains:
/// - An uncompressed keyword.
Expand Down
Loading