-
-
Notifications
You must be signed in to change notification settings - Fork 857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added support for loading exif data from PNG "Raw profile type exif" text chunk #1877
Changes from 14 commits
2c12c78
d5ddc46
c8a191d
7f4c9cd
8b9c334
c2b906e
c8e2902
0c65e13
bf3035f
2a7ec5d
82e664a
0409d96
4c0df9f
76261ff
6dba6cf
bdb69d1
47cd2a4
6cdc595
7e7ea93
95318b1
3c421bb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,7 +190,7 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken | |
this.ReadTextChunk(pngMetadata, chunk.Data.GetSpan()); | ||
break; | ||
case PngChunkType.CompressedText: | ||
this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan()); | ||
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan()); | ||
break; | ||
case PngChunkType.InternationalText: | ||
this.ReadInternationalTextChunk(pngMetadata, chunk.Data.GetSpan()); | ||
|
@@ -200,7 +200,7 @@ public Image<TPixel> Decode<TPixel>(BufferedReadStream stream, CancellationToken | |
{ | ||
byte[] exifData = new byte[chunk.Length]; | ||
chunk.Data.GetSpan().CopyTo(exifData); | ||
metadata.ExifProfile = new ExifProfile(exifData); | ||
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true); | ||
} | ||
|
||
break; | ||
|
@@ -306,7 +306,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella | |
break; | ||
} | ||
|
||
this.ReadCompressedTextChunk(pngMetadata, chunk.Data.GetSpan()); | ||
this.ReadCompressedTextChunk(metadata, pngMetadata, chunk.Data.GetSpan()); | ||
break; | ||
case PngChunkType.InternationalText: | ||
if (this.colorMetadataOnly) | ||
|
@@ -328,7 +328,7 @@ public IImageInfo Identify(BufferedReadStream stream, CancellationToken cancella | |
{ | ||
byte[] exifData = new byte[chunk.Length]; | ||
chunk.Data.GetSpan().CopyTo(exifData); | ||
metadata.ExifProfile = new ExifProfile(exifData); | ||
this.MergeOrSetExifProfile(metadata, new ExifProfile(exifData), replaceExistingKeys: true); | ||
} | ||
|
||
break; | ||
|
@@ -998,9 +998,10 @@ private void ReadTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data) | |
/// <summary> | ||
/// Reads the compressed text chunk. Contains an uncompressed keyword and a compressed text string. | ||
/// </summary> | ||
/// <param name="baseMetadata">The <see cref="ImageMetadata"/> object.</param> | ||
/// <param name="metadata">The metadata to decode to.</param> | ||
/// <param name="data">The <see cref="T:Span"/> containing the data.</param> | ||
private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> data) | ||
private void ReadCompressedTextChunk(ImageMetadata baseMetadata, PngMetadata metadata, ReadOnlySpan<byte> data) | ||
{ | ||
if (this.ignoreMetadata) | ||
{ | ||
|
@@ -1030,7 +1031,251 @@ private void ReadCompressedTextChunk(PngMetadata metadata, ReadOnlySpan<byte> da | |
|
||
if (this.TryUncompressTextData(compressedData, PngConstants.Encoding, out string uncompressed)) | ||
{ | ||
metadata.TextData.Add(new PngTextData(name, uncompressed, string.Empty, string.Empty)); | ||
if (name.Equals("Raw profile type exif", StringComparison.OrdinalIgnoreCase) && | ||
this.TryReadLegacyExifTextChunk(baseMetadata, uncompressed)) | ||
{ | ||
// Successfully parsed exif data stored as text in this chunk | ||
} | ||
else | ||
{ | ||
// Seems to be regular old text data, or we failed to parse it in any special way | ||
metadata.TextData.Add(new PngTextData(name, uncompressed, string.Empty, string.Empty)); | ||
} | ||
} | ||
} | ||
|
||
/// <summary>
/// Reads exif data encoded into a text chunk with the name "raw profile type exif".
/// This method was used by ImageMagick, exiftool, exiv2, digiKam, etc, before the
/// 2017 update to png that allowed a true exif chunk.
/// </summary>
/// <remarks>
/// The expected layout is: the word "exif", the decimal byte count on its own line,
/// then the hex-encoded payload (starting with the "Exif\0\0" header) split over
/// one or more lines. Malformed input never throws; it makes this method return false.
/// </remarks>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the decoded exif tags into.</param>
/// <param name="data">The contents of the "raw profile type exif" text chunk.</param>
/// <returns>
/// <see langword="true"/> if the text was decoded and handed to <c>MergeOrSetExifProfile</c>;
/// <see langword="false"/> if the text is not a well-formed legacy exif profile.
/// </returns>
private bool TryReadLegacyExifTextChunk(ImageMetadata metadata, string data)
{
    const int IdentifierLength = 4;

    ReadOnlySpan<char> dataSpan = data.AsSpan().TrimStart();

    // Guard the slice: text shorter than the identifier cannot contain it.
    if (dataSpan.Length < IdentifierLength ||
        !StringEqualsInsensitive(dataSpan.Slice(0, IdentifierLength), "exif".AsSpan()))
    {
        // "exif" identifier is missing from the beginning of the text chunk
        return false;
    }

    // Skip to the data length, which is terminated by a newline
    dataSpan = dataSpan.Slice(IdentifierLength).TrimStart();
    int dataLengthEnd = dataSpan.IndexOf('\n');
    if (dataLengthEnd < 0)
    {
        // No line break after the length, so there is no payload to read
        return false;
    }

    int dataLength;
    try
    {
        dataLength = ParseInt32(dataSpan.Slice(0, dataLengthEnd));
    }
    catch (Exception ex) when (ex is FormatException || ex is OverflowException)
    {
        // The length field is not a valid 32-bit integer
        return false;
    }

    // Skip to the hex-encoded data
    dataSpan = dataSpan.Slice(dataLengthEnd).Trim();

    // Sequence of bytes for the exif header ("Exif" ASCII and two zero bytes).
    // This doesn't actually allocate.
    ReadOnlySpan<byte> exifHeader = new byte[] { 0x45, 0x78, 0x69, 0x66, 0x00, 0x00 };

    if (dataLength < exifHeader.Length)
    {
        // Not enough room for the required exif header, this data couldn't possibly be valid
        return false;
    }

    // Every payload byte needs two hex characters, so a declared length larger than
    // half of the remaining text is bogus. Rejecting it here also stops an untrusted
    // length from driving a huge allocation.
    if (dataLength > dataSpan.Length / 2)
    {
        return false;
    }

    Span<byte> headerBytes = stackalloc byte[6];
    byte[] exifBlob = new byte[dataLength - exifHeader.Length];

    try
    {
        // Check for the presence of the exif header in the hex-encoded binary data
        HexStringToBytes(dataSpan.Slice(0, exifHeader.Length * 2), headerBytes);
        if (!headerBytes.SequenceEqual(exifHeader))
        {
            // Exif header in the hex data is not valid
            return false;
        }

        // Skip over the exif header we just tested
        dataSpan = dataSpan.Slice(exifHeader.Length * 2);

        // Load the hex-encoded data, one line at a time
        int written = 0;
        while (written < exifBlob.Length)
        {
            if (dataSpan.IsEmpty)
            {
                // The text ran out before the declared length was reached. Without this
                // check the loop would never advance and would spin forever.
                return false;
            }

            int newlineIndex = dataSpan.IndexOf('\n');
            ReadOnlySpan<char> lineSpan = newlineIndex < 0 ? dataSpan : dataSpan.Slice(0, newlineIndex);

            written += HexStringToBytes(lineSpan, exifBlob.AsSpan().Slice(written));

            // Advance past the consumed line; the final line leaves nothing behind.
            dataSpan = newlineIndex < 0 ? ReadOnlySpan<char>.Empty : dataSpan.Slice(newlineIndex + 1);
        }
    }
    catch (ArgumentException)
    {
        // Raised for odd-length lines, non-hex characters, lines that overflow the
        // destination, and out-of-range slices (ArgumentOutOfRangeException).
        return false;
    }

    this.MergeOrSetExifProfile(metadata, new ExifProfile(exifBlob), replaceExistingKeys: false);
    return true;
}
|
||
/// <summary>
/// Tests two character spans for equality, ignoring case (ordinal comparison).
/// Exists because the span-based extension method is unavailable on older target frameworks.
/// </summary>
/// <param name="span1">The first <see cref="ReadOnlySpan{T}"/> of characters.</param>
/// <param name="span2">The second <see cref="ReadOnlySpan{T}"/> of characters.</param>
/// <returns>True if the spans match when case is ignored, false otherwise.</returns>
private static bool StringEqualsInsensitive(ReadOnlySpan<char> span1, ReadOnlySpan<char> span2)
{
#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
    bool areEqual = span1.Equals(span2, StringComparison.OrdinalIgnoreCase);
#else
    // Older frameworks: fall back to comparing materialized strings.
    string left = span1.ToString();
    string right = span2.ToString();
    bool areEqual = left.Equals(right, StringComparison.OrdinalIgnoreCase);
#endif
    return areEqual;
}
|
||
/// <summary>
/// Parses a span of characters as a 32-bit signed integer, with a fallback for older frameworks.
/// Parsing uses the invariant culture so the result does not depend on the current thread culture.
/// </summary>
/// <param name="span">The characters to parse; leading and trailing white space is allowed.</param>
/// <returns>The parsed <see cref="int"/>.</returns>
/// <exception cref="FormatException">The span is not a valid integer.</exception>
/// <exception cref="OverflowException">The value does not fit in an <see cref="int"/>.</exception>
private static int ParseInt32(ReadOnlySpan<char> span)
{
#pragma warning disable IDE0022 // Use expression body for methods
#if NETSTANDARD2_1 || NETCOREAPP2_1_OR_GREATER
    return int.Parse(
        span,
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture);
#else
    // No span overload on older frameworks, so the text has to be materialized first.
    return int.Parse(
        span.ToString(),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture);
#endif
#pragma warning restore IDE0022 // Use expression body for methods
}
|
||
/// <summary>
/// Decodes a hexadecimal string into a caller-supplied byte span without allocating.
/// Throws on a non-hexadecimal character.
/// Adapted from https://source.dot.net/#System.Private.CoreLib/Convert.cs,c9e4fbeaca708991.
/// </summary>
/// <param name="chars">The hexadecimal text to decode. Its length must be even.</param>
/// <param name="bytes">The destination for the decoded bytes. Must be at least <paramref name="chars"/>.Length / 2 bytes long.</param>
/// <returns>The number of bytes written to <paramref name="bytes"/>.</returns>
/// <exception cref="ArgumentException">
/// The input has an odd length, the destination is too small, or the input contains a non-hexadecimal character.
/// </exception>
private static int HexStringToBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
{
    if ((chars.Length % 2) != 0)
    {
        throw new ArgumentException("Input string length must be a multiple of 2", nameof(chars));
    }

    if ((bytes.Length * 2) < chars.Length)
    {
        throw new ArgumentException("Output span must be at least half the length of the input string");
    }

    // Maps a UTF-16 code unit to its hex digit value, or 0xFF when it is not a hex digit.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static int HexDigitValue(int c)
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }

        if (c >= 'A' && c <= 'F')
        {
            return (c - 'A') + 10;
        }

        if (c >= 'a' && c <= 'f')
        {
            return (c - 'a') + 10;
        }

        return 0xFF;
    }

    // Trim the destination to the exact size needed; larger output buffers are still accepted.
    Span<byte> destination = bytes.Slice(0, chars.Length / 2);

    for (int index = 0; index < destination.Length; index++)
    {
        int high = HexDigitValue(chars[2 * index]);
        int low = HexDigitValue(chars[(2 * index) + 1]);

        // Valid digits are at most 0xF, so the OR can only be 0xFF when one of them is invalid.
        if ((high | low) == 0xFF)
        {
            throw new ArgumentException("Input string contained non-hexadecimal characters", nameof(chars));
        }

        destination[index] = (byte)((high << 4) | low);
    }

    return destination.Length;
}
|
||
/// <summary>
/// Stores <paramref name="newProfile"/> as the <see cref="ExifProfile"/> of <paramref name="metadata"/>
/// when none is present yet; otherwise copies the tags of <paramref name="newProfile"/> into the
/// profile that <paramref name="metadata"/> already holds.
/// </summary>
/// <param name="metadata">The <see cref="ImageMetadata"/> to store the exif data in.</param>
/// <param name="newProfile">The <see cref="ExifProfile"/> to copy exif tags from.</param>
/// <param name="replaceExistingKeys">When <paramref name="metadata"/> already has an <see cref="ExifProfile"/>,
/// decides whether tags it already contains are overwritten by the conflicting tags
/// of <paramref name="newProfile"/>.</param>
private void MergeOrSetExifProfile(ImageMetadata metadata, ExifProfile newProfile, bool replaceExistingKeys)
{
    // Nothing loaded so far: adopt the new profile wholesale.
    if (metadata.ExifProfile is null)
    {
        metadata.ExifProfile = newProfile;
        return;
    }

    // Otherwise merge tag by tag into the existing profile.
    foreach (IExifValue incoming in newProfile.Values)
    {
        // Keep the existing value when overwriting is disabled and the tag is already set.
        if (!replaceExistingKeys && !(metadata.ExifProfile.GetValueInternal(incoming.Tag) is null))
        {
            continue;
        }

        metadata.ExifProfile.SetValueInternal(incoming.Tag, incoming.GetValue());
    }
}
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reverse the condition here so you don't have the odd empty scope.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I ended up moving this into its own function, `TryReadTextChunkMetadata`, since I was missing support for loading legacy exif data from uncompressed text chunks, and I also wanted to have an easy spot to add support for other metadata chunks in the future. I suppose I could also just add IPTC support in this PR too if you want, but I figure it's getting somewhat large and it might be better to just make another PR with that feature after this one gets merged.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, let's keep IPTC separate for now.