Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preamble segment handlers #291

Merged
merged 6 commits into from
May 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MetadataExtractor.Tests/Formats/Exif/ExifReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public void ReadJpegSegmentWithNoExifData()
{
var badExifSegment = new JpegSegment(JpegSegmentType.App1, new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, offset: 0);
var directories = new ExifReader().ReadJpegSegments(new[] { badExifSegment });
Assert.Equal(0, directories.Count);
Assert.Empty(directories);
}

[Fact]
Expand Down
16 changes: 3 additions & 13 deletions MetadataExtractor/Formats/Adobe/AdobeJpegReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
using MetadataExtractor.Formats.Jpeg;
using MetadataExtractor.IO;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Adobe
{
/// <summary>Decodes Adobe formatted data stored in JPEG files, normally in the APPE (App14) segment.</summary>
Expand All @@ -23,17 +17,13 @@ public sealed class AdobeJpegReader : IJpegSegmentMetadataReader
{
public const string JpegSegmentPreamble = "Adobe";

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.AppE };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.AppE };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
return segments
.Where(segment => segment.Bytes.Length == 12 && JpegSegmentPreamble.Equals(Encoding.UTF8.GetString(segment.Bytes, 0, JpegSegmentPreamble.Length), StringComparison.OrdinalIgnoreCase))
.Select(bytes => Extract(new SequentialByteArrayReader(bytes.Bytes)))
#if NET35
.Cast<Directory>()
#endif
.ToList();
.Select(bytes => (Directory)Extract(new SequentialByteArrayReader(bytes.Bytes)));
}

public AdobeJpegDirectory Extract(SequentialReader reader)
Expand Down
30 changes: 13 additions & 17 deletions MetadataExtractor/Formats/Exif/ExifReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using MetadataExtractor.Formats.Jpeg;
using MetadataExtractor.Formats.Tiff;
using MetadataExtractor.IO;

using MetadataExtractor.Util;
#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
Expand All @@ -22,27 +21,24 @@ namespace MetadataExtractor.Formats.Exif
/// <see cref="GpsDirectory"/>, camera makernote directories and more.
/// </summary>
/// <author>Drew Noakes https://drewnoakes.com</author>
public sealed class ExifReader : IJpegSegmentMetadataReader
public sealed class ExifReader : JpegSegmentWithPreambleMetadataReader
{
/// <summary>Exif data stored in JPEG files' APP1 segment are preceded by this six character preamble "Exif\0\0".</summary>
public const string JpegSegmentPreamble = "Exif\x0\x0";

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.App1 };
private static readonly byte[] _preambleBytes = Encoding.ASCII.GetBytes(JpegSegmentPreamble);

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
return segments
.Where(segment => StartsWithJpegExifPreamble(segment.Bytes))
.SelectMany(segment => Extract(new ByteArrayReader(segment.Bytes, baseOffset: JpegSegmentPreamble.Length)))
.ToList();
}
public static bool StartsWithJpegExifPreamble(byte[] bytes) => bytes.StartsWith(_preambleBytes);

/// <summary>
/// Indicates whether <paramref name="bytes"/> starts with <see cref="JpegSegmentPreamble"/>.
/// </summary>
public static bool StartsWithJpegExifPreamble(byte[] bytes)
public static int JpegSegmentPreambleLength => _preambleBytes.Length;

/// <summary>Exif data stored in JPEG files' APP1 segment are preceded by this six character preamble "Exif\0\0".</summary>
protected override byte[] PreambleBytes { get; } = _preambleBytes;

public override ICollection<JpegSegmentType> SegmentTypes { get; } = new[] { JpegSegmentType.App1 };

protected override IEnumerable<Directory> Extract(byte[] segmentBytes, int preambleLength)
{
return bytes.Length >= JpegSegmentPreamble.Length && Encoding.UTF8.GetString(bytes, 0, JpegSegmentPreamble.Length) == JpegSegmentPreamble;
return Extract(new ByteArrayReader(segmentBytes, baseOffset: preambleLength));
}

/// <summary>
Expand Down
28 changes: 4 additions & 24 deletions MetadataExtractor/Formats/Icc/IccReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
using MetadataExtractor.IO;
using MetadataExtractor.Util;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Icc
{
/// <summary>Reads ICC profile data.</summary>
Expand All @@ -35,17 +29,17 @@ public sealed class IccReader : IJpegSegmentMetadataReader
// NOTE the header is 14 bytes, while "ICC_PROFILE" is 11
private const int JpegSegmentPreambleLength = 14;

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.App2 };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.App2 };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
// ICC data can be spread across multiple JPEG segments.

// Skip any segments that do not contain the required preamble
var iccSegments = segments.Where(segment => segment.Bytes.Length > JpegSegmentPreambleLength && IsSubarrayEqualTo(segment.Bytes, 0, _jpegSegmentPreambleBytes)).ToList();
var iccSegments = segments.Where(segment => segment.Bytes.Length > JpegSegmentPreambleLength && segment.Bytes.StartsWith(_jpegSegmentPreambleBytes)).ToList();

if (iccSegments.Count == 0)
return new Directory[0];
return Enumerable.Empty<Directory>();

byte[] buffer;
if (iccSegments.Count == 1)
Expand Down Expand Up @@ -177,19 +171,5 @@ public static string GetStringFromUInt32(uint d)

return Encoding.UTF8.GetString(b, 0, b.Length);
}

/// <summary>
/// Determines whether the elements of <paramref name="source"/>, starting at
/// <paramref name="sourceIndex"/>, are equal to the elements of <paramref name="pattern"/>.
/// </summary>
/// <param name="source">The array to look inside.</param>
/// <param name="sourceIndex">The index in <paramref name="source"/> at which the comparison starts.</param>
/// <param name="pattern">The sequence of elements expected at <paramref name="sourceIndex"/>.</param>
/// <returns>
/// <c>true</c> if <paramref name="pattern"/> fits within <paramref name="source"/> at
/// <paramref name="sourceIndex"/> and every element compares equal via <see cref="object.Equals(object)"/>;
/// otherwise <c>false</c>.
/// </returns>
private static bool IsSubarrayEqualTo<T>(T[] source, int sourceIndex, T[] pattern) where T : notnull
{
    // The pattern fits as long as it does not run past the end of source. A pattern that
    // ends exactly on the last element is a valid match, so the comparison must be strict.
    if (sourceIndex + pattern.Length > source.Length)
        return false;

    for (int i = sourceIndex, j = 0; j < pattern.Length; i++, j++)
    {
        if (!source[i].Equals(pattern[j]))
            return false;
    }

    return true;
}
}
}
16 changes: 3 additions & 13 deletions MetadataExtractor/Formats/Iptc/IptcReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
using MetadataExtractor.Formats.Jpeg;
using MetadataExtractor.IO;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Iptc
{
/// <summary>Reads IPTC data.</summary>
Expand Down Expand Up @@ -43,18 +37,14 @@ public sealed class IptcReader : IJpegSegmentMetadataReader

private const byte IptcMarkerByte = 0x1c;

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.AppD };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.AppD };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
// Ensure data starts with the IPTC marker byte
return segments
.Where(segment => segment.Bytes.Length != 0 && segment.Bytes[0] == IptcMarkerByte)
.Select(segment => Extract(new SequentialByteArrayReader(segment.Bytes), segment.Bytes.Length))
#if NET35
.Cast<Directory>()
#endif
.ToList();
.Select(segment => (Directory)Extract(new SequentialByteArrayReader(segment.Bytes), segment.Bytes.Length));
}

/// <summary>Reads IPTC values and returns them in an <see cref="IptcDirectory"/>.</summary>
Expand Down
24 changes: 6 additions & 18 deletions MetadataExtractor/Formats/Jfif/JfifReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,10 @@

using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using MetadataExtractor.Formats.Jpeg;
using MetadataExtractor.IO;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jfif
{
/// <summary>Reads JFIF (JPEG File Interchange Format) data.</summary>
Expand All @@ -24,22 +17,17 @@ namespace MetadataExtractor.Formats.Jfif
/// </list>
/// </remarks>
/// <author>Yuri Binev, Drew Noakes, Markus Meyer</author>
public sealed class JfifReader : IJpegSegmentMetadataReader
public sealed class JfifReader : JpegSegmentWithPreambleMetadataReader
{
public const string JpegSegmentPreamble = "JFIF";

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.App0 };
protected override byte[] PreambleBytes { get; } = Encoding.ASCII.GetBytes(JpegSegmentPreamble);

public override ICollection<JpegSegmentType> SegmentTypes { get; } = new[] { JpegSegmentType.App0 };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
protected override IEnumerable<Directory> Extract(byte[] segmentBytes, int preambleLength)
{
// Skip segments not starting with the required header
return segments
.Where(segment => segment.Bytes.Length >= JpegSegmentPreamble.Length && JpegSegmentPreamble == Encoding.UTF8.GetString(segment.Bytes, 0, JpegSegmentPreamble.Length))
.Select(segment => Extract(new ByteArrayReader(segment.Bytes)))
#if NET35
.Cast<Directory>()
#endif
.ToList();
yield return Extract(new ByteArrayReader(segmentBytes));
}

/// <summary>Reads JFIF values and returns them in a <see cref="JfifDirectory"/>.</summary>
Expand Down
24 changes: 6 additions & 18 deletions MetadataExtractor/Formats/Jfxx/JfxxReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,10 @@

using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using MetadataExtractor.Formats.Jpeg;
using MetadataExtractor.IO;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jfxx
{
/// <summary>Reads JFXX (JFIF Extensions) data.</summary>
Expand All @@ -24,22 +17,17 @@ namespace MetadataExtractor.Formats.Jfxx
/// </list>
/// </remarks>
/// <author>Drew Noakes</author>
public sealed class JfxxReader : IJpegSegmentMetadataReader
public sealed class JfxxReader : JpegSegmentWithPreambleMetadataReader
{
public const string JpegSegmentPreamble = "JFXX";

ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.App0 };
protected override byte[] PreambleBytes { get; } = Encoding.ASCII.GetBytes(JpegSegmentPreamble);

public override ICollection<JpegSegmentType> SegmentTypes { get; } = new[] { JpegSegmentType.App0 };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
protected override IEnumerable<Directory> Extract(byte[] segmentBytes, int preambleLength)
{
// Skip segments not starting with the required header
return segments
.Where(segment => segment.Bytes.Length >= JpegSegmentPreamble.Length && JpegSegmentPreamble == Encoding.UTF8.GetString(segment.Bytes, 0, JpegSegmentPreamble.Length))
.Select(segment => Extract(new ByteArrayReader(segment.Bytes)))
#if NET35
.Cast<Directory>()
#endif
.ToList();
yield return Extract(new ByteArrayReader(segmentBytes));
}

/// <summary>Reads JFXX values and returns them in a <see cref="JfxxDirectory"/>.</summary>
Expand Down
8 changes: 1 addition & 7 deletions MetadataExtractor/Formats/Jpeg/IJpegSegmentMetadataReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,6 @@

using System.Collections.Generic;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jpeg
{
/// <summary>Defines an object that extracts metadata from JPEG segments.</summary>
Expand All @@ -20,6 +14,6 @@ public interface IJpegSegmentMetadataReader
/// <param name="segments">
/// A sequence of JPEG segments from which the metadata should be extracted. These are in the order encountered in the original file.
/// </param>
DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments);
IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments);
}
}
16 changes: 3 additions & 13 deletions MetadataExtractor/Formats/Jpeg/JpegCommentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,20 @@
using System.Linq;
using System.Text;

#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jpeg
{
/// <summary>Reads JPEG comments.</summary>
/// <remarks>JPEG files can store zero or more comments in COM segments.</remarks>
/// <author>Drew Noakes https://drewnoakes.com</author>
public sealed class JpegCommentReader : IJpegSegmentMetadataReader
{
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.Com };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.Com };

/// <summary>Reads JPEG comments, returning each in a <see cref="JpegCommentDirectory"/>.</summary>
public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
// The entire contents of the segment are the comment
return segments.Select(segment => new JpegCommentDirectory(new StringValue(segment.Bytes, Encoding.UTF8)))
#if NET35
.Cast<Directory>()
#endif
.ToList();
return segments.Select(segment => (Directory)new JpegCommentDirectory(new StringValue(segment.Bytes, Encoding.UTF8)));
}
}
}
9 changes: 2 additions & 7 deletions MetadataExtractor/Formats/Jpeg/JpegDhtReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,6 @@
using System.Collections.Generic;
using System.IO;
using MetadataExtractor.IO;
#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jpeg
{
Expand All @@ -17,9 +12,9 @@ namespace MetadataExtractor.Formats.Jpeg
/// <author>Kevin Mott https://github.com/kwhopper</author>
public sealed class JpegDhtReader : IJpegSegmentMetadataReader
{
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.Dht };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.Dht };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
// This Extract structure is a little different since we only want
// to return one HuffmanTablesDirectory for one-to-many segments
Expand Down
15 changes: 3 additions & 12 deletions MetadataExtractor/Formats/Jpeg/JpegDnlReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
using System.IO;
using System.Linq;
using MetadataExtractor.IO;
#if NET35
using DirectoryList = System.Collections.Generic.IList<MetadataExtractor.Directory>;
#else
using DirectoryList = System.Collections.Generic.IReadOnlyList<MetadataExtractor.Directory>;
#endif

namespace MetadataExtractor.Formats.Jpeg
{
Expand All @@ -19,15 +14,11 @@ namespace MetadataExtractor.Formats.Jpeg
/// <author>Kevin Mott https://github.com/kwhopper</author>
public sealed class JpegDnlReader : IJpegSegmentMetadataReader
{
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes => new[] { JpegSegmentType.Dnl };
ICollection<JpegSegmentType> IJpegSegmentMetadataReader.SegmentTypes { get; } = new[] { JpegSegmentType.Dnl };

public DirectoryList ReadJpegSegments(IEnumerable<JpegSegment> segments)
public IEnumerable<Directory> ReadJpegSegments(IEnumerable<JpegSegment> segments)
{
return segments.Select(segment => Extract(new SequentialByteArrayReader(segment.Bytes)))
#if NET35
.Cast<Directory>()
#endif
.ToList();
return segments.Select(segment => (Directory)Extract(new SequentialByteArrayReader(segment.Bytes)));
}

public JpegDnlDirectory Extract(SequentialReader reader)
Expand Down
Loading