Skip to content

Commit

Permalink
Merge pull request #727 from Erior/feature/UncompressedZipExtractall
Browse files Browse the repository at this point in the history
Implement Searching Data Descriptor stream issue/pull #680
  • Loading branch information
adamhathcock authored Mar 2, 2023
2 parents d76a473 + f60728b commit 42ddb0d
Show file tree
Hide file tree
Showing 9 changed files with 241 additions and 52 deletions.
6 changes: 3 additions & 3 deletions src/SharpCompress/Archives/Zip/ZipArchive.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ public static bool IsZipFile(FileInfo fileInfo, string? password = null)

public static bool IsZipFile(Stream stream, string? password = null)
{
var headerFactory = new StreamingZipHeaderFactory(password, new ArchiveEncoding());
var headerFactory = new StreamingZipHeaderFactory(password, new ArchiveEncoding(), null);
try
{
var header = headerFactory
Expand All @@ -157,7 +157,7 @@ public static bool IsZipFile(Stream stream, string? password = null)

public static bool IsZipMulti(Stream stream, string? password = null)
{
var headerFactory = new StreamingZipHeaderFactory(password, new ArchiveEncoding());
var headerFactory = new StreamingZipHeaderFactory(password, new ArchiveEncoding(), null);
try
{
var header = headerFactory
Expand Down Expand Up @@ -298,6 +298,6 @@ protected override IReader CreateReaderForSolidExtraction()
{
var stream = Volumes.Single().Stream;
stream.Position = 0;
return ZipReader.Open(stream, ReaderOptions);
return ZipReader.Open(stream, ReaderOptions, Entries);
}
}
51 changes: 7 additions & 44 deletions src/SharpCompress/Common/Zip/StreamingZipFilePart.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.IO;
using System.Net.Sockets;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.Compressors.Deflate;
using SharpCompress.IO;
Expand Down Expand Up @@ -36,57 +37,19 @@ internal BinaryReader FixStreamedFileLocation(ref RewindableStream rewindableStr
{
return new BinaryReader(rewindableStream);
}

if (Header.HasData && !Skipped)
{
_decompressionStream ??= GetCompressedStream();

if (Header.CompressionMethod != ZipCompressionMethod.None)
{
_decompressionStream.Skip();
_decompressionStream.Skip();

// If we had TotalIn / TotalOut we could have used them
Header.CompressedSize = _decompressionStream.Position;
// If we had TotalIn / TotalOut we could have used them
Header.CompressedSize = _decompressionStream.Position;

if (_decompressionStream is DeflateStream deflateStream)
{
rewindableStream.Rewind(deflateStream.InputBuffer);
}
}
else
if (_decompressionStream is DeflateStream deflateStream)
{
// We would need to search for the magic word
rewindableStream.Position -= 4;
var pos = rewindableStream.Position;
while (Utility.Find(rewindableStream, new byte[] { 0x50, 0x4b, 0x07, 0x08 }))
{
// We should probably check CRC32 for positive matching as well
var size = rewindableStream.Position - pos;
var br = new BinaryReader(rewindableStream);
br.ReadUInt32();
br.ReadUInt32(); // CRC32
var compressed_size = br.ReadUInt32();
var uncompressed_size = br.ReadUInt32();
var uncompressed_64bit = br.ReadInt64();

var test_64bit = ((long)uncompressed_size << 32) | compressed_size;

if (test_64bit == size && test_64bit == uncompressed_64bit)
{
Header.CompressedSize = test_64bit;
Header.UncompressedSize = uncompressed_64bit;
rewindableStream.Position -= 24;
break;
}

if (compressed_size == size && compressed_size == uncompressed_size)
{
Header.CompressedSize = compressed_size;
Header.UncompressedSize = uncompressed_size;
rewindableStream.Position -= 24;
break;
}
rewindableStream.Position -= 20;
}
rewindableStream.Rewind(deflateStream.InputBuffer);
}

Skipped = true;
Expand Down
22 changes: 20 additions & 2 deletions src/SharpCompress/Common/Zip/StreamingZipHeaderFactory.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
using System.Collections.Generic;
using System.IO;
using System.Linq;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.IO;

namespace SharpCompress.Common.Zip;

internal class StreamingZipHeaderFactory : ZipHeaderFactory
{
internal StreamingZipHeaderFactory(string? password, ArchiveEncoding archiveEncoding)
: base(StreamingMode.Streaming, password, archiveEncoding) { }
private IEnumerable<ZipEntry>? _entries;

internal StreamingZipHeaderFactory(string? password, ArchiveEncoding archiveEncoding, IEnumerable<ZipEntry>? entries)
: base(StreamingMode.Streaming, password, archiveEncoding) {
_entries = entries;
}

internal IEnumerable<ZipHeader> ReadStreamHeader(Stream stream)
{
Expand Down Expand Up @@ -87,6 +92,19 @@ ref rewindableStream
if (header.ZipHeaderType == ZipHeaderType.LocalEntry)
{
var local_header = ((LocalEntryHeader)header);
var dir_header = _entries?.FirstOrDefault(
entry => entry.Key == local_header.Name
&& local_header.CompressedSize == 0
&& local_header.UncompressedSize== 0
&& local_header.Crc == 0
&& local_header.IsDirectory == false);

if(dir_header!=null)
{
local_header.UncompressedSize = dir_header.Size;
local_header.CompressedSize = dir_header.CompressedSize;
local_header.Crc = (uint)dir_header.Crc;
}

// If we have CompressedSize, there is data to be read
if (local_header.CompressedSize > 0)
Expand Down
12 changes: 11 additions & 1 deletion src/SharpCompress/Common/Zip/ZipFilePart.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,17 @@ protected Stream CreateDecompressionStream(Stream stream, ZipCompressionMethod m
{
case ZipCompressionMethod.None:
{
return stream;
if( stream is ReadOnlySubStream )
{
return stream;
}

if( Header.CompressedSize > 0 )
{
return new ReadOnlySubStream(stream, Header.CompressedSize);
}

return new DataDescriptorStream(stream);
}
case ZipCompressionMethod.Deflate:
{
Expand Down
149 changes: 149 additions & 0 deletions src/SharpCompress/IO/DataDescriptorStream.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
using System;
using System.IO;
using System.Runtime.CompilerServices;

namespace SharpCompress.IO;

/// <summary>
/// Read-only view over a stream that yields bytes until it reaches a ZIP data
/// descriptor (signature <c>PK\x07\x08</c>) whose size fields equal the number of
/// bytes that lie between the position the wrapper was created at and the descriptor.
/// Once such a descriptor is found the underlying stream is positioned on the first
/// byte of the signature and every further read returns 0.
/// </summary>
/// <remarks>
/// The underlying stream must support getting and setting <see cref="Stream.Position"/>;
/// the scan rewinds it after reading ahead.
/// </remarks>
public class DataDescriptorStream : Stream
{
    // Length of the descriptor signature.
    private const int MarkerLength = 4;

    // Number of bytes inspected when validating a candidate: signature (4), CRC (4)
    // and sixteen bytes of size information (large enough for the 64-bit layout).
    private const int DataDescriptorSize = 24;

    private static readonly byte[] DataDescriptorMarker = { 0x50, 0x4b, 0x07, 0x08 };

    private readonly Stream _stream;
    private readonly long _start;
    private bool _isDisposed;
    private bool _done;

    /// <summary>Wraps <paramref name="stream"/>, treating its current position as the start of the payload.</summary>
    /// <param name="stream">Stream positioned on the first payload byte.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public DataDescriptorStream(Stream stream)
    {
        _stream = stream ?? throw new ArgumentNullException(nameof(stream));
        _start = _stream.Position;
    }

    internal bool IsRecording { get; private set; }

    /// <summary>Disposes this wrapper and, when <paramref name="disposing"/> is true, the wrapped stream.</summary>
    protected override void Dispose(bool disposing)
    {
        if (_isDisposed)
        {
            return;
        }
        _isDisposed = true;
        base.Dispose(disposing);
        if (disposing)
        {
            _stream.Dispose();
        }
    }

    public override bool CanRead => true;

    public override bool CanSeek => _stream.CanSeek;

    public override bool CanWrite => false;

    public override void Flush() => throw new NotSupportedException();

    public override long Length => _stream.Length;

    public override long Position
    {
        get => _stream.Position;
        set => _stream.Position = value;
    }

    /// <summary>
    /// Reads payload bytes, stopping in front of the first validated data descriptor.
    /// </summary>
    /// <returns>
    /// The number of payload bytes placed in <paramref name="buffer"/>; 0 when
    /// <paramref name="count"/> is 0, when the descriptor has been reached, or when the
    /// underlying stream is exhausted.
    /// </returns>
    /// <exception cref="EndOfStreamException">
    /// A descriptor signature was found with fewer than 24 bytes left in the underlying stream.
    /// </exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (count == 0 || _done)
        {
            return 0;
        }

        var read = _stream.Read(buffer, offset, count);
        var chunkEnd = _stream.Position;
        var chunkStart = chunkEnd - read;

        for (var i = 0; i < read; i++)
        {
            // Every byte equal to the first signature byte is examined on its own, so a
            // descriptor that directly follows a 0x50 payload byte is still recognised.
            if (buffer[offset + i] != DataDescriptorMarker[0])
            {
                continue;
            }

            var candidate = chunkStart + i;
            if (IsDescriptorAt(buffer, offset + i, read - i, candidate, chunkEnd))
            {
                _done = true;
                _stream.Position = candidate;
                return i;
            }
        }

        // No descriptor starts inside this chunk: all of it is payload. Candidates that
        // straddle the end of the chunk were resolved by reading ahead, so the byte count
        // never has to be shortened (which could otherwise yield a false 0 / end-of-stream).
        return read;
    }

    public override long Seek(long offset, SeekOrigin origin) =>
        throw new NotSupportedException();

    public override void SetLength(long value) =>
        throw new NotSupportedException();

    public override void Write(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException();

    // Decides whether a validated descriptor starts at buffer[index]. `available` is the
    // number of chunk bytes from index onwards, `candidate` the matching position in the
    // underlying stream and `resume` the position to restore after reading ahead.
    private bool IsDescriptorAt(byte[] buffer, int index, int available, long candidate, long resume)
    {
        var payloadLength = candidate - _start;

        if (available >= DataDescriptorSize)
        {
            return HasMarker(buffer, index, MarkerLength)
                && SizesMatch(buffer, index, payloadLength);
        }

        // Cheap rejection on whatever part of the signature is already in the chunk.
        if (!HasMarker(buffer, index, Math.Min(available, MarkerLength)))
        {
            return false;
        }

        // The chunk ends too early to decide; read the 24-byte window from the stream.
        var window = new byte[DataDescriptorSize];
        int filled;
        _stream.Position = candidate;
        try
        {
            filled = Fill(window);
        }
        finally
        {
            _stream.Position = resume;
        }

        if (filled < MarkerLength || !HasMarker(window, 0, MarkerLength))
        {
            return false;
        }

        if (filled < DataDescriptorSize)
        {
            // Same failure the BinaryReader-based validation produced for a signature
            // that is followed by too few bytes.
            throw new EndOfStreamException();
        }

        return SizesMatch(window, 0, payloadLength);
    }

    // Reads from the underlying stream until `window` is full or the stream ends.
    private int Fill(byte[] window)
    {
        var filled = 0;
        while (filled < window.Length)
        {
            var n = _stream.Read(window, filled, window.Length - filled);
            if (n <= 0)
            {
                break;
            }
            filled += n;
        }
        return filled;
    }

    // True when the first `length` signature bytes are present at data[index].
    private static bool HasMarker(byte[] data, int index, int length)
    {
        for (var k = 0; k < length; k++)
        {
            if (data[index + k] != DataDescriptorMarker[k])
            {
                return false;
            }
        }
        return true;
    }

    // Checks the size fields of the 24-byte window at data[index] against the payload
    // length, accepting either the 64-bit layout (two 8-byte sizes) or the 32-bit
    // layout (two 4-byte sizes). The CRC at offset 4 is not verified.
    private static bool SizesMatch(byte[] data, int index, long payloadLength)
    {
        var compressed32 = ReadUInt32(data, index + 8);
        var uncompressed32 = ReadUInt32(data, index + 12);
        var uncompressed64 =
            ((long)ReadUInt32(data, index + 20) << 32) | ReadUInt32(data, index + 16);

        // In the 64-bit layout bytes 8..15 hold the compressed size.
        var compressed64 = ((long)uncompressed32 << 32) | compressed32;

        if (compressed64 == payloadLength && compressed64 == uncompressed64)
        {
            return true;
        }

        return compressed32 == payloadLength && compressed32 == uncompressed32;
    }

    // Little-endian 32-bit read, matching what BinaryReader.ReadUInt32 returns.
    private static uint ReadUInt32(byte[] data, int index) =>
        (uint)(data[index]
            | (data[index + 1] << 8)
            | (data[index + 2] << 16)
            | (data[index + 3] << 24));
}
1 change: 1 addition & 0 deletions src/SharpCompress/IO/ReadOnlySubStream.cs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ public override int Read(Span<byte> buffer)
if (read > 0)
{
BytesLeftToRead -= read;
_position += read;
}
return read;
}
Expand Down
14 changes: 13 additions & 1 deletion src/SharpCompress/Readers/Zip/ZipReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ public class ZipReader : AbstractReader<ZipEntry, ZipVolume>
private ZipReader(Stream stream, ReaderOptions options) : base(options, ArchiveType.Zip)
{
Volume = new ZipVolume(stream, options);
_headerFactory = new StreamingZipHeaderFactory(options.Password, options.ArchiveEncoding);
_headerFactory = new StreamingZipHeaderFactory(options.Password, options.ArchiveEncoding, null);
}

// Creates a reader over `stream` and hands `entries` to the streaming header factory,
// which uses them to fill in sizes and CRC for local headers that carry zeros.
private ZipReader(Stream stream, ReaderOptions options, IEnumerable<ZipEntry> entries)
    : base(options, ArchiveType.Zip)
{
    Volume = new ZipVolume(stream, options);

    var password = options.Password;
    var encoding = options.ArchiveEncoding;
    _headerFactory = new StreamingZipHeaderFactory(password, encoding, entries);
}

public override ZipVolume Volume { get; }
Expand All @@ -32,6 +38,12 @@ public static ZipReader Open(Stream stream, ReaderOptions? options = null)
return new ZipReader(stream, options ?? new ReaderOptions());
}

/// <summary>
/// Opens a <see cref="ZipReader"/> over <paramref name="stream"/>, passing
/// <paramref name="entries"/> on to the reader's header factory.
/// </summary>
/// <param name="stream">Stream to read from; checked for null.</param>
/// <param name="options">Reader options; a default <see cref="ReaderOptions"/> is used when null.</param>
/// <param name="entries">Entries forwarded to the reader.</param>
public static ZipReader Open(Stream stream, ReaderOptions? options, IEnumerable<ZipEntry> entries)
{
    stream.CheckNotNull(nameof(stream));

    var effectiveOptions = options ?? new ReaderOptions();
    return new ZipReader(stream, effectiveOptions, entries);
}

#endregion Open

protected override IEnumerable<ZipEntry> GetEntries(Stream stream)
Expand Down
23 changes: 23 additions & 0 deletions tests/SharpCompress.Test/Zip/ZipArchiveTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,29 @@ public void Zip_Zip64_CompressedSizeExtraOnly_Read()
}
}

// Reads every entry of the stored (uncompressed) test archive through
// ExtractAllEntries and expects four entries.
[Fact]
public void Zip_Uncompressed_Read_All()
{
    string zipPath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.uncompressed.zip");
    using (var stream = File.Open(zipPath, FileMode.Open, FileAccess.Read))
    // The archive and the reader are disposable too; release them with the file stream.
    using (IArchive archive = ArchiveFactory.Open(stream))
    using (IReader reader = archive.ExtractAllEntries())
    {
        int entries = 0;
        while (reader.MoveToNextEntry())
        {
            using (var entryStream = reader.OpenEntryStream())
            using (var target = new MemoryStream())
            {
                entryStream.CopyTo(target);
            }

            entries++;
        }
        Assert.Equal(4, entries);
    }
}

[Fact]
public void Zip_Uncompressed_Skip_All()
{
Expand Down
15 changes: 14 additions & 1 deletion tests/SharpCompress.Test/Zip/ZipReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,20 @@ public void Issue_685()
}

[Fact]
public void Zip_Uncompressed_Skip_All()
public void Zip_ReaderFactory_Uncompressed_Read_All()
{
    var zipPath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.uncompressed.zip");
    using var stream = File.Open(zipPath, FileMode.Open, FileAccess.Read);
    using var reader = ReaderFactory.Open(stream);
    while (reader.MoveToNextEntry())
    {
        // Dispose the entry stream and the sink on every iteration instead of leaking them.
        using var entryStream = reader.OpenEntryStream();
        using var target = new MemoryStream();
        entryStream.CopyTo(target);
    }
}

[Fact]
public void Zip_ReaderFactory_Uncompressed_Skip_All()
{
var zipPath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.uncompressed.zip");
using var stream = File.Open(zipPath, FileMode.Open, FileAccess.Read);
Expand Down

0 comments on commit 42ddb0d

Please sign in to comment.