diff --git a/src/System.IO.Compression/src/System.IO.Compression.csproj b/src/System.IO.Compression/src/System.IO.Compression.csproj index 9fe201ce6db7..b81b48261a00 100644 --- a/src/System.IO.Compression/src/System.IO.Compression.csproj +++ b/src/System.IO.Compression/src/System.IO.Compression.csproj @@ -31,6 +31,23 @@ + + + + + + + + + + + + + + + + + Common\System\IO\PathInternal.cs @@ -39,12 +56,12 @@ - - - - - + + + + + @@ -55,7 +72,7 @@ - + Common\Interop\Windows\Interop.Libraries.cs @@ -63,7 +80,7 @@ - + diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/BlockType.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/BlockType.cs new file mode 100644 index 000000000000..4f39bb835e2c --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/BlockType.cs @@ -0,0 +1,13 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +namespace System.IO.Compression +{ + internal enum BlockType + { + Uncompressed = 0, + Static = 1, + Dynamic = 2 + } +} diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CompressionMode.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CompressionMode.cs new file mode 100644 index 000000000000..08a4ac0a6b10 --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CompressionMode.cs @@ -0,0 +1,12 @@ +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+ +namespace System.IO.Compression +{ + public enum CompressionMode + { + Decompress = 0, + Compress = 1 + } +} + diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CopyEncoder.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CopyEncoder.cs new file mode 100644 index 000000000000..84a47f159cda --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/CopyEncoder.cs @@ -0,0 +1,77 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.Diagnostics; + +namespace System.IO.Compression +{ + internal class CopyEncoder + { + // padding for copy encoder formatting + // - 1 byte for header + // - 4 bytes for len, nlen + private const int PaddingSize = 5; + + // max uncompressed deflate block size is 64K. + private const int MaxUncompressedBlockSize = 65536; + + + // null input means write an empty payload with formatting info. This is needed for the final block. + public void GetBlock(DeflateInput input, OutputBuffer output, bool isFinal) + { + Debug.Assert(output != null); + Debug.Assert(output.FreeBytes >= PaddingSize); + + // determine number of bytes to write + int count = 0; + if (input != null) + { + // allow space for padding and bits not yet flushed to buffer + count = Math.Min(input.Count, output.FreeBytes - PaddingSize - output.BitsInBuffer); + + // we don't expect the output buffer to ever be this big (currently 4K), but we'll check this + // just in case that changes. 
+ if (count > MaxUncompressedBlockSize - PaddingSize) + { + count = MaxUncompressedBlockSize - PaddingSize; + } + } + + // write header and flush bits + if (isFinal) + { + output.WriteBits(FastEncoderStatics.BFinalNoCompressionHeaderBitCount, + FastEncoderStatics.BFinalNoCompressionHeader); + } + else + { + output.WriteBits(FastEncoderStatics.NoCompressionHeaderBitCount, + FastEncoderStatics.NoCompressionHeader); + } + + // now we're aligned + output.FlushBits(); + + // write len, nlen + WriteLenNLen((ushort)count, output); + + // write uncompressed bytes + if (input != null && count > 0) + { + output.WriteBytes(input.Buffer, input.StartIndex, count); + input.ConsumeBytes(count); + } + } + + private void WriteLenNLen(ushort len, OutputBuffer output) + { + // len + output.WriteUInt16(len); + + // nlen + ushort onesComp = (ushort)(~(ushort)len); + output.WriteUInt16(onesComp); + } + } +} \ No newline at end of file diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateInput.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateInput.cs new file mode 100644 index 000000000000..826e4caceb3b --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateInput.cs @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics; + +namespace System.IO.Compression +{ + internal class DeflateInput + { + private byte[] _buffer; + private int _count; + private int _startIndex; + + internal byte[] Buffer + { + get + { + return _buffer; + } + set + { + _buffer = value; + } + } + + internal int Count + { + get + { + return _count; + } + set + { + _count = value; + } + } + + internal int StartIndex + { + get + { + return _startIndex; + } + set + { + _startIndex = value; + } + } + + internal void ConsumeBytes(int n) + { + Debug.Assert(n <= _count, "Should use more bytes than what we have in the buffer"); + _startIndex += n; + _count -= n; + Debug.Assert(_startIndex + _count <= _buffer.Length, "Input buffer is in invalid state!"); + } + + internal InputState DumpState() + { + InputState savedState; + savedState.count = _count; + savedState.startIndex = _startIndex; + return savedState; + } + + internal void RestoreState(InputState state) + { + _count = state.count; + _startIndex = state.startIndex; + } + + internal struct InputState + { + internal int count; + internal int startIndex; + } + } +} diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateManagedStream.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateManagedStream.cs new file mode 100644 index 000000000000..346d52210ddd --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflateManagedStream.cs @@ -0,0 +1,613 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System.Diagnostics; +using System.Diagnostics.Contracts; +using System.Runtime.CompilerServices; +using System.Threading; +using System.Threading.Tasks; + +namespace System.IO.Compression +{ + internal partial class DeflateManagedStream : Stream + { + internal const int DefaultBufferSize = 8192; + + private Stream _stream; + private CompressionMode _mode; + private bool _leaveOpen; + private InflaterManaged _inflater; + private DeflaterManaged _deflater; + private byte[] _buffer; + + private int _asyncOperations; + + private IFileFormatWriter _formatWriter; + private bool _wroteHeader; + private bool _wroteBytes; + + public DeflateManagedStream(Stream stream, CompressionMode mode) : this(stream, mode, false) + { + } + + // Since a reader is being taken, CompressionMode.Decompress is implied + internal DeflateManagedStream(Stream stream, bool leaveOpen, IFileFormatReader reader) + { + Debug.Assert(reader != null, "The IFileFormatReader passed to the internal DeflateStream constructor must be non-null"); + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + if (!stream.CanRead) + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); + + InitializeInflater(stream, leaveOpen, reader); + } + + // A specific constructor to allow decompression of Deflate64 + internal DeflateManagedStream(Stream stream, ZipArchiveEntry.CompressionMethodValues method) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + if (!stream.CanRead) + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); + + InitializeInflater(stream, false, null, method); + } + + public DeflateManagedStream(Stream stream, CompressionMode mode, bool leaveOpen) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + + switch (mode) + { + case CompressionMode.Decompress: + InitializeInflater(stream, leaveOpen); + break; + + case CompressionMode.Compress: + InitializeDeflater(stream, 
leaveOpen, CompressionLevel.Optimal); + break; + + default: + throw new ArgumentException(SR.ArgumentOutOfRange_Enum, nameof(mode)); + } + } + + // Implies mode = Compress + public DeflateManagedStream(Stream stream, CompressionLevel compressionLevel) : this(stream, compressionLevel, false) + { + } + + // Implies mode = Compress + public DeflateManagedStream(Stream stream, CompressionLevel compressionLevel, bool leaveOpen) + { + if (stream == null) + throw new ArgumentNullException(nameof(stream)); + + InitializeDeflater(stream, leaveOpen, compressionLevel); + } + + + /// + /// Sets up this DeflateManagedStream to be used for Inflation/Decompression + /// + internal void InitializeInflater(Stream stream, bool leaveOpen, IFileFormatReader reader = null, ZipArchiveEntry.CompressionMethodValues method = ZipArchiveEntry.CompressionMethodValues.Deflate) + { + Debug.Assert(stream != null); + Debug.Assert(method == ZipArchiveEntry.CompressionMethodValues.Deflate || method == ZipArchiveEntry.CompressionMethodValues.Deflate64); + if (!stream.CanRead) + throw new ArgumentException(SR.NotSupported_UnreadableStream, nameof(stream)); + + _inflater = new InflaterManaged(reader, method == ZipArchiveEntry.CompressionMethodValues.Deflate64 ? 
true : false); + + _stream = stream; + _mode = CompressionMode.Decompress; + _leaveOpen = leaveOpen; + _buffer = new byte[DefaultBufferSize]; + } + + /// + /// Sets up this DeflateManagedStream to be used for Deflation/Compression + /// + internal void InitializeDeflater(Stream stream, bool leaveOpen, CompressionLevel compressionLevel) + { + Debug.Assert(stream != null); + if (!stream.CanWrite) + throw new ArgumentException(SR.NotSupported_UnwritableStream, nameof(stream)); + + _deflater = new DeflaterManaged(); + + _stream = stream; + _mode = CompressionMode.Compress; + _leaveOpen = leaveOpen; + _buffer = new byte[DefaultBufferSize]; + } + + internal void SetFileFormatWriter(IFileFormatWriter writer) + { + if (writer != null) + { + _formatWriter = writer; + } + } + + public Stream BaseStream + { + get + { + return _stream; + } + } + + public override bool CanRead + { + get + { + if (_stream == null) + { + return false; + } + + return (_mode == CompressionMode.Decompress && _stream.CanRead); + } + } + + public override bool CanWrite + { + get + { + if (_stream == null) + { + return false; + } + + return (_mode == CompressionMode.Compress && _stream.CanWrite); + } + } + + public override bool CanSeek + { + get + { + return false; + } + } + + public override long Length + { + get + { + throw new NotSupportedException(SR.NotSupported); + } + } + + public override long Position + { + get + { + throw new NotSupportedException(SR.NotSupported); + } + + set + { + throw new NotSupportedException(SR.NotSupported); + } + } + + public override void Flush() + { + EnsureNotDisposed(); + } + + public override Task FlushAsync(CancellationToken cancellationToken) + { + EnsureNotDisposed(); + return cancellationToken.IsCancellationRequested ? 
+ Task.FromCanceled(cancellationToken) : + Task.CompletedTask; + } + + public override long Seek(long offset, SeekOrigin origin) + { + throw new NotSupportedException(SR.NotSupported); + } + + public override void SetLength(long value) + { + throw new NotSupportedException(SR.NotSupported); + } + + public override int Read(byte[] array, int offset, int count) + { + EnsureDecompressionMode(); + ValidateParameters(array, offset, count); + EnsureNotDisposed(); + + int bytesRead; + int currentOffset = offset; + int remainingCount = count; + + while (true) + { + bytesRead = _inflater.Inflate(array, currentOffset, remainingCount); + currentOffset += bytesRead; + remainingCount -= bytesRead; + + if (remainingCount == 0) + { + break; + } + + if (_inflater.Finished()) + { + // if we finished decompressing, we can't have anything left in the outputwindow. + Debug.Assert(_inflater.AvailableOutput == 0, "We should have copied all stuff out!"); + break; + } + + int bytes = _stream.Read(_buffer, 0, _buffer.Length); + if (bytes <= 0) + { + break; + } + else if (bytes > _buffer.Length) + { + // The stream is either malicious or poorly implemented and returned a number of + // bytes larger than the buffer supplied to it. 
+ throw new InvalidDataException(SR.GenericInvalidData); + } + + _inflater.SetInput(_buffer, 0, bytes); + } + + return count - remainingCount; + } + + private void ValidateParameters(byte[] array, int offset, int count) + { + if (array == null) + throw new ArgumentNullException(nameof(array)); + + if (offset < 0) + throw new ArgumentOutOfRangeException(nameof(offset)); + + if (count < 0) + throw new ArgumentOutOfRangeException(nameof(count)); + + if (array.Length - offset < count) + throw new ArgumentException(SR.InvalidArgumentOffsetCount); + } + + private void EnsureNotDisposed() + { + if (_stream == null) + ThrowStreamClosedException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowStreamClosedException() + { + throw new ObjectDisposedException(null, SR.ObjectDisposed_StreamClosed); + } + + private void EnsureDecompressionMode() + { + if (_mode != CompressionMode.Decompress) + ThrowCannotReadFromDeflateManagedStreamException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowCannotReadFromDeflateManagedStreamException() + { + throw new InvalidOperationException(SR.CannotReadFromDeflateStream); + } + + private void EnsureCompressionMode() + { + if (_mode != CompressionMode.Compress) + ThrowCannotWriteToDeflateManagedStreamException(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ThrowCannotWriteToDeflateManagedStreamException() + { + throw new InvalidOperationException(SR.CannotWriteToDeflateStream); + } + +#if netstandard17 + public override IAsyncResult BeginRead(byte[] buffer, int offset, int count, AsyncCallback asyncCallback, object asyncState) => + TaskToApm.Begin(ReadAsync(buffer, offset, count, CancellationToken.None), asyncCallback, asyncState); + + public override int EndRead(IAsyncResult asyncResult) => + TaskToApm.End(asyncResult); +#endif + + public override Task ReadAsync(Byte[] array, int offset, int count, CancellationToken cancellationToken) + { + 
EnsureDecompressionMode(); + + // We use this checking order for compat to earlier versions: + if (_asyncOperations != 0) + throw new InvalidOperationException(SR.InvalidBeginCall); + + ValidateParameters(array, offset, count); + EnsureNotDisposed(); + + if (cancellationToken.IsCancellationRequested) + { + return Task.FromCanceled(cancellationToken); + } + + Interlocked.Increment(ref _asyncOperations); + Task readTask = null; + + try + { + // Try to read decompressed data in output buffer + int bytesRead = _inflater.Inflate(array, offset, count); + if (bytesRead != 0) + { + // If decompression output buffer is not empty, return immediately. + return Task.FromResult(bytesRead); + } + + if (_inflater.Finished()) + { + // end of compression stream + return Task.FromResult(0); + } + + // If there is no data on the output buffer and we are not at + // the end of the stream, we need to get more data from the base stream + readTask = _stream.ReadAsync(_buffer, 0, _buffer.Length, cancellationToken); + if (readTask == null) + { + throw new InvalidOperationException(SR.NotSupported_UnreadableStream); + } + + return ReadAsyncCore(readTask, array, offset, count, cancellationToken); + } + finally + { + // if we haven't started any async work, decrement the counter to end the transaction + if (readTask == null) + { + Interlocked.Decrement(ref _asyncOperations); + } + } + } + + private async Task ReadAsyncCore(Task readTask, byte[] array, int offset, int count, CancellationToken cancellationToken) + { + try + { + while (true) + { + int bytesRead = await readTask.ConfigureAwait(false); + EnsureNotDisposed(); + + if (bytesRead <= 0) + { + // This indicates the base stream has received EOF + return 0; + } + else if (bytesRead > _buffer.Length) + { + // The stream is either malicious or poorly implemented and returned a number of + // bytes larger than the buffer supplied to it. 
+ throw new InvalidDataException(SR.GenericInvalidData); + } + + cancellationToken.ThrowIfCancellationRequested(); + + // Feed the data from base stream into decompression engine + _inflater.SetInput(_buffer, 0, bytesRead); + bytesRead = _inflater.Inflate(array, offset, count); + + if (bytesRead == 0 && !_inflater.Finished()) + { + // We could have read in head information and didn't get any data. + // Read from the base stream again. + readTask = _stream.ReadAsync(_buffer, 0, _buffer.Length, cancellationToken); + if (readTask == null) + { + throw new InvalidOperationException(SR.NotSupported_UnreadableStream); + } + } + else + { + return bytesRead; + } + } + } + finally + { + Interlocked.Decrement(ref _asyncOperations); + } + } + + public override void Write(byte[] array, int offset, int count) + { + // Validate the state and the parameters + EnsureCompressionMode(); + ValidateParameters(array, offset, count); + EnsureNotDisposed(); + DoMaintenance(array, offset, count); + + // Write compressed the bytes we already passed to the deflater: + + WriteDeflaterOutput(); + + // Pass new bytes through deflater and write them too: + + _deflater.SetInput(array, offset, count); + WriteDeflaterOutput(); + } + + + private void WriteDeflaterOutput() + { + while (!_deflater.NeedsInput()) + { + int compressedBytes = _deflater.GetDeflateOutput(_buffer); + if (compressedBytes > 0) + { + _stream.Write(_buffer, 0, compressedBytes); + } + } + } + + // Perform deflate-mode maintenance required due to custom header and footer writers + // (e.g. 
set by GZipStream): + private void DoMaintenance(byte[] array, int offset, int count) + { + // If no bytes written, do nothing: + if (count <= 0) + return; + + // Note that stream contains more than zero data bytes: + _wroteBytes = true; + + // If no header/footer formatter present, nothing else to do: + if (_formatWriter == null) + return; + + // If formatter has not yet written a header, do it now: + if (!_wroteHeader) + { + byte[] b = _formatWriter.GetHeader(); + _stream.Write(b, 0, b.Length); + _wroteHeader = true; + } + + // Inform formatter of the data bytes written: + _formatWriter.UpdateWithBytesRead(array, offset, count); + } + + // This is called by Dispose: + private void PurgeBuffers(bool disposing) + { + if (!disposing) + return; + + if (_stream == null) + return; + + Flush(); + + if (_mode != CompressionMode.Compress) + return; + + // Some deflaters (e.g. ZLib) write more than zero bytes for zero byte inputs. + // This round-trips and we should be ok with this, but our legacy managed deflater + // always wrote zero output for zero input and upstack code (e.g. ZipArchiveEntry) + // took dependencies on it. Thus, make sure to only "flush" when we actually had + // some input: + if (_wroteBytes) + { + // Compress any bytes left + WriteDeflaterOutput(); + + // Pull out any bytes left inside deflater: + bool finished; + do + { + int compressedBytes; + finished = _deflater.Finish(_buffer, out compressedBytes); + + if (compressedBytes > 0) + _stream.Write(_buffer, 0, compressedBytes); + } while (!finished); + } + else + { + // In case of zero length buffer, we still need to clean up the native created stream before + // the object get disposed because eventually ZLibNative.ReleaseHandle will get called during + // the dispose operation and although it frees the stream but it return error code because the + // stream state was still marked as in use. 
The symptoms of this problem will not be seen except + // if running any diagnostic tools which check for disposing safe handle objects + bool finished; + do + { + int compressedBytes; + finished = _deflater.Finish(_buffer, out compressedBytes); + } while (!finished); + } + + // Write format footer: + if (_formatWriter != null && _wroteHeader) + { + byte[] b = _formatWriter.GetFooter(); + _stream.Write(b, 0, b.Length); + } + } + + protected override void Dispose(bool disposing) + { + try + { + PurgeBuffers(disposing); + } + finally + { + // Close the underlying stream even if PurgeBuffers threw. + // Stream.Close() may throw here (may or may not be due to the same error). + // In this case, we still need to clean up internal resources, hence the inner finally blocks. + try + { + if (disposing && !_leaveOpen && _stream != null) + _stream.Dispose(); + } + finally + { + _stream = null; + + try + { + if (_deflater != null) + _deflater.Dispose(); + if (_inflater != null) + _inflater.Dispose(); + } + finally + { + _deflater = null; + _inflater = null; + base.Dispose(disposing); + } + } + } + } + + public override Task WriteAsync(Byte[] array, int offset, int count, CancellationToken cancellationToken) + { + EnsureCompressionMode(); + + // We use this checking order for compat to earlier versions: + if (_asyncOperations != 0) + throw new InvalidOperationException(SR.InvalidBeginCall); + + ValidateParameters(array, offset, count); + EnsureNotDisposed(); + + if (cancellationToken.IsCancellationRequested) + return Task.FromCanceled(cancellationToken); + + return WriteAsyncCore(array, offset, count, cancellationToken); + } + + private async Task WriteAsyncCore(Byte[] array, int offset, int count, CancellationToken cancellationToken) + { + Interlocked.Increment(ref _asyncOperations); + try + { + await base.WriteAsync(array, offset, count, cancellationToken).ConfigureAwait(false); + } + finally + { + Interlocked.Decrement(ref _asyncOperations); + } + } + } +} diff --git 
a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflaterManaged.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflaterManaged.cs new file mode 100644 index 000000000000..524a764b26e3 --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/DeflaterManaged.cs @@ -0,0 +1,314 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + + +// +// zlib.h -- interface of the 'zlib' general purpose compression library +// version 1.2.1, November 17th, 2003 +// +// Copyright (C) 1995-2003 Jean-loup Gailly and Mark Adler +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. 
+// +// + +// Compression engine + +using System; +using System.Diagnostics; +using System.Diagnostics.Contracts; + +namespace System.IO.Compression +{ + internal class DeflaterManaged : IDisposable + { + private const int MinBlockSize = 256; + private const int MaxHeaderFooterGoo = 120; + private const int CleanCopySize = DeflateManagedStream.DefaultBufferSize - MaxHeaderFooterGoo; + private const double BadCompressionThreshold = 1.0; + + private FastEncoder _deflateEncoder; + private CopyEncoder _copyEncoder; + + private DeflateInput _input; + private OutputBuffer _output; + private DeflaterState _processingState; + private DeflateInput _inputFromHistory; + + internal DeflaterManaged() + { + _deflateEncoder = new FastEncoder(); + _copyEncoder = new CopyEncoder(); + _input = new DeflateInput(); + _output = new OutputBuffer(); + + _processingState = DeflaterState.NotStarted; + } + + internal bool NeedsInput() + { + return _input.Count == 0 && _deflateEncoder.BytesInHistory == 0; + } + + // Sets the input to compress. The only buffer copy occurs when the input is copied + // to the FastEncoderWindow + internal void SetInput(byte[] inputBuffer, int startIndex, int count) + { + Debug.Assert(_input.Count == 0, "We have something left in previous input!"); + + _input.Buffer = inputBuffer; + _input.Count = count; + _input.StartIndex = startIndex; + + if (count > 0 && count < MinBlockSize) + { + // user is writing small buffers. If buffer size is below MinBlockSize, we + // need to switch to a small data mode, to avoid block headers and footers + // dominating the output. 
+ switch (_processingState) + { + case DeflaterState.NotStarted: + case DeflaterState.CheckingForIncompressible: + // clean states, needs a block header first + _processingState = DeflaterState.StartingSmallData; + break; + case DeflaterState.CompressThenCheck: + // already has correct block header + _processingState = DeflaterState.HandlingSmallData; + break; + } + } + } + + internal int GetDeflateOutput(byte[] outputBuffer) + { + Debug.Assert(outputBuffer != null, "Can't pass in a null output buffer!"); + Debug.Assert(!NeedsInput(), "GetDeflateOutput should only be called after providing input"); + + _output.UpdateBuffer(outputBuffer); + + switch (_processingState) + { + case DeflaterState.NotStarted: + { + // first call. Try to compress but if we get bad compression ratio, switch to uncompressed blocks. + Debug.Assert(_deflateEncoder.BytesInHistory == 0, "have leftover bytes in window"); + + // save these in case we need to switch to uncompressed format + DeflateInput.InputState initialInputState = _input.DumpState(); + OutputBuffer.BufferState initialOutputState = _output.DumpState(); + + _deflateEncoder.GetBlockHeader(_output); + _deflateEncoder.GetCompressedData(_input, _output); + + if (!UseCompressed(_deflateEncoder.LastCompressionRatio)) + { + // we're expanding; restore state and switch to uncompressed + _input.RestoreState(initialInputState); + _output.RestoreState(initialOutputState); + _copyEncoder.GetBlock(_input, _output, false); + FlushInputWindows(); + _processingState = DeflaterState.CheckingForIncompressible; + } + else + { + _processingState = DeflaterState.CompressThenCheck; + } + + break; + } + case DeflaterState.CompressThenCheck: + { + // continue assuming data is compressible. 
If we reach data that indicates otherwise + // finish off remaining data in history and decide whether to compress on a + // block-by-block basis + _deflateEncoder.GetCompressedData(_input, _output); + + if (!UseCompressed(_deflateEncoder.LastCompressionRatio)) + { + _processingState = DeflaterState.SlowDownForIncompressible1; + _inputFromHistory = _deflateEncoder.UnprocessedInput; + } + break; + } + case DeflaterState.SlowDownForIncompressible1: + { + // finish off previous compressed block + _deflateEncoder.GetBlockFooter(_output); + + _processingState = DeflaterState.SlowDownForIncompressible2; + goto case DeflaterState.SlowDownForIncompressible2; // yeah I know, but there's no fallthrough + } + + case DeflaterState.SlowDownForIncompressible2: + { + // clear out data from history, but add them as uncompressed blocks + if (_inputFromHistory.Count > 0) + { + _copyEncoder.GetBlock(_inputFromHistory, _output, false); + } + + if (_inputFromHistory.Count == 0) + { + // now we're clean + _deflateEncoder.FlushInput(); + _processingState = DeflaterState.CheckingForIncompressible; + } + break; + } + + case DeflaterState.CheckingForIncompressible: + { + // decide whether to compress on a block-by-block basis + Debug.Assert(_deflateEncoder.BytesInHistory == 0, "have leftover bytes in window"); + + // save these in case we need to store as uncompressed + DeflateInput.InputState initialInputState = _input.DumpState(); + OutputBuffer.BufferState initialOutputState = _output.DumpState(); + + // enforce max so we can ensure state between calls + _deflateEncoder.GetBlock(_input, _output, CleanCopySize); + + if (!UseCompressed(_deflateEncoder.LastCompressionRatio)) + { + // we're expanding; restore state and switch to uncompressed + _input.RestoreState(initialInputState); + _output.RestoreState(initialOutputState); + _copyEncoder.GetBlock(_input, _output, false); + FlushInputWindows(); + } + + break; + } + + case DeflaterState.StartingSmallData: + { + // add compressed header and 
data, but not footer. Subsequent calls will keep + // adding compressed data (no header and no footer). We're doing this to + // avoid overhead of header and footer size relative to compressed payload. + _deflateEncoder.GetBlockHeader(_output); + + _processingState = DeflaterState.HandlingSmallData; + goto case DeflaterState.HandlingSmallData; // yeah I know, but there's no fallthrough + } + + case DeflaterState.HandlingSmallData: + { + // continue adding compressed data + _deflateEncoder.GetCompressedData(_input, _output); + break; + } + } + + return _output.BytesWritten; + } + + internal bool Finish(byte[] outputBuffer, out int bytesRead) + { + Debug.Assert(outputBuffer != null, "Can't pass in a null output buffer!"); + Debug.Assert(_processingState == DeflaterState.NotStarted || + _processingState == DeflaterState.CheckingForIncompressible || + _processingState == DeflaterState.HandlingSmallData || + _processingState == DeflaterState.CompressThenCheck || + _processingState == DeflaterState.SlowDownForIncompressible1, + "got unexpected processing state = " + _processingState); + + Debug.Assert(NeedsInput()); + + // no need to add end of block info if we didn't write anything + if (_processingState == DeflaterState.NotStarted) + { + bytesRead = 0; + return true; + } + + _output.UpdateBuffer(outputBuffer); + + if (_processingState == DeflaterState.CompressThenCheck || + _processingState == DeflaterState.HandlingSmallData || + _processingState == DeflaterState.SlowDownForIncompressible1) + { + // need to finish off block + _deflateEncoder.GetBlockFooter(_output); + } + + // write final block + WriteFinal(); + bytesRead = _output.BytesWritten; + return true; + } + + // Is compression ratio under threshold? 
+ private bool UseCompressed(double ratio) + { + return (ratio <= BadCompressionThreshold); + } + + private void FlushInputWindows() + { + _deflateEncoder.FlushInput(); + } + + private void WriteFinal() + { + _copyEncoder.GetBlock(null, _output, true); + } + + // These states allow us to assume that data is compressible and keep compression ratios at least + // as good as historical values, but switch to different handling if that approach may increase the + // data. If we detect we're getting a bad compression ratio, we switch to CheckingForIncompressible + // state and decide to compress on a block by block basis. + // + // If we're getting small data buffers, we want to avoid overhead of excessive header and footer + // info, so we add one header and keep adding blocks as compressed. This means that if the user uses + // small buffers, they won't get the "don't increase size" improvements. + // + // An earlier iteration of this fix handled that data separately by buffering this data until it + // reached a reasonable size, but given that Flush is not implemented on DeflateManagedStream, this meant + // data could be flushed only on Dispose. In the future, it would be reasonable to revisit this, in + // case this isn't breaking. 
+ // + // NotStarted -> CheckingForIncompressible, CompressThenCheck, StartingSmallData + // CompressThenCheck -> SlowDownForIncompressible1 + // SlowDownForIncompressible1 -> SlowDownForIncompressible2 + // SlowDownForIncompressible2 -> CheckingForIncompressible + // StartingSmallData -> HandlingSmallData + private enum DeflaterState + { + // no bytes to write yet + NotStarted, + + // transient states + SlowDownForIncompressible1, + SlowDownForIncompressible2, + StartingSmallData, + + // stable state: may transition to CheckingForIncompressible (via transient states) if it + // appears we're expanding data + CompressThenCheck, + + // sink states + CheckingForIncompressible, + HandlingSmallData + } + + internal void Dispose() { } + internal void Dispose(bool disposing) { } + void IDisposable.Dispose() { } + } // internal class DeflaterManaged +} // namespace System.IO.Compression \ No newline at end of file diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FastEncoder.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FastEncoder.cs new file mode 100644 index 000000000000..1d7f91781658 --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FastEncoder.cs @@ -0,0 +1,208 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 

using System;
using System.Diagnostics;
using System.Globalization;

namespace System.IO.Compression
{
    /// <summary>
    /// Deflate encoder that uses a single hard-coded pair of Huffman trees
    /// (see FastEncoderStatics), so blocks can be emitted without building
    /// per-block code tables.
    /// </summary>
    internal class FastEncoder
    {
        private FastEncoderWindow _inputWindow; // input history window
        private Match _currentMatch;            // scratch object describing the current match
        private double _lastCompressionRatio;   // output bytes / consumed input bytes of the last run

        public FastEncoder()
        {
            _inputWindow = new FastEncoderWindow();
            _currentMatch = new Match();
        }

        /// <summary>Bytes buffered in the history window that have not been compressed yet.</summary>
        internal int BytesInHistory
        {
            get { return _inputWindow.BytesAvailable; }
        }

        /// <summary>View over the not-yet-compressed tail of the history window.</summary>
        internal DeflateInput UnprocessedInput
        {
            get { return _inputWindow.UnprocessedInput; }
        }

        internal void FlushInput()
        {
            _inputWindow.FlushWindow();
        }

        /// <summary>Compression ratio measured by the most recent GetCompressedOutput run.</summary>
        // 'double' keyword alias instead of 'Double', consistent with the rest of the file.
        internal double LastCompressionRatio
        {
            get { return _lastCompressionRatio; }
        }

        /// <summary>
        /// Copy the compressed bytes to the output buffer as a complete block
        /// (header, payload, footer). maxBytesToCopy limits the number of bytes
        /// we can copy from input; set to any value &lt; 1 for no limit.
        /// </summary>
        internal void GetBlock(DeflateInput input, OutputBuffer output, int maxBytesToCopy)
        {
            Debug.Assert(InputAvailable(input), "call SetInput before trying to compress!");

            WriteDeflatePreamble(output);
            GetCompressedOutput(input, output, maxBytesToCopy);
            WriteEndOfBlock(output);
        }

        /// <summary>Compress data without block formatting (no header and no footer).</summary>
        internal void GetCompressedData(DeflateInput input, OutputBuffer output)
        {
            GetCompressedOutput(input, output, -1);
        }

        internal void GetBlockHeader(OutputBuffer output)
        {
            WriteDeflatePreamble(output);
        }

        internal void GetBlockFooter(OutputBuffer output)
        {
            WriteEndOfBlock(output);
        }

        // maxBytesToCopy limits the number of bytes we can copy from input.
        // Set to any value < 1 if no limit.
        private void GetCompressedOutput(DeflateInput input, OutputBuffer output, int maxBytesToCopy)
        {
            // snapshot for compression ratio stats
            int bytesWrittenPre = output.BytesWritten;
            int bytesConsumedFromInput = 0;
            int inputBytesPre = BytesInHistory + input.Count;

            do
            {
                // read more input data into the window if there is space available
                int bytesToCopy = (input.Count < _inputWindow.FreeWindowSpace) ?
                    input.Count : _inputWindow.FreeWindowSpace;
                if (maxBytesToCopy >= 1)
                {
                    bytesToCopy = Math.Min(bytesToCopy, maxBytesToCopy - bytesConsumedFromInput);
                }
                if (bytesToCopy > 0)
                {
                    // copy data into history window
                    _inputWindow.CopyBytes(input.Buffer, input.StartIndex, bytesToCopy);
                    input.ConsumeBytes(bytesToCopy);
                    bytesConsumedFromInput += bytesToCopy;
                }

                GetCompressedOutput(output);
            } while (SafeToWriteTo(output) && InputAvailable(input) && (maxBytesToCopy < 1 || bytesConsumedFromInput < maxBytesToCopy));

            // determine compression ratio, save
            int bytesWrittenPost = output.BytesWritten;
            int bytesWritten = bytesWrittenPost - bytesWrittenPre;
            int inputBytesPost = BytesInHistory + input.Count;
            int totalBytesConsumed = inputBytesPre - inputBytesPost;
            if (bytesWritten != 0)
            {
                _lastCompressionRatio = (double)bytesWritten / (double)totalBytesConsumed;
            }
        }

        // Compress the bytes currently in the input history window.
        private void GetCompressedOutput(OutputBuffer output)
        {
            while (_inputWindow.BytesAvailable > 0 && SafeToWriteTo(output))
            {
                // Find the next token. A match can be a symbol, a distance/length pair,
                // or a symbol followed by a distance/length pair.
                _inputWindow.GetNextSymbolOrMatch(_currentMatch);

                if (_currentMatch.State == MatchState.HasSymbol)
                {
                    WriteChar(_currentMatch.Symbol, output);
                }
                else if (_currentMatch.State == MatchState.HasMatch)
                {
                    WriteMatch(_currentMatch.Length, _currentMatch.Position, output);
                }
                else
                {
                    WriteChar(_currentMatch.Symbol, output);
                    WriteMatch(_currentMatch.Length, _currentMatch.Position, output);
                }
            }
        }

        private bool InputAvailable(DeflateInput input)
        {
            return input.Count > 0 || BytesInHistory > 0;
        }

        // Can we safely continue writing to the output buffer?
        // (Must leave room for the longest possible code.)
        private bool SafeToWriteTo(OutputBuffer output)
        {
            return output.FreeBytes > FastEncoderStatics.MaxCodeLen;
        }

        private void WriteEndOfBlock(OutputBuffer output)
        {
            // The fast encoder outputs one long block, so it just needs to terminate this block
            const int EndOfBlockCode = 256;
            // camelCase locals (was snake_case code_info/code_len, inconsistent with the file)
            uint codeInfo = FastEncoderStatics.FastEncoderLiteralCodeInfo[EndOfBlockCode];
            int codeLen = (int)(codeInfo & 31);
            output.WriteBits(codeLen, codeInfo >> 5);
        }

        // Emit a length/distance pair using the hard-coded code tables.
        internal static void WriteMatch(int matchLen, int matchPos, OutputBuffer output)
        {
            Debug.Assert(matchLen >= FastEncoderWindow.MinMatch && matchLen <= FastEncoderWindow.MaxMatch, "Illegal currentMatch length!");

            // Get the code information for a match code
            uint codeInfo = FastEncoderStatics.FastEncoderLiteralCodeInfo[(FastEncoderStatics.NumChars + 1 - FastEncoderWindow.MinMatch) + matchLen];
            int codeLen = (int)codeInfo & 31;
            Debug.Assert(codeLen != 0, "Invalid Match Length!");
            if (codeLen <= 16)
            {
                output.WriteBits(codeLen, codeInfo >> 5);
            }
            else
            {
                // codes longer than 16 bits are written in two chunks
                output.WriteBits(16, (codeInfo >> 5) & 65535);
                output.WriteBits(codeLen - 16, codeInfo >> (5 + 16));
            }

            // Get the code information for a distance code
            codeInfo = FastEncoderStatics.FastEncoderDistanceCodeInfo[FastEncoderStatics.GetSlot(matchPos)];
            output.WriteBits((int)(codeInfo & 15), codeInfo >> 8);
            int extraBits = (int)(codeInfo >> 4) & 15;
            if (extraBits != 0)
            {
                output.WriteBits(extraBits, (uint)matchPos & FastEncoderStatics.BitMask[extraBits]);
            }
        }

        // Emit a single literal byte.
        internal static void WriteChar(byte b, OutputBuffer output)
        {
            uint code = FastEncoderStatics.FastEncoderLiteralCodeInfo[b];
            output.WriteBits((int)code & 31, code >> 5);
        }

        // Output the block type and tree structure for our hard-coded trees.
        // Contains following data:
        //   "final" block flag  1 bit
        //   BLOCKTYPE_DYNAMIC   2 bits
        //   FastEncoderLiteralTreeLength
        //   FastEncoderDistanceTreeLength
        internal static void WriteDeflatePreamble(OutputBuffer output)
        {
            output.WriteBytes(FastEncoderStatics.FastEncoderTreeStructureData, 0, FastEncoderStatics.FastEncoderTreeStructureData.Length);
            output.WriteBits(FastEncoderStatics.FastEncoderPostTreeBitCount, FastEncoderStatics.FastEncoderPostTreeBitBuf);
        }
    }
}

// ---- file: DeflateManaged/FastEncoderStatus.cs ----
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Globalization;

namespace System.IO.Compression
{
    /// <summary>
    /// Static tables for the fast deflate encoder: a pre-encoded Huffman tree
    /// preamble plus the lookup tables needed to emit literal, length and
    /// distance codes against those trees. DO NOT MODIFY the table contents -
    /// the encoded preamble and the code tables must stay in sync.
    /// </summary>
    internal static class FastEncoderStatics
    {
        // Pre-encoded dynamic-block preamble (BFINAL clear) describing the hard-coded trees.
        internal static readonly byte[] FastEncoderTreeStructureData = {
            0xec,0xbd,0x07,0x60,0x1c,0x49,0x96,0x25,0x26,0x2f,0x6d,0xca,
            0x7b,0x7f,0x4a,0xf5,0x4a,0xd7,0xe0,0x74,0xa1,0x08,0x80,0x60,
            0x13,0x24,0xd8,0x90,0x40,0x10,0xec,0xc1,0x88,0xcd,0xe6,0x92,
            0xec,0x1d,0x69,0x47,0x23,0x29,0xab,0x2a,0x81,0xca,0x65,0x56,
            0x65,0x5d,0x66,0x16,0x40,0xcc,0xed,0x9d,0xbc,0xf7,0xde,0x7b,
            0xef,0xbd,0xf7,0xde,0x7b,0xef,0xbd,0xf7,0xba,0x3b,0x9d,0x4e,
            0x27,0xf7,0xdf,0xff,0x3f,0x5c,0x66,0x64,0x01,0x6c,0xf6,0xce,
            0x4a,0xda,0xc9,0x9e,0x21,0x80,0xaa,0xc8,0x1f,0x3f,0x7e,0x7c,
            0x1f,0x3f,
        };

        // Same preamble with the BFINAL bit set (only the first byte differs: 0xed vs 0xec).
        internal static readonly byte[] BFinalFastEncoderTreeStructureData = {
            0xed,0xbd,0x07,0x60,0x1c,0x49,0x96,0x25,0x26,0x2f,0x6d,0xca,
            0x7b,0x7f,0x4a,0xf5,0x4a,0xd7,0xe0,0x74,0xa1,0x08,0x80,0x60,
            0x13,0x24,0xd8,0x90,0x40,0x10,0xec,0xc1,0x88,0xcd,0xe6,0x92,
            0xec,0x1d,0x69,0x47,0x23,0x29,0xab,0x2a,0x81,0xca,0x65,0x56,
            0x65,0x5d,0x66,0x16,0x40,0xcc,0xed,0x9d,0xbc,0xf7,0xde,0x7b,
            0xef,0xbd,0xf7,0xde,0x7b,0xef,0xbd,0xf7,0xba,0x3b,0x9d,0x4e,
            0x27,0xf7,0xdf,0xff,0x3f,0x5c,0x66,0x64,0x01,0x6c,0xf6,0xce,
            0x4a,0xda,0xc9,0x9e,0x21,0x80,0xaa,0xc8,0x1f,0x3f,0x7e,0x7c,
            0x1f,0x3f,
        };

        // Optimization notes (why one combined code-info table instead of separate
        // slot/length/code/extra-bit arrays):
        //
        // For each match length (not just each length slot) we store a single uint with
        // all extra bits already filled in. That turns five array lookups (length slot,
        // tree length, tree code, extra length bits, bit mask) into one, on a uint array
        // with no sub-word access penalty. If codeLen were guaranteed <= 16 the writer
        // could also skip a branch.
        //
        // A combined table for all 8192 distances is not feasible, so the distance
        // code-info table instead packs code, length and #extraBits per distance slot -
        // the same wins (one uint array, better cache locality, fewer memory ops).

        // Encoding information for literals and lengths.
        // The least 5 significant bits are the code length; the remaining bits are the code.
        internal static readonly uint[] FastEncoderLiteralCodeInfo = {
            0x0000d7ee,0x0004d7ee,0x0002d7ee,0x0006d7ee,0x0001d7ee,0x0005d7ee,0x0003d7ee,
            0x0007d7ee,0x000037ee,0x0000c7ec,0x00000126,0x000437ee,0x000237ee,0x000637ee,
            0x000137ee,0x000537ee,0x000337ee,0x000737ee,0x0000b7ee,0x0004b7ee,0x0002b7ee,
            0x0006b7ee,0x0001b7ee,0x0005b7ee,0x0003b7ee,0x0007b7ee,0x000077ee,0x000477ee,
            0x000277ee,0x000677ee,0x000017ed,0x000177ee,0x00000526,0x000577ee,0x000023ea,
            0x0001c7ec,0x000377ee,0x000777ee,0x000217ed,0x000063ea,0x00000b68,0x00000ee9,
            0x00005beb,0x000013ea,0x00000467,0x00001b68,0x00000c67,0x00002ee9,0x00000768,
            0x00001768,0x00000f68,0x00001ee9,0x00001f68,0x00003ee9,0x000053ea,0x000001e9,
            0x000000e8,0x000021e9,0x000011e9,0x000010e8,0x000031e9,0x000033ea,0x000008e8,
            0x0000f7ee,0x0004f7ee,0x000018e8,0x000009e9,0x000004e8,0x000029e9,0x000014e8,
            0x000019e9,0x000073ea,0x0000dbeb,0x00000ce8,0x00003beb,0x0002f7ee,0x000039e9,
            0x00000bea,0x000005e9,0x00004bea,0x000025e9,0x000027ec,0x000015e9,0x000035e9,
            0x00000de9,0x00002bea,0x000127ec,0x0000bbeb,0x0006f7ee,0x0001f7ee,0x0000a7ec,
            0x00007beb,0x0005f7ee,0x0000fbeb,0x0003f7ee,0x0007f7ee,0x00000fee,0x00000326,
            0x00000267,0x00000a67,0x00000667,0x00000726,0x00001ce8,0x000002e8,0x00000e67,
            0x000000a6,0x0001a7ec,0x00002de9,0x000004a6,0x00000167,0x00000967,0x000002a6,
            0x00000567,0x000117ed,0x000006a6,0x000001a6,0x000005a6,0x00000d67,0x000012e8,
            0x00000ae8,0x00001de9,0x00001ae8,0x000007eb,0x000317ed,0x000067ec,0x000097ed,
            0x000297ed,0x00040fee,0x00020fee,0x00060fee,0x00010fee,0x00050fee,0x00030fee,
            0x00070fee,0x00008fee,0x00048fee,0x00028fee,0x00068fee,0x00018fee,0x00058fee,
            0x00038fee,0x00078fee,0x00004fee,0x00044fee,0x00024fee,0x00064fee,0x00014fee,
            0x00054fee,0x00034fee,0x00074fee,0x0000cfee,0x0004cfee,0x0002cfee,0x0006cfee,
            0x0001cfee,0x0005cfee,0x0003cfee,0x0007cfee,0x00002fee,0x00042fee,0x00022fee,
            0x00062fee,0x00012fee,0x00052fee,0x00032fee,0x00072fee,0x0000afee,0x0004afee,
            0x0002afee,0x0006afee,0x0001afee,0x0005afee,0x0003afee,0x0007afee,0x00006fee,
            0x00046fee,0x00026fee,0x00066fee,0x00016fee,0x00056fee,0x00036fee,0x00076fee,
            0x0000efee,0x0004efee,0x0002efee,0x0006efee,0x0001efee,0x0005efee,0x0003efee,
            0x0007efee,0x00001fee,0x00041fee,0x00021fee,0x00061fee,0x00011fee,0x00051fee,
            0x00031fee,0x00071fee,0x00009fee,0x00049fee,0x00029fee,0x00069fee,0x00019fee,
            0x00059fee,0x00039fee,0x00079fee,0x00005fee,0x00045fee,0x00025fee,0x00065fee,
            0x00015fee,0x00055fee,0x00035fee,0x00075fee,0x0000dfee,0x0004dfee,0x0002dfee,
            0x0006dfee,0x0001dfee,0x0005dfee,0x0003dfee,0x0007dfee,0x00003fee,0x00043fee,
            0x00023fee,0x00063fee,0x00013fee,0x00053fee,0x00033fee,0x00073fee,0x0000bfee,
            0x0004bfee,0x0002bfee,0x0006bfee,0x0001bfee,0x0005bfee,0x0003bfee,0x0007bfee,
            0x00007fee,0x00047fee,0x00027fee,0x00067fee,0x00017fee,0x000197ed,0x000397ed,
            0x000057ed,0x00057fee,0x000257ed,0x00037fee,0x000157ed,0x00077fee,0x000357ed,
            0x0000ffee,0x0004ffee,0x0002ffee,0x0006ffee,0x0001ffee,0x00000084,0x00000003,
            0x00000184,0x00000044,0x00000144,0x000000c5,0x000002c5,0x000001c5,0x000003c6,
            0x000007c6,0x00000026,0x00000426,0x000003a7,0x00000ba7,0x000007a7,0x00000fa7,
            0x00000227,0x00000627,0x00000a27,0x00000e27,0x00000068,0x00000868,0x00001068,
            0x00001868,0x00000369,0x00001369,0x00002369,0x00003369,0x000006ea,0x000026ea,
            0x000046ea,0x000066ea,0x000016eb,0x000036eb,0x000056eb,0x000076eb,0x000096eb,
            0x0000b6eb,0x0000d6eb,0x0000f6eb,0x00003dec,0x00007dec,0x0000bdec,0x0000fdec,
            0x00013dec,0x00017dec,0x0001bdec,0x0001fdec,0x00006bed,0x0000ebed,0x00016bed,
            0x0001ebed,0x00026bed,0x0002ebed,0x00036bed,0x0003ebed,0x000003ec,0x000043ec,
            0x000083ec,0x0000c3ec,0x000103ec,0x000143ec,0x000183ec,0x0001c3ec,0x00001bee,
            0x00009bee,0x00011bee,0x00019bee,0x00021bee,0x00029bee,0x00031bee,0x00039bee,
            0x00041bee,0x00049bee,0x00051bee,0x00059bee,0x00061bee,0x00069bee,0x00071bee,
            0x00079bee,0x000167f0,0x000367f0,0x000567f0,0x000767f0,0x000967f0,0x000b67f0,
            0x000d67f0,0x000f67f0,0x001167f0,0x001367f0,0x001567f0,0x001767f0,0x001967f0,
            0x001b67f0,0x001d67f0,0x001f67f0,0x000087ef,0x000187ef,0x000287ef,0x000387ef,
            0x000487ef,0x000587ef,0x000687ef,0x000787ef,0x000887ef,0x000987ef,0x000a87ef,
            0x000b87ef,0x000c87ef,0x000d87ef,0x000e87ef,0x000f87ef,0x0000e7f0,0x0002e7f0,
            0x0004e7f0,0x0006e7f0,0x0008e7f0,0x000ae7f0,0x000ce7f0,0x000ee7f0,0x0010e7f0,
            0x0012e7f0,0x0014e7f0,0x0016e7f0,0x0018e7f0,0x001ae7f0,0x001ce7f0,0x001ee7f0,
            0x0005fff3,0x000dfff3,0x0015fff3,0x001dfff3,0x0025fff3,0x002dfff3,0x0035fff3,
            0x003dfff3,0x0045fff3,0x004dfff3,0x0055fff3,0x005dfff3,0x0065fff3,0x006dfff3,
            0x0075fff3,0x007dfff3,0x0085fff3,0x008dfff3,0x0095fff3,0x009dfff3,0x00a5fff3,
            0x00adfff3,0x00b5fff3,0x00bdfff3,0x00c5fff3,0x00cdfff3,0x00d5fff3,0x00ddfff3,
            0x00e5fff3,0x00edfff3,0x00f5fff3,0x00fdfff3,0x0003fff3,0x000bfff3,0x0013fff3,
            0x001bfff3,0x0023fff3,0x002bfff3,0x0033fff3,0x003bfff3,0x0043fff3,0x004bfff3,
            0x0053fff3,0x005bfff3,0x0063fff3,0x006bfff3,0x0073fff3,0x007bfff3,0x0083fff3,
            0x008bfff3,0x0093fff3,0x009bfff3,0x00a3fff3,0x00abfff3,0x00b3fff3,0x00bbfff3,
            0x00c3fff3,0x00cbfff3,0x00d3fff3,0x00dbfff3,0x00e3fff3,0x00ebfff3,0x00f3fff3,
            0x00fbfff3,0x0007fff3,0x000ffff3,0x0017fff3,0x001ffff3,0x0027fff3,0x002ffff3,
            0x0037fff3,0x003ffff3,0x0047fff3,0x004ffff3,0x0057fff3,0x005ffff3,0x0067fff3,
            0x006ffff3,0x0077fff3,0x007ffff3,0x0087fff3,0x008ffff3,0x0097fff3,0x009ffff3,
            0x00a7fff3,0x00affff3,0x00b7fff3,0x00bffff3,0x00c7fff3,0x00cffff3,0x00d7fff3,
            0x00dffff3,0x00e7fff3,0x00effff3,0x00f7fff3,0x00fffff3,0x0001e7f1,0x0003e7f1,
            0x0005e7f1,0x0007e7f1,0x0009e7f1,0x000be7f1,0x000de7f1,0x000fe7f1,0x0011e7f1,
            0x0013e7f1,0x0015e7f1,0x0017e7f1,0x0019e7f1,0x001be7f1,0x001de7f1,0x001fe7f1,
            0x0021e7f1,0x0023e7f1,0x0025e7f1,0x0027e7f1,0x0029e7f1,0x002be7f1,0x002de7f1,
            0x002fe7f1,0x0031e7f1,0x0033e7f1,0x0035e7f1,0x0037e7f1,0x0039e7f1,0x003be7f1,
            0x003de7f1,0x000047eb,
        };

        // Distance code info, one entry per distance slot:
        // bits 0-3 = code length, bits 4-7 = # extra bits, bits 8+ = the code itself.
        internal static readonly uint[] FastEncoderDistanceCodeInfo = {
            0x00000f06,0x0001ff0a,0x0003ff0b,0x0007ff0b,0x0000ff19,0x00003f18,0x0000bf28,
            0x00007f28,0x00001f37,0x00005f37,0x00000d45,0x00002f46,0x00000054,0x00001d55,
            0x00000864,0x00000365,0x00000474,0x00001375,0x00000c84,0x00000284,0x00000a94,
            0x00000694,0x00000ea4,0x000001a4,0x000009b4,0x00000bb5,0x000005c4,0x00001bc5,
            0x000007d5,0x000017d5,0x00000000,0x00000100,
        };

        // BitMask[n] == (1 << n) - 1, used to extract the low n extra bits of a distance.
        internal static readonly uint[] BitMask = { 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767 };
        internal static readonly byte[] ExtraLengthBits = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 };
        internal static readonly byte[] ExtraDistanceBits = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 0, 0 };
        internal const int NumChars = 256;
        internal const int NumLengthBaseCodes = 29;
        internal const int NumDistBaseCodes = 30;

        internal const uint FastEncoderPostTreeBitBuf = 0x0022;
        internal const int FastEncoderPostTreeBitCount = 9;

        internal const uint NoCompressionHeader = 0x0;
        internal const int NoCompressionHeaderBitCount = 3;
        internal const uint BFinalNoCompressionHeader = 0x1;
        internal const int BFinalNoCompressionHeaderBitCount = 3;
        internal const int MaxCodeLen = 16;

        // Distance -> slot lookup; filled exactly once in the static constructor,
        // so declared readonly (was a mutable 'static private' field).
        private static readonly byte[] s_distLookup;

        static FastEncoderStatics()
        {
            s_distLookup = new byte[512];

            // Generate the global slot table which allows us to convert a distance
            // (0..32K) to a distance slot (0..29).
            //
            // Distance table (distance = pos + 1):
            //        Extra           Extra            Extra
            // Code Bits Dist  Code Bits  Dist   Code Bits  Distance
            // ---- ---- ----  ---- ----  ----   ---- ----  --------
            //   0   0    1     10   4   33-48    20   9   1025-1536
            //   1   0    2     11   4   49-64    21   9   1537-2048
            //   2   0    3     12   5   65-96    22  10   2049-3072
            //   3   0    4     13   5   97-128   23  10   3073-4096
            //   4   1    5,6   14   6  129-192   24  11   4097-6144
            //   5   1    7,8   15   6  193-256   25  11   6145-8192
            //   6   2    9-12  16   7  257-384   26  12   8193-12288
            //   7   2   13-16  17   7  385-512   27  12  12289-16384
            //   8   3   17-24  18   8  513-768   28  13  16385-24576
            //   9   3   25-32  19   8  769-1024  29  13  24577-32768

            // Initialize the mapping dist (0..32K) -> dist code (0..29).
            // Codes 0..15 get one table entry per distance.
            int dist = 0;
            int code;
            for (code = 0; code < 16; code++)
            {
                for (int n = 0; n < (1 << ExtraDistanceBits[code]); n++)
                    s_distLookup[dist++] = (byte)code;
            }

            dist >>= 7; // from now on, all distances are divided by 128

            // Codes 16..29 share entries at 128-distance granularity.
            for (; code < NumDistBaseCodes; code++)
            {
                for (int n = 0; n < (1 << (ExtraDistanceBits[code] - 7)); n++)
                    s_distLookup[256 + dist++] = (byte)code;
            }
        }

        /// <summary>Return the distance slot (0..29) of a match offset (0..32767).</summary>
        internal static int GetSlot(int pos)
        {
            return s_distLookup[(pos < 256) ? pos : (256 + (pos >> 7))];
        }

        /// <summary>Reverse the low <paramref name="length"/> bits of <paramref name="code"/>.</summary>
        public static uint BitReverse(uint code, int length)
        {
            uint reversed = 0;

            Debug.Assert(length > 0 && length <= 16, "Invalid len");
            do
            {
                reversed |= (code & 1);
                reversed <<= 1;
                code >>= 1;
            } while (--length > 0);

            // the loop shifts once too many, undo it
            return reversed >> 1;
        }
    }
}

// ---- file: DeflateManaged/FastEncoderWindow.cs ----
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Diagnostics;

namespace System.IO.Compression
{
    /// <summary>
    /// Sliding history window plus hash-chain match finder for the fast deflate encoder.
    /// </summary>
    internal class FastEncoderWindow
    {
        private byte[] _window; // complete bytes window
        private int _bufPos;    // the start index of uncompressed bytes
        private int _bufEnd;    // the end index of uncompressed bytes

        // Be very careful about increasing the window size; the code tables will have to
        // be updated, since they assume that extra_distance_bits is never larger than a
        // certain size.
        private const int FastEncoderHashShift = 4;
        private const int FastEncoderHashtableSize = 2048;
        private const int FastEncoderHashMask = FastEncoderHashtableSize - 1;
        private const int FastEncoderWindowSize = 8192;
        private const int FastEncoderWindowMask = FastEncoderWindowSize - 1;
        private const int FastEncoderMatch3DistThreshold = 16384;
        internal const int MaxMatch = 258;
        internal const int MinMatch = 3;

        // Following constants affect the search; they should be made modifiable if we
        // support different compression levels in the future.
        private const int SearchDepth = 32;
        private const int GoodLength = 4;
        private const int NiceLength = 32;
        private const int LazyMatchThreshold = 6;

        // Hashtable structure
        private ushort[] _prev;   // next most recent occurrence of chars with same hash value
        private ushort[] _lookup; // hash table to find most recent occurrence of chars with same hash value

        public FastEncoderWindow()
        {
            ResetWindow();
        }

        /// <summary>Number of buffered, not-yet-compressed bytes.</summary>
        public int BytesAvailable
        {
            get
            {
                Debug.Assert(_bufEnd - _bufPos >= 0, "Ending pointer can't be in front of starting pointer!");
                return _bufEnd - _bufPos;
            }
        }

        /// <summary>View over the uncompressed tail of the window.</summary>
        public DeflateInput UnprocessedInput
        {
            get
            {
                DeflateInput input = new DeflateInput();
                input.Buffer = _window;
                input.StartIndex = _bufPos;
                input.Count = _bufEnd - _bufPos;
                return input;
            }
        }

        public void FlushWindow()
        {
            ResetWindow();
        }

        private void ResetWindow()
        {
            // extra MaxMatch + 4 bytes of padding let the match loop read past _bufEnd safely
            _window = new byte[2 * FastEncoderWindowSize + MaxMatch + 4];
            _prev = new ushort[FastEncoderWindowSize + MaxMatch];
            _lookup = new ushort[FastEncoderHashtableSize];
            _bufPos = FastEncoderWindowSize;
            _bufEnd = _bufPos;
        }

        /// <summary>Free space in the window.</summary>
        public int FreeWindowSpace
        {
            get
            {
                return 2 * FastEncoderWindowSize - _bufEnd;
            }
        }

        /// <summary>Copy bytes from an input buffer into the window.</summary>
        public void CopyBytes(byte[] inputBuffer, int startIndex, int count)
        {
            Array.Copy(inputBuffer, startIndex, _window, _bufEnd, count);
            _bufEnd += count;
        }

        /// <summary>Slide the history window to the left by FastEncoderWindowSize bytes.</summary>
        public void MoveWindows()
        {
            int i;
            Debug.Assert(_bufPos == 2 * FastEncoderWindowSize, "only call this at the end of the window");

            // verify that the hash table is correct
            VerifyHashes(); // Debug only code

            Array.Copy(_window, _bufPos - FastEncoderWindowSize, _window, 0, FastEncoderWindowSize);

            // move all the hash pointers back
            for (i = 0; i < FastEncoderHashtableSize; i++)
            {
                int val = ((int)_lookup[i]) - FastEncoderWindowSize;

                if (val <= 0)
                { // too far away now? then set to zero
                    _lookup[i] = (ushort)0;
                }
                else
                {
                    _lookup[i] = (ushort)val;
                }
            }

            // prev[]'s are absolute pointers, not relative pointers, so we have to move them
            // back too; making prev[]'s into relative pointers poses problems of its own.
            // NOTE(review): only the first FastEncoderWindowSize entries of _prev are rebased;
            // the MaxMatch-sized tail appears unused at this point - TODO confirm.
            for (i = 0; i < FastEncoderWindowSize; i++)
            {
                // int is sufficient here (ushort minus a constant); was needlessly 'long'
                int val = ((int)_prev[i]) - FastEncoderWindowSize;

                if (val <= 0)
                {
                    _prev[i] = (ushort)0;
                }
                else
                {
                    _prev[i] = (ushort)val;
                }
            }

#if DEBUG
            // For debugging, wipe the window clean, so that if there is a bug in our hashing,
            // the hash pointers will now point to locations which are not valid for the hash value
            // (and will be caught by our ASSERTs).
            Array.Clear(_window, FastEncoderWindowSize, _window.Length - FastEncoderWindowSize);
#endif

            VerifyHashes(); // debug: verify hash table is correct

            _bufPos = FastEncoderWindowSize;
            _bufEnd = _bufPos;
        }

        // Roll one byte into the running 3-byte hash.
        private uint HashValue(uint hash, byte b)
        {
            return (hash << FastEncoderHashShift) ^ b;
        }

        // Insert string into hash table and return the most recent location of the same hash value.
        private uint InsertString(ref uint hash)
        {
            // Note we only use the lowest 11 bits of the hash value (hash table size is 2^11).
            // This enables fast calculation of the hash value for the input string.
            // If we want to get the next hash code starting at the next position,
            // we can just increment bufPos and call this function.

            hash = HashValue(hash, _window[_bufPos + 2]);

            uint search = _lookup[hash & FastEncoderHashMask];
            _lookup[hash & FastEncoderHashMask] = (ushort)_bufPos;
            _prev[_bufPos & FastEncoderWindowMask] = (ushort)search;
            return search;
        }

        //
        // Insert strings into the hashtable.
        // Arguments:
        //     hash     : initial hash value
        //     matchLen : 1 + number of strings we need to insert
        //
        private void InsertStrings(ref uint hash, int matchLen)
        {
            Debug.Assert(matchLen > 0, "Invalid match Len!");
            if (_bufEnd - _bufPos <= matchLen)
            {
                // too close to the end to hash safely; just advance past the match
                _bufPos += (matchLen - 1);
            }
            else
            {
                while (--matchLen > 0)
                {
                    InsertString(ref hash);
                    _bufPos++;
                }
            }
        }

        //
        // Find out what we should generate next. It can be a symbol, a distance/length pair
        // or a symbol followed by a distance/length pair.
        //
        internal bool GetNextSymbolOrMatch(Match match)
        {
            Debug.Assert(_bufPos >= FastEncoderWindowSize && _bufPos < (2 * FastEncoderWindowSize), "Invalid Buffer Position!");

            // initialise the value of the hash; no problem if locations bufPos, bufPos+1
            // are invalid (not enough data), since we will never insert using that hash value
            uint hash = HashValue(0, _window[_bufPos]);
            hash = HashValue(hash, _window[_bufPos + 1]);

            int matchLen;
            int matchPos = 0;

            VerifyHashes(); // Debug only code
            if (_bufEnd - _bufPos <= 3)
            {
                // The hash value becomes corrupt when we get within 3 characters of the end of the
                // input window, since the hash value is based on 3 characters. We just stop
                // inserting into the hash table at this point, and allow no matches.
                matchLen = 0;
            }
            else
            {
                // insert string into hash table and return most recent location of same hash value
                int search = (int)InsertString(ref hash);

                // did we find a recent location of this hash value?
                if (search != 0)
                {
                    // yes, now find a match at what we'll call position X
                    matchLen = FindMatch(search, out matchPos, SearchDepth, NiceLength);

                    // truncate match if we're too close to the end of the input window
                    if (_bufPos + matchLen > _bufEnd)
                        matchLen = _bufEnd - _bufPos;
                }
                else
                {
                    // no most recent location found
                    matchLen = 0;
                }
            }

            if (matchLen < MinMatch)
            {
                // didn't find a match, so output unmatched char
                match.State = MatchState.HasSymbol;
                match.Symbol = _window[_bufPos];
                _bufPos++;
            }
            else
            {
                // bufPos now points to X+1
                _bufPos++;

                // is this match so good (long) that we should take it automatically without
                // checking X+1?
                if (matchLen <= LazyMatchThreshold)
                {
                    int nextMatchLen;
                    int nextMatchPos = 0;

                    // search at position X+1
                    int search = (int)InsertString(ref hash);

                    // no, so check for a better match at X+1
                    if (search != 0)
                    {
                        nextMatchLen = FindMatch(search, out nextMatchPos,
                            matchLen < GoodLength ? SearchDepth : (SearchDepth >> 2), NiceLength);

                        // truncate match if we're too close to the end of the window
                        // note: nextMatchLen could now be < MinMatch
                        if (_bufPos + nextMatchLen > _bufEnd)
                        {
                            nextMatchLen = _bufEnd - _bufPos;
                        }
                    }
                    else
                    {
                        nextMatchLen = 0;
                    }

                    // right now X and X+1 are both inserted into the search tree
                    if (nextMatchLen > matchLen)
                    {
                        // since nextMatchLen > matchLen, it can't be < MinMatch here

                        // match at X+1 is better, so output unmatched char at X
                        match.State = MatchState.HasSymbolAndMatch;
                        match.Symbol = _window[_bufPos - 1];
                        match.Position = nextMatchPos;
                        match.Length = nextMatchLen;

                        // insert remainder of second match into search tree
                        // example: (*=inserted already)
                        //
                        //      X    X+1   X+2   X+3   X+4
                        //      *     *
                        //            nextmatchlen=3
                        //            bufPos
                        //
                        // If nextMatchLen == 3, we want to perform 2
                        // insertions (at X+2 and X+3). However, first we must
                        // inc bufPos.
                        //
                        _bufPos++; // now points to X+2
                        matchLen = nextMatchLen;
                        InsertStrings(ref hash, matchLen);
                    }
                    else
                    {
                        // match at X is better, so take it
                        match.State = MatchState.HasMatch;
                        match.Position = matchPos;
                        match.Length = matchLen;

                        // Insert remainder of first match into search tree, minus the first
                        // two locations, which were inserted by the FindMatch() calls.
                        //
                        // For example, if matchLen == 3, then we've inserted at X and X+1
                        // already (and bufPos is now pointing at X+1), and now we need to insert
                        // only at X+2.
                        //
                        matchLen--;
                        _bufPos++; // now bufPos points to X+2
                        InsertStrings(ref hash, matchLen);
                    }
                }
                else
                { // match_length >= good_match
                    // in assertion: bufPos points to X+1, location X inserted already
                    // first match is so good that we're not even going to check at X+1
                    match.State = MatchState.HasMatch;
                    match.Position = matchPos;
                    match.Length = matchLen;

                    // insert remainder of match at X into search tree
                    InsertStrings(ref hash, matchLen);
                }
            }

            if (_bufPos == 2 * FastEncoderWindowSize)
            {
                MoveWindows();
            }
            return true;
        }

        //
        // Find a match starting at specified position and return length of match.
        // Arguments:
        //      search      : where to start searching
        //      matchPos    : return match position here
        //      searchDepth : # links to traverse
        //      niceLength  : stop immediately if we find a match >= niceLength
        //
        private int FindMatch(int search, out int matchPos, int searchDepth, int niceLength)
        {
            Debug.Assert(_bufPos >= 0 && _bufPos < 2 * FastEncoderWindowSize, "Invalid Buffer position!");
            Debug.Assert(search < _bufPos, "Invalid starting search point!");
            Debug.Assert(RecalculateHash((int)search) == RecalculateHash(_bufPos));

            int bestMatch = 0;    // best match length found so far
            int bestMatchPos = 0; // absolute match position of best match found

            // the earliest we can look
            int earliest = _bufPos - FastEncoderWindowSize;
            Debug.Assert(earliest >= 0, "bufPos is less than FastEncoderWindowSize!");

            byte wantChar = _window[_bufPos];
            while (search > earliest)
            {
                // make sure all our hash links are valid
                Debug.Assert(RecalculateHash((int)search) == RecalculateHash(_bufPos), "Corrupted hash link!");

                // Start by checking the character that would allow us to increase the match
                // length by one. This improves performance quite a bit.
                if (_window[search + bestMatch] == wantChar)
                {
                    int j;

                    // Now make sure that all the other characters are correct
                    for (j = 0; j < MaxMatch; j++)
                    {
                        if (_window[_bufPos + j] != _window[search + j])
                            break;
                    }

                    if (j > bestMatch)
                    {
                        bestMatch = j;
                        bestMatchPos = search; // absolute position
                        // BUGFIX: compare against the niceLength *parameter*; the previous code
                        // compared against the NiceLength constant, silently ignoring the
                        // caller-supplied cutoff. (Both current call sites pass NiceLength, so
                        // observable behavior is unchanged.)
                        if (j > niceLength)
                            break;
                        wantChar = _window[_bufPos + j];
                    }
                }

                if (--searchDepth == 0)
                {
                    break;
                }

                Debug.Assert(_prev[search & FastEncoderWindowMask] < search, "we should always go backwards!");

                search = _prev[search & FastEncoderWindowMask];
            }

            // doesn't necessarily mean we found a match; bestMatch could be > 0 and < MinMatch
            matchPos = _bufPos - bestMatchPos - 1; // convert absolute to relative position

            // don't allow match length 3's which are too far away to be worthwhile
            if (bestMatch == 3 && matchPos >= FastEncoderMatch3DistThreshold)
            {
                return 0;
            }

            Debug.Assert(bestMatch < MinMatch || matchPos < FastEncoderWindowSize, "Only find match inside FastEncoderWindowSize");
            return bestMatch;
        }

        // This function makes any execution take a *very* long time to complete.
        // Disabling for now by using non-"DEBUG" compilation constant.
    // Walks every hash chain and asserts that each link's stored hash matches a
    // recomputed hash, and that chains strictly decrease (point backwards).
    // Debug-only: enabled by defining VERIFY_HASHES; extremely slow.
    [Conditional("VERIFY_HASHES")]
    private void VerifyHashes()
    {
        for (int i = 0; i < FastEncoderHashtableSize; i++)
        {
            ushort where = _lookup[i];
            ushort nextWhere;

            while (where != 0 && _bufPos - where < FastEncoderWindowSize)
            {
                Debug.Assert(RecalculateHash(where) == i, "Incorrect Hashcode!");
                nextWhere = _prev[where & FastEncoderWindowMask];
                if (_bufPos - nextWhere >= FastEncoderWindowSize)
                {
                    break;
                }

                Debug.Assert(nextWhere < where, "pointer is messed up!");
                where = nextWhere;
            }
        }
    }

    // Recomputes the 3-byte rolling hash at the given window position.
    // (Can't use the Conditional attribute here: this is also called from asserts above.)
    private uint RecalculateHash(int position)
    {
        return (uint)(((_window[position] << (2 * FastEncoderHashShift)) ^
            (_window[position + 1] << FastEncoderHashShift) ^
            (_window[position + 2])) & FastEncoderHashMask);
    }
}
}
diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FileFormats.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FileFormats.cs
new file mode 100644
index 000000000000..d3a77f3fb6c9
--- /dev/null
+++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/FileFormats.cs
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace System.IO.Compression
{
    // Writes a container format (e.g. gzip) around a raw deflate stream:
    // emits the header, folds every payload byte into the running checksum,
    // then emits the footer.
    internal interface IFileFormatWriter
    {
        byte[] GetHeader();
        void UpdateWithBytesRead(byte[] buffer, int offset, int bytesToCopy);
        byte[] GetFooter();
    }

    // Parses and validates a container format (e.g. gzip) around a raw deflate
    // stream. ReadHeader/ReadFooter return false when more input is needed.
    internal interface IFileFormatReader
    {
        bool ReadHeader(InputBuffer input);
        bool ReadFooter(InputBuffer input);
        void UpdateWithBytesRead(byte[] buffer, int offset, int bytesToCopy);
        void Validate();

        /// <summary>
        /// A reader corresponds to an expected file format and contains methods
        /// to read header/footer data from a file of that format. If the Zlib library
        /// is instead being used and the file format is supported, we can simply pass
        /// a supported WindowSize and let Zlib do the header/footer parsing for us.
        ///
        /// This property allows getting of a ZLibWindowSize that can be used in place
        /// of manually parsing the raw data stream.
        /// </summary>
        /// <remarks>
        /// For raw data, return -8..-15.
        /// For GZip header detection and decoding, return 16..31.
        /// For GZip and Zlib header detection and decoding, return 32..47.
        ///
        /// The windowBits parameter for inflation must be greater than or equal to the
        /// windowBits parameter used in deflation.
        ///
        /// If the incorrect header information is used, zlib inflation will likely throw a
        /// Z_DATA_ERROR exception.
        /// </remarks>
        int ZLibWindowSize { get; }
    }
}

diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/HuffmanTree.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/HuffmanTree.cs
new file mode 100644
index 000000000000..a45cf603d6ca
--- /dev/null
+++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/HuffmanTree.cs
@@ -0,0 +1,326 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Diagnostics;

namespace System.IO.Compression
{
    // Strictly speaking this class is not a HuffmanTree, this class is
    // a lookup table combined with a HuffmanTree. The idea is to speed up
    // the lookup for short symbols (they should appear more frequently ideally.)
    // However we don't want to create a huge table since it might take longer to
    // build the table than decoding (Deflate usually generates new tables frequently.)
    //
    // Jean-loup Gailly and Mark Adler gave a very good explanation about this.
+ // The full text (algorithm.txt) can be found inside + // ftp://ftp.uu.net/pub/archiving/zip/zlib/zlib.zip. + // + // Following paper explains decoding in details: + // Hirschberg and Lelewer, "Efficient decoding of prefix codes," + // Comm. ACM, 33,4, April 1990, pp. 449-459. + // + + internal class HuffmanTree + { + internal const int MaxLiteralTreeElements = 288; + internal const int MaxDistTreeElements = 32; + internal const int EndOfBlockCode = 256; + internal const int NumberOfCodeLengthTreeElements = 19; + + private int _tableBits; + private short[] _table; + private short[] _left; + private short[] _right; + private byte[] _codeLengthArray; +#if DEBUG + private uint[] _codeArrayDebug; +#endif + + private int _tableMask; + + // huffman tree for static block + private static HuffmanTree s_staticLiteralLengthTree; + private static HuffmanTree s_staticDistanceTree; + + static HuffmanTree() + { + // construct the static literal tree and distance tree + s_staticLiteralLengthTree = new HuffmanTree(GetStaticLiteralTreeLength()); + s_staticDistanceTree = new HuffmanTree(GetStaticDistanceTreeLength()); + } + + static public HuffmanTree StaticLiteralLengthTree + { + get + { + return s_staticLiteralLengthTree; + } + } + + static public HuffmanTree StaticDistanceTree + { + get + { + return s_staticDistanceTree; + } + } + + public HuffmanTree(byte[] codeLengths) + { + Debug.Assert(codeLengths.Length == MaxLiteralTreeElements + || codeLengths.Length == MaxDistTreeElements + || codeLengths.Length == NumberOfCodeLengthTreeElements, + "we only expect three kinds of Length here"); + _codeLengthArray = codeLengths; + + if (_codeLengthArray.Length == MaxLiteralTreeElements) + { // bits for Literal/Length tree table + _tableBits = 9; + } + else + { // bits for distance tree table and code length tree table + _tableBits = 7; + } + _tableMask = (1 << _tableBits) - 1; + + CreateTable(); + } + + + // Generate the array contains huffman codes lengths for static huffman tree. 
+ // The data is in RFC 1951. + private static byte[] GetStaticLiteralTreeLength() + { + byte[] literalTreeLength = new byte[MaxLiteralTreeElements]; + for (int i = 0; i <= 143; i++) + literalTreeLength[i] = 8; + + for (int i = 144; i <= 255; i++) + literalTreeLength[i] = 9; + + for (int i = 256; i <= 279; i++) + literalTreeLength[i] = 7; + + for (int i = 280; i <= 287; i++) + literalTreeLength[i] = 8; + + return literalTreeLength; + } + + private static byte[] GetStaticDistanceTreeLength() + { + byte[] staticDistanceTreeLength = new byte[MaxDistTreeElements]; + for (int i = 0; i < MaxDistTreeElements; i++) + { + staticDistanceTreeLength[i] = 5; + } + return staticDistanceTreeLength; + } + + + // Calculate the huffman code for each character based on the code length for each character. + // This algorithm is described in standard RFC 1951 + private uint[] CalculateHuffmanCode() + { + uint[] bitLengthCount = new uint[17]; + foreach (int codeLength in _codeLengthArray) + { + bitLengthCount[codeLength]++; + } + bitLengthCount[0] = 0; // clear count for length 0 + + uint[] nextCode = new uint[17]; + uint tempCode = 0; + for (int bits = 1; bits <= 16; bits++) + { + tempCode = (tempCode + bitLengthCount[bits - 1]) << 1; + nextCode[bits] = tempCode; + } + + uint[] code = new uint[MaxLiteralTreeElements]; + for (int i = 0; i < _codeLengthArray.Length; i++) + { + int len = _codeLengthArray[i]; + + if (len > 0) + { + code[i] = FastEncoderStatics.BitReverse(nextCode[len], len); + nextCode[len]++; + } + } + return code; + } + + private void CreateTable() + { + uint[] codeArray = CalculateHuffmanCode(); + _table = new short[1 << _tableBits]; +#if DEBUG + _codeArrayDebug = codeArray; +#endif + + // I need to find proof that left and right array will always be + // enough. I think they are. 
+ _left = new short[2 * _codeLengthArray.Length]; + _right = new short[2 * _codeLengthArray.Length]; + short avail = (short)_codeLengthArray.Length; + + for (int ch = 0; ch < _codeLengthArray.Length; ch++) + { + // length of this code + int len = _codeLengthArray[ch]; + if (len > 0) + { + // start value (bit reversed) + int start = (int)codeArray[ch]; + + if (len <= _tableBits) + { + // If a particular symbol is shorter than nine bits, + // then that symbol's translation is duplicated + // in all those entries that start with that symbol's bits. + // For example, if the symbol is four bits, then it's duplicated + // 32 times in a nine-bit table. If a symbol is nine bits long, + // it appears in the table once. + // + // Make sure that in the loop below, code is always + // less than table_size. + // + // On last iteration we store at array index: + // initial_start_at + (locs-1)*increment + // = initial_start_at + locs*increment - increment + // = initial_start_at + (1 << tableBits) - increment + // = initial_start_at + table_size - increment + // + // Therefore we must ensure: + // initial_start_at + table_size - increment < table_size + // or: initial_start_at < increment + // + int increment = 1 << len; + if (start >= increment) + { + throw new InvalidDataException(SR.InvalidHuffmanData); + } + + // Note the bits in the table are reverted. + int locs = 1 << (_tableBits - len); + for (int j = 0; j < locs; j++) + { + _table[start] = (short)ch; + start += increment; + } + } + else + { + // For any code which has length longer than num_elements, + // build a binary tree. + + int overflowBits = len - _tableBits; // the nodes we need to respent the data. + int codeBitMask = 1 << _tableBits; // mask to get current bit (the bits can't fit in the table) + + // the left, right table is used to repesent the + // the rest bits. When we got the first part (number bits.) and look at + // tbe table, we will need to follow the tree to find the real character. 
+ // This is in place to avoid bloating the table if there are + // a few ones with long code. + int index = start & ((1 << _tableBits) - 1); + short[] array = _table; + + do + { + short value = array[index]; + + if (value == 0) + { // set up next pointer if this node is not used before. + array[index] = (short)-avail; // use next available slot. + value = (short)-avail; + avail++; + } + + if (value > 0) + { // prevent an IndexOutOfRangeException from array[index] + throw new InvalidDataException(SR.InvalidHuffmanData); + } + + Debug.Assert(value < 0, "CreateTable: Only negative numbers are used for tree pointers!"); + + if ((start & codeBitMask) == 0) + { // if current bit is 0, go change the left array + array = _left; + } + else + { // if current bit is 1, set value in the right array + array = _right; + } + index = -value; // go to next node + + codeBitMask <<= 1; + overflowBits--; + } while (overflowBits != 0); + + array[index] = (short)ch; + } + } + } + } + + // + // This function will try to get enough bits from input and + // try to decode the bits. + // If there are no enought bits in the input, this function will return -1. + // + public int GetNextSymbol(InputBuffer input) + { + // Try to load 16 bits into input buffer if possible and get the bitBuffer value. + // If there aren't 16 bits available we will return all we have in the + // input buffer. + uint bitBuffer = input.TryLoad16Bits(); + if (input.AvailableBits == 0) + { // running out of input. 
+ return -1; + } + + // decode an element + int symbol = _table[bitBuffer & _tableMask]; + if (symbol < 0) + { // this will be the start of the binary tree + // navigate the tree + uint mask = (uint)1 << _tableBits; + do + { + symbol = -symbol; + if ((bitBuffer & mask) == 0) + symbol = _left[symbol]; + else + symbol = _right[symbol]; + mask <<= 1; + } while (symbol < 0); + } + + int codeLength = _codeLengthArray[symbol]; + + // huffman code lengths must be at least 1 bit long + if (codeLength <= 0) + { + throw new InvalidDataException(SR.InvalidHuffmanData); + } + + // + // If this code is longer than the # bits we had in the bit buffer (i.e. + // we read only part of the code), we can hit the entry in the table or the tree + // for another symbol. However the length of another symbol will not match the + // available bits count. + if (codeLength > input.AvailableBits) + { + // We already tried to load 16 bits and maximum length is 15, + // so this means we are running out of input. + return -1; + } + + input.SkipBits(codeLength); + return symbol; + } + } +} \ No newline at end of file diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterManaged.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterManaged.cs new file mode 100644 index 000000000000..7da6bbb22fb6 --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterManaged.cs @@ -0,0 +1,766 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// +// zlib.h -- interface of the 'zlib' general purpose compression library +// version 1.2.1, November 17th, 2003 +// +// Copyright (C) 1995-2003 Jean-loup Gailly and Mark Adler +// +// This software is provided 'as-is', without any express or implied +// warranty. 
In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must not +// claim that you wrote the original software. If you use this software +// in a product, an acknowledgment in the product documentation would be +// appreciated but is not required. +// 2. Altered source versions must be plainly marked as such, and must not be +// misrepresented as being the original software. +// 3. This notice may not be removed or altered from any source distribution. +// +// + +using System.Diagnostics; + +namespace System.IO.Compression +{ + internal class InflaterManaged + { + // const tables used in decoding: + + // Extra bits for length code 257 - 285. + private static readonly byte[] s_extraLengthBits = { + 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,16,56,62}; + + // The base length for length code 257 - 285. 
+ // The formula to get the real length for a length code is lengthBase[code - 257] + (value stored in extraBits) + private static readonly int[] s_lengthBase = { + 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,3,0,0}; + + + // The base distance for distance code 0 - 31 + // The real distance for a distance code is distanceBasePosition[code] + (value stored in extraBits) + private static readonly int[] s_distanceBasePosition = { + 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,32769,49153}; + + // code lengths for code length alphabet is stored in following order + private static readonly byte[] s_codeOrder = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; + + private static readonly byte[] s_staticDistanceTreeTable = { + 0x00,0x10,0x08,0x18,0x04,0x14,0x0c,0x1c,0x02,0x12,0x0a,0x1a, + 0x06,0x16,0x0e,0x1e,0x01,0x11,0x09,0x19,0x05,0x15,0x0d,0x1d, + 0x03,0x13,0x0b,0x1b,0x07,0x17,0x0f,0x1f, + }; + + private OutputWindow _output; + private InputBuffer _input; + private HuffmanTree _literalLengthTree; + private HuffmanTree _distanceTree; + + private InflaterState _state; + private bool _hasFormatReader; + private int _bfinal; + private BlockType _blockType; + + // uncompressed block + private byte[] _blockLengthBuffer = new byte[4]; + private int _blockLength; + + // compressed block + private int _length; + private int _distanceCode; + private int _extraBits; + + private int _loopCounter; + private int _literalLengthCodeCount; + private int _distanceCodeCount; + private int _codeLengthCodeCount; + private int _codeArraySize; + private int _lengthCode; + + private byte[] _codeList; // temporary array to store the code length for literal/Length and distance + private byte[] _codeLengthTreeCodeLength; + private bool _deflate64; + private HuffmanTree _codeLengthTree; + + private IFileFormatReader _formatReader; // class to decode header and footer (e.g. 
gzip) + + public InflaterManaged(bool deflate64) + { + _output = new OutputWindow(); + _input = new InputBuffer(); + + _codeList = new byte[HuffmanTree.MaxLiteralTreeElements + HuffmanTree.MaxDistTreeElements]; + _codeLengthTreeCodeLength = new byte[HuffmanTree.NumberOfCodeLengthTreeElements]; + _deflate64 = deflate64; + Reset(); + } + + internal InflaterManaged(IFileFormatReader reader, bool deflate64) + { + _output = new OutputWindow(); + _input = new InputBuffer(); + + _codeList = new byte[HuffmanTree.MaxLiteralTreeElements + HuffmanTree.MaxDistTreeElements]; + _codeLengthTreeCodeLength = new byte[HuffmanTree.NumberOfCodeLengthTreeElements]; + _deflate64 = deflate64; + if (reader != null) + { + _formatReader = reader; + _hasFormatReader = true; + } + Reset(); + } + + public void SetFileFormatReader(IFileFormatReader reader) + { + _formatReader = reader; + _hasFormatReader = true; + Reset(); + } + + private void Reset() + { + if (_hasFormatReader) + { + _state = InflaterState.ReadingHeader; // start by reading Header info + } + else + { + _state = InflaterState.ReadingBFinal; // start by reading BFinal bit + } + } + + public void SetInput(byte[] inputBytes, int offset, int length) + { + _input.SetInput(inputBytes, offset, length); // append the bytes + } + + public bool Finished() + { + return (_state == InflaterState.Done || _state == InflaterState.VerifyingFooter); + } + + public int AvailableOutput + { + get + { + return _output.AvailableBytes; + } + } + + public bool NeedsInput() + { + return _input.NeedsInput(); + } + + public int Inflate(byte[] bytes, int offset, int length) + { + // copy bytes from output to outputbytes if we have available bytes + // if buffer is not filled up. keep decoding until no input are available + // if decodeBlock returns false. Throw an exception. 
+ int count = 0; + do + { + int copied = _output.CopyTo(bytes, offset, length); + if (copied > 0) + { + if (_hasFormatReader) + { + _formatReader.UpdateWithBytesRead(bytes, offset, copied); + } + + offset += copied; + count += copied; + length -= copied; + } + + if (length == 0) + { // filled in the bytes array + break; + } + // Decode will return false when more input is needed + } while (!Finished() && Decode()); + + if (_state == InflaterState.VerifyingFooter) + { // finished reading CRC + // In this case finished is true and output window has all the data. + // But some data in output window might not be copied out. + if (_output.AvailableBytes == 0) + { + _formatReader.Validate(); + } + } + + return count; + } + + //Each block of compressed data begins with 3 header bits + // containing the following data: + // first bit BFINAL + // next 2 bits BTYPE + // Note that the header bits do not necessarily begin on a byte + // boundary, since a block does not necessarily occupy an integral + // number of bytes. + // BFINAL is set if and only if this is the last block of the data + // set. + // BTYPE specifies how the data are compressed, as follows: + // 00 - no compression + // 01 - compressed with fixed Huffman codes + // 10 - compressed with dynamic Huffman codes + // 11 - reserved (error) + // The only difference between the two compressed cases is how the + // Huffman codes for the literal/length and distance alphabets are + // defined. 
        //
        // This function returns true for success (end of block or output window is full,)
        // false if we are short of input
        //
        private bool Decode()
        {
            bool eob = false;
            bool result = false;

            if (Finished())
            {
                return true;
            }

            if (_hasFormatReader)
            {
                // Container framing (e.g. gzip): header before the first block,
                // footer after the final one.
                if (_state == InflaterState.ReadingHeader)
                {
                    if (!_formatReader.ReadHeader(_input))
                    {
                        return false;
                    }
                    _state = InflaterState.ReadingBFinal;
                }
                else if (_state == InflaterState.StartReadingFooter || _state == InflaterState.ReadingFooter)
                {
                    if (!_formatReader.ReadFooter(_input))
                        return false;

                    _state = InflaterState.VerifyingFooter;
                    return true;
                }
            }

            if (_state == InflaterState.ReadingBFinal)
            { // reading bfinal bit
                // Need 1 bit
                if (!_input.EnsureBitsAvailable(1))
                    return false;

                _bfinal = _input.GetBits(1);
                _state = InflaterState.ReadingBType;
            }

            if (_state == InflaterState.ReadingBType)
            {
                // Need 2 bits
                if (!_input.EnsureBitsAvailable(2))
                {
                    _state = InflaterState.ReadingBType;
                    return false;
                }

                _blockType = (BlockType)_input.GetBits(2);
                if (_blockType == BlockType.Dynamic)
                {
                    _state = InflaterState.ReadingNumLitCodes;
                }
                else if (_blockType == BlockType.Static)
                {
                    // fixed Huffman codes: trees are predefined by RFC 1951
                    _literalLengthTree = HuffmanTree.StaticLiteralLengthTree;
                    _distanceTree = HuffmanTree.StaticDistanceTree;
                    _state = InflaterState.DecodeTop;
                }
                else if (_blockType == BlockType.Uncompressed)
                {
                    _state = InflaterState.UncompressedAligning;
                }
                else
                {
                    throw new InvalidDataException(SR.UnknownBlockType);
                }
            }

            if (_blockType == BlockType.Dynamic)
            {
                if (_state < InflaterState.DecodeTop)
                { // we are reading the header
                    result = DecodeDynamicBlockHeader();
                }
                else
                {
                    result = DecodeBlock(out eob); // this can return true when output is full
                }
            }
            else if (_blockType == BlockType.Static)
            {
                result = DecodeBlock(out eob);
            }
            else if (_blockType == BlockType.Uncompressed)
            {
                result = DecodeUncompressedBlock(out eob);
            }
            else
            {
                throw new InvalidDataException(SR.UnknownBlockType);
            }

            //
            // If we reached the end of the block and the block we were decoding had
            // bfinal=1 (final block)
            //
            if (eob && (_bfinal != 0))
            {
                if (_hasFormatReader)
                    _state = InflaterState.StartReadingFooter;
                else
                    _state = InflaterState.Done;
            }
            return result;
        }


        // Format of Non-compressed blocks (BTYPE=00):
        //
        // Any bits of input up to the next byte boundary are ignored.
        // The rest of the block consists of the following information:
        //
        //     0   1   2   3   4...
        //   +---+---+---+---+================================+
        //   |  LEN  | NLEN  |... LEN bytes of literal data...|
        //   +---+---+---+---+================================+
        //
        // LEN is the number of data bytes in the block.  NLEN is the
        // one's complement of LEN.

        private bool DecodeUncompressedBlock(out bool end_of_block)
        {
            end_of_block = false;
            while (true)
            {
                switch (_state)
                {
                    case InflaterState.UncompressedAligning: // initial state when calling this function
                        // we must skip to a byte boundary
                        _input.SkipToByteBoundary();
                        _state = InflaterState.UncompressedByte1;
                        goto case InflaterState.UncompressedByte1;

                    case InflaterState.UncompressedByte1: // decoding block length
                    case InflaterState.UncompressedByte2:
                    case InflaterState.UncompressedByte3:
                    case InflaterState.UncompressedByte4:
                        int bits = _input.GetBits(8);
                        if (bits < 0)
                        {
                            return false;
                        }

                        // enum subtraction gives the 0-based index of the LEN/NLEN byte
                        _blockLengthBuffer[_state - InflaterState.UncompressedByte1] = (byte)bits;
                        if (_state == InflaterState.UncompressedByte4)
                        {
                            _blockLength = _blockLengthBuffer[0] + ((int)_blockLengthBuffer[1]) * 256;
                            int blockLengthComplement = _blockLengthBuffer[2] + ((int)_blockLengthBuffer[3]) * 256;

                            // make sure complement matches
                            if ((ushort)_blockLength != (ushort)(~blockLengthComplement))
                            {
                                throw new InvalidDataException(SR.InvalidBlockLength);
                            }
                        }

                        _state += 1;
                        break;

                    case InflaterState.DecodingUncompressed: // copying block data

                        // Directly copy bytes from input to output.
                        int bytesCopied = _output.CopyFrom(_input, _blockLength);
                        _blockLength -= bytesCopied;

                        if (_blockLength == 0)
                        {
                            // Done with this block, need to re-init bit buffer for next block
                            _state = InflaterState.ReadingBFinal;
                            end_of_block = true;
                            return true;
                        }

                        // We can fail to copy all bytes for two reasons:
                        //    Running out of Input
                        //    running out of free space in output window
                        if (_output.FreeBytes == 0)
                        {
                            return true;
                        }

                        return false;

                    default:
                        Debug.Assert(false, "check why we are here!");
                        throw new InvalidDataException(SR.UnknownState);
                }
            }
        }

        // Decodes a static or dynamic Huffman-compressed block. Resumable: each
        // case saves enough state to continue after more input arrives.
        private bool DecodeBlock(out bool end_of_block_code_seen)
        {
            end_of_block_code_seen = false;

            int freeBytes = _output.FreeBytes;   // it is a little bit faster than frequently accessing the property
            while (freeBytes > 258)
            {
                // 258 means we can safely do decoding since maximum repeat length is 258

                int symbol;
                switch (_state)
                {
                    case InflaterState.DecodeTop:
                        // decode an element from the literal tree

                        // TODO: optimize this!!!
                        symbol = _literalLengthTree.GetNextSymbol(_input);
                        if (symbol < 0)
                        { // running out of input
                            return false;
                        }

                        if (symbol < 256)
                        { // literal
                            _output.Write((byte)symbol);
                            --freeBytes;
                        }
                        else if (symbol == 256)
                        { // end of block
                            end_of_block_code_seen = true;
                            // Reset state
                            _state = InflaterState.ReadingBFinal;
                            return true; // ***********
                        }
                        else
                        { // length/distance pair
                            symbol -= 257; // length code started at 257
                            if (symbol < 8)
                            {
                                symbol += 3; // match length = 3,4,5,6,7,8,9,10
                                _extraBits = 0;
                            }
                            else if (!_deflate64 && symbol == 28)
                            { // extra bits for code 285 is 0
                                symbol = 258; // code 285 means length 258
                                _extraBits = 0;
                            }
                            else
                            {
                                if (symbol < 0 || symbol >= s_extraLengthBits.Length)
                                {
                                    throw new InvalidDataException(SR.GenericInvalidData);
                                }
                                _extraBits = s_extraLengthBits[symbol];
                                Debug.Assert(_extraBits != 0, "We handle other cases seperately!");
                            }
                            _length = symbol;
                            goto case InflaterState.HaveInitialLength;
                        }
                        break;

                    case InflaterState.HaveInitialLength:
                        if (_extraBits > 0)
                        {
                            // record state so we can resume here if input runs out
                            _state = InflaterState.HaveInitialLength;
                            int bits = _input.GetBits(_extraBits);
                            if (bits < 0)
                            {
                                return false;
                            }

                            if (_length < 0 || _length >= s_lengthBase.Length)
                            {
                                throw new InvalidDataException(SR.GenericInvalidData);
                            }
                            _length = s_lengthBase[_length] + bits;
                        }
                        _state = InflaterState.HaveFullLength;
                        goto case InflaterState.HaveFullLength;

                    case InflaterState.HaveFullLength:
                        if (_blockType == BlockType.Dynamic)
                        {
                            _distanceCode = _distanceTree.GetNextSymbol(_input);
                        }
                        else
                        { // get distance code directly for static block
                            _distanceCode = _input.GetBits(5);
                            if (_distanceCode >= 0)
                            {
                                _distanceCode = s_staticDistanceTreeTable[_distanceCode];
                            }
                        }

                        if (_distanceCode < 0)
                        { // running out input
                            return false;
                        }

                        _state = InflaterState.HaveDistCode;
                        goto case InflaterState.HaveDistCode;

                    case InflaterState.HaveDistCode:
                        // To avoid a table lookup we note that for distanceCode >= 2,
                        // extra_bits = (distanceCode-2) >> 1
                        int offset;
                        if (_distanceCode > 3)
                        {
                            _extraBits = (_distanceCode - 2) >> 1;
                            int bits = _input.GetBits(_extraBits);
                            if (bits < 0)
                            {
                                return false;
                            }
                            offset = s_distanceBasePosition[_distanceCode] + bits;
                        }
                        else
                        {
                            offset = _distanceCode + 1;
                        }

                        Debug.Assert(freeBytes >= 258, "following operation is not safe!");
                        _output.WriteLengthDistance(_length, offset);
                        freeBytes -= _length;
                        _state = InflaterState.DecodeTop;
                        break;

                    default:
                        Debug.Assert(false, "check why we are here!");
                        throw new InvalidDataException(SR.UnknownState);
                }
            }

            return true;
        }


        // Format of the dynamic block header:
        //      5 Bits: HLIT, # of Literal/Length codes - 257 (257 - 286)
        //      5 Bits: HDIST, # of Distance codes - 1        (1 - 32)
        //      4 Bits: HCLEN, # of Code Length codes - 4     (4 - 19)
        //
        //      (HCLEN + 4) x 3 bits: code lengths for the code length
        //      alphabet given just above, in the order: 16, 17, 18,
        //      0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
        //
        //      These code lengths are interpreted as 3-bit integers
        //      (0-7); as above, a code length of 0 means the
        //      corresponding symbol (literal/length or distance code
        //      length) is not used.
        //
        //      HLIT + 257 code lengths for the literal/length alphabet,
        //      encoded using the code length Huffman code
        //
        //      HDIST + 1 code lengths for the distance alphabet,
        //      encoded using the code length Huffman code
        //
        //      The code length repeat codes can cross from HLIT + 257 to the
        //      HDIST + 1 code lengths. In other words, all code lengths form
        //      a single sequence of HLIT + HDIST + 258 values.
        // Reads the dynamic-block header described above and builds the
        // literal/length and distance Huffman trees. Resumable: returns false
        // (with _state recording progress) when input runs out.
        private bool DecodeDynamicBlockHeader()
        {
            switch (_state)
            {
                case InflaterState.ReadingNumLitCodes:
                    _literalLengthCodeCount = _input.GetBits(5);
                    if (_literalLengthCodeCount < 0)
                    {
                        return false;
                    }
                    _literalLengthCodeCount += 257;
                    _state = InflaterState.ReadingNumDistCodes;
                    goto case InflaterState.ReadingNumDistCodes;

                case InflaterState.ReadingNumDistCodes:
                    _distanceCodeCount = _input.GetBits(5);
                    if (_distanceCodeCount < 0)
                    {
                        return false;
                    }
                    _distanceCodeCount += 1;
                    _state = InflaterState.ReadingNumCodeLengthCodes;
                    goto case InflaterState.ReadingNumCodeLengthCodes;

                case InflaterState.ReadingNumCodeLengthCodes:
                    _codeLengthCodeCount = _input.GetBits(4);
                    if (_codeLengthCodeCount < 0)
                    {
                        return false;
                    }
                    _codeLengthCodeCount += 4;
                    _loopCounter = 0;
                    _state = InflaterState.ReadingCodeLengthCodes;
                    goto case InflaterState.ReadingCodeLengthCodes;

                case InflaterState.ReadingCodeLengthCodes:
                    while (_loopCounter < _codeLengthCodeCount)
                    {
                        int bits = _input.GetBits(3);
                        if (bits < 0)
                        {
                            return false;
                        }
                        _codeLengthTreeCodeLength[s_codeOrder[_loopCounter]] = (byte)bits;
                        ++_loopCounter;
                    }

                    // lengths not transmitted are zero (symbol unused)
                    for (int i = _codeLengthCodeCount; i < s_codeOrder.Length; i++)
                    {
                        _codeLengthTreeCodeLength[s_codeOrder[i]] = 0;
                    }

                    // create huffman tree for code length
                    _codeLengthTree = new HuffmanTree(_codeLengthTreeCodeLength);
                    _codeArraySize = _literalLengthCodeCount + _distanceCodeCount;
                    _loopCounter = 0; // reset loop count

                    _state = InflaterState.ReadingTreeCodesBefore;
                    goto case InflaterState.ReadingTreeCodesBefore;

                case InflaterState.ReadingTreeCodesBefore:
                case InflaterState.ReadingTreeCodesAfter:
                    while (_loopCounter < _codeArraySize)
                    {
                        if (_state == InflaterState.ReadingTreeCodesBefore)
                        {
                            if ((_lengthCode = _codeLengthTree.GetNextSymbol(_input)) < 0)
                            {
                                return false;
                            }
                        }

                        // The alphabet for code lengths is as follows:
                        //  0 - 15: Represent code lengths of 0 - 15
                        //  16: Copy the previous code length 3 - 6 times.
                        //      The next 2 bits indicate repeat length
                        //      (0 = 3, ... , 3 = 6)
                        //      Example:  Codes 8, 16 (+2 bits 11),
                        //      16 (+2 bits 10) will expand to
                        //      12 code lengths of 8 (1 + 6 + 5)
                        //  17: Repeat a code length of 0 for 3 - 10 times.
                        //      (3 bits of length)
                        //  18: Repeat a code length of 0 for 11 - 138 times
                        //      (7 bits of length)
                        if (_lengthCode <= 15)
                        {
                            _codeList[_loopCounter++] = (byte)_lengthCode;
                        }
                        else
                        {
                            int repeatCount;
                            if (_lengthCode == 16)
                            {
                                if (!_input.EnsureBitsAvailable(2))
                                {
                                    _state = InflaterState.ReadingTreeCodesAfter;
                                    return false;
                                }

                                if (_loopCounter == 0)
                                { // can't have "prev code" on first code
                                    throw new InvalidDataException();
                                }

                                byte previousCode = _codeList[_loopCounter - 1];
                                repeatCount = _input.GetBits(2) + 3;

                                if (_loopCounter + repeatCount > _codeArraySize)
                                {
                                    throw new InvalidDataException();
                                }

                                for (int j = 0; j < repeatCount; j++)
                                {
                                    _codeList[_loopCounter++] = previousCode;
                                }
                            }
                            else if (_lengthCode == 17)
                            {
                                if (!_input.EnsureBitsAvailable(3))
                                {
                                    _state = InflaterState.ReadingTreeCodesAfter;
                                    return false;
                                }

                                repeatCount = _input.GetBits(3) + 3;

                                if (_loopCounter + repeatCount > _codeArraySize)
                                {
                                    throw new InvalidDataException();
                                }

                                for (int j = 0; j < repeatCount; j++)
                                {
                                    _codeList[_loopCounter++] = 0;
                                }
                            }
                            else
                            { // code == 18
                                if (!_input.EnsureBitsAvailable(7))
                                {
                                    _state = InflaterState.ReadingTreeCodesAfter;
                                    return false;
                                }

                                repeatCount = _input.GetBits(7) + 11;

                                if (_loopCounter + repeatCount > _codeArraySize)
                                {
                                    throw new InvalidDataException();
                                }

                                for (int j = 0; j < repeatCount; j++)
                                {
                                    _codeList[_loopCounter++] = 0;
                                }
                            }
                        }
                        _state = InflaterState.ReadingTreeCodesBefore; // we want to read the next code.
                    }
                    break;

                default:
                    Debug.Assert(false, "check why we are here!");
                    throw new InvalidDataException(SR.UnknownState);
            }

            byte[] literalTreeCodeLength = new byte[HuffmanTree.MaxLiteralTreeElements];
            byte[] distanceTreeCodeLength = new byte[HuffmanTree.MaxDistTreeElements];

            // Create literal and distance tables
            Array.Copy(_codeList, 0, literalTreeCodeLength, 0, _literalLengthCodeCount);
            Array.Copy(_codeList, _literalLengthCodeCount, distanceTreeCodeLength, 0, _distanceCodeCount);

            // Make sure there is an end-of-block code, otherwise how could we ever end?
            if (literalTreeCodeLength[HuffmanTree.EndOfBlockCode] == 0)
            {
                throw new InvalidDataException();
            }

            _literalLengthTree = new HuffmanTree(literalTreeCodeLength);
            _distanceTree = new HuffmanTree(distanceTreeCodeLength);
            _state = InflaterState.DecodeTop;
            return true;
        }

        public void Dispose() { }
    }
}

diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterState.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterState.cs
new file mode 100644
index 000000000000..4610aa0e6fab
--- /dev/null
+++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InflaterState.cs
@@ -0,0 +1,42 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

namespace System.IO.Compression
{
    // Do not rearrange the enum values.
+ internal enum InflaterState + { + ReadingHeader = 0, // Only applies to GZIP + + ReadingBFinal = 2, // About to read bfinal bit + ReadingBType = 3, // About to read blockType bits + + ReadingNumLitCodes = 4, // About to read # literal codes + ReadingNumDistCodes = 5, // About to read # dist codes + ReadingNumCodeLengthCodes = 6, // About to read # code length codes + ReadingCodeLengthCodes = 7, // In the middle of reading the code length codes + ReadingTreeCodesBefore = 8, // In the middle of reading tree codes (loop top) + ReadingTreeCodesAfter = 9, // In the middle of reading tree codes (extension; code > 15) + + DecodeTop = 10, // About to decode a literal (char/match) in a compressed block + HaveInitialLength = 11, // Decoding a match, have the literal code (base length) + HaveFullLength = 12, // Ditto, now have the full match length (incl. extra length bits) + HaveDistCode = 13, // Ditto, now have the distance code also, need extra dist bits + + /* uncompressed blocks */ + UncompressedAligning = 15, + UncompressedByte1 = 16, + UncompressedByte2 = 17, + UncompressedByte3 = 18, + UncompressedByte4 = 19, + DecodingUncompressed = 20, + + // These three apply only to GZIP + StartReadingFooter = 21, // (Initialisation for reading footer) + ReadingFooter = 22, + VerifyingFooter = 23, + + Done = 24 // Finished + } +} diff --git a/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InputBuffer.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InputBuffer.cs new file mode 100644 index 000000000000..49462211aecf --- /dev/null +++ b/src/System.IO.Compression/src/System/IO/Compression/DeflateManaged/InputBuffer.cs @@ -0,0 +1,214 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
using System;
using System.Diagnostics;

namespace System.IO.Compression
{
    // Bit-oriented reader over a caller-supplied byte array. Bits are staged
    // LSB-first in _bitBuffer, with _bitsInBuffer tracking how many are valid;
    // bytes are pulled from the array on demand to refill it.
    //
    // The array is consumed once, from _start to _end, and is never cloned. When
    // it is exhausted most read operations report failure (-1 or false), which
    // tells the caller to supply more data via SetInput.
    internal class InputBuffer
    {
        private byte[] _buffer;        // current input chunk
        private int _start;            // index of the next unread byte
        private int _end;              // one past the last valid byte
        private uint _bitBuffer = 0;   // pending bits, least-significant-first
        private int _bitsInBuffer = 0; // number of valid bits in _bitBuffer

        // Total bits currently staged in the bit buffer.
        public int AvailableBits => _bitsInBuffer;

        // Total bytes available: unread array bytes plus whole bytes in the bit buffer.
        public int AvailableBytes => (_end - _start) + (_bitsInBuffer / 8);

        // Ensure at least 'count' (1..16) bits sit in the bit buffer.
        // Returns false when the remaining input cannot supply that many bits.
        public bool EnsureBitsAvailable(int count)
        {
            Debug.Assert(0 < count && count <= 16, "count is invalid.");

            // Pull one byte at a time until enough bits are staged; since
            // count <= 16 this loads at most two bytes.
            while (_bitsInBuffer < count)
            {
                if (NeedsInput())
                {
                    return false;
                }
                _bitBuffer |= (uint)_buffer[_start++] << _bitsInBuffer;
                _bitsInBuffer += 8;
            }
            return true;
        }

        // Top the bit buffer up to at least 16 bits (input permitting) and return
        // its raw contents. Unlike GetBits this never signals failure: the caller
        // must consult AvailableBits to learn how many returned bits are valid.
        public uint TryLoad16Bits()
        {
            while (_bitsInBuffer < 16 && _start < _end)
            {
                _bitBuffer |= (uint)_buffer[_start++] << _bitsInBuffer;
                _bitsInBuffer += 8;
            }
            return _bitBuffer;
        }

        // Mask selecting the low 'count' bits.
        private uint GetBitMask(int count) => ((uint)1 << count) - 1;

        // Consume and return 'count' (1..16) bits, or -1 when input is insufficient.
        public int GetBits(int count)
        {
            Debug.Assert(0 < count && count <= 16, "count is invalid.");

            if (!EnsureBitsAvailable(count))
            {
                return -1;
            }

            uint mask = GetBitMask(count);
            int value = (int)(_bitBuffer & mask);
            _bitBuffer >>= count;
            _bitsInBuffer -= count;
            return value;
        }

        /// Copies up to 'length' bytes into output[offset..]. The reader must be
        /// byte aligned (asserted). Copies fewer bytes when less input remains;
        /// returns the number of bytes copied (0 when none are available).
        public int CopyTo(byte[] output, int offset, int length)
        {
            Debug.Assert(output != null, "");
            Debug.Assert(offset >= 0, "");
            Debug.Assert(length >= 0, "");
            Debug.Assert(offset <= output.Length - length, "");
            Debug.Assert((_bitsInBuffer % 8) == 0, "");

            // Drain whole bytes already staged in the bit buffer first.
            int fromBitBuffer = 0;
            for (; _bitsInBuffer > 0 && length > 0; length--, fromBitBuffer++)
            {
                output[offset++] = (byte)_bitBuffer;
                _bitBuffer >>= 8;
                _bitsInBuffer -= 8;
            }

            if (length == 0)
            {
                return fromBitBuffer;
            }

            // Then bulk-copy directly from the array, clamped to what remains.
            int toCopy = Math.Min(length, _end - _start);
            Array.Copy(_buffer, _start, output, offset, toCopy);
            _start += toCopy;
            return fromBitBuffer + toCopy;
        }

        // True when every input byte has been consumed, i.e. SetInput may be called.
        public bool NeedsInput() => _start == _end;

        // Install the next chunk of input. Bits left in the bit buffer are consumed
        // before the new bytes. The array is not cloned (too expensive), so the
        // caller must not mutate it until this is called again.
        public void SetInput(byte[] buffer, int offset, int length)
        {
            Debug.Assert(buffer != null, "");
            Debug.Assert(offset >= 0, "");
            Debug.Assert(length >= 0, "");
            Debug.Assert(offset <= buffer.Length - length, "");
            Debug.Assert(_start == _end, "");

            _buffer = buffer;
            _start = offset;
            _end = offset + length;
        }

        // Discard n bits already known to be in the bit buffer.
        public void SkipBits(int n)
        {
            Debug.Assert(_bitsInBuffer >= n, "No enough bits in the buffer, Did you call EnsureBitsAvailable?");
            _bitBuffer >>= n;
            _bitsInBuffer -= n;
        }

        // Discard any partial byte so the reader is byte aligned again.
        public void SkipToByteBoundary()
        {
            int partialBits = _bitsInBuffer % 8;
            _bitBuffer >>= partialBits;
            _bitsInBuffer -= partialBits;
        }
    }

    // A decoded item from the history window: a literal symbol, a length/distance
    // match, or both, as indicated by State.
    internal class Match
    {
        internal MatchState State { get; set; }
        internal int Position { get; set; }
        internal int Length { get; set; }
        internal byte Symbol { get; set; }
    }
}
namespace System.IO.Compression
{
    // Describes what a Match instance carries: a literal symbol, a length/distance
    // match, or both (HasSymbolAndMatch == HasSymbol | HasMatch).
    // Placed in the System.IO.Compression namespace for consistency with the other
    // DeflateManaged types; previously this enum sat in the global namespace.
    internal enum MatchState
    {
        HasSymbol = 1,
        HasMatch = 2,
        HasSymbolAndMatch = 3
    }

    // Accumulates deflate output into a caller-supplied byte array. Whole bytes go
    // straight into the array; sub-byte bit runs are staged LSB-first in _bitBuf
    // and flushed two bytes at a time once 16 bits have accumulated.
    internal class OutputBuffer
    {
        private byte[] _byteBuffer; // buffer for storing bytes
        private int _pos;           // next write position in _byteBuffer
        private uint _bitBuf;       // staging area for incomplete bit runs
        private int _bitCount;      // number of valid bits in _bitBuf

        // Set the output buffer we will be using. Resets the byte position but
        // deliberately keeps any pending bits so a bit run can span buffers.
        internal void UpdateBuffer(byte[] output)
        {
            _byteBuffer = output;
            _pos = 0;
        }

        // Whole bytes emitted so far (excludes bits still pending in _bitBuf).
        internal int BytesWritten => _pos;

        // Bytes of capacity remaining in the current output array.
        internal int FreeBytes => _byteBuffer.Length - _pos;

        // Write a 16-bit value little-endian, bypassing the bit buffer.
        internal void WriteUInt16(ushort value)
        {
            Debug.Assert(FreeBytes >= 2, "Not enough space in output buffer!");

            _byteBuffer[_pos++] = (byte)value;
            _byteBuffer[_pos++] = (byte)(value >> 8);
        }

        // Append the low n bits of 'bits' (n <= 16); whenever 16 or more bits have
        // accumulated, two bytes are flushed to the output array.
        internal void WriteBits(int n, uint bits)
        {
            // Assert message corrected: the requirement is n <= 16, but the message
            // previously read "length must be larger than 16!".
            Debug.Assert(n <= 16, "length must be no larger than 16!");
            _bitBuf |= bits << _bitCount;
            _bitCount += n;
            if (_bitCount >= 16)
            {
                Debug.Assert(_byteBuffer.Length - _pos >= 2, "Not enough space in output buffer!");
                _byteBuffer[_pos++] = unchecked((byte)_bitBuf);
                _byteBuffer[_pos++] = unchecked((byte)(_bitBuf >> 8));
                _bitCount -= 16;
                _bitBuf >>= 16;
            }
        }

        // Flush any bits still staged in the bit buffer out as whole bytes,
        // zero-padding the final partial byte.
        internal void FlushBits()
        {
            // flush bits from bit buffer to output buffer
            while (_bitCount >= 8)
            {
                _byteBuffer[_pos++] = unchecked((byte)_bitBuf);
                _bitCount -= 8;
                _bitBuf >>= 8;
            }

            if (_bitCount > 0)
            {
                _byteBuffer[_pos++] = unchecked((byte)_bitBuf);
                _bitBuf = 0;
                _bitCount = 0;
            }
        }

        // Append 'count' bytes from byteArray[offset..]. Uses a bulk copy when the
        // writer is byte aligned, otherwise funnels each byte through the bit buffer.
        internal void WriteBytes(byte[] byteArray, int offset, int count)
        {
            Debug.Assert(FreeBytes >= count, "Not enough space in output buffer!");
            if (_bitCount == 0)
            {
                // byte aligned: bulk copy is the faster path
                Array.Copy(byteArray, offset, _byteBuffer, _pos, count);
                _pos += count;
            }
            else
            {
                WriteBytesUnaligned(byteArray, offset, count);
            }
        }

        // Slow path: emit each byte through the bit buffer so it lands at the
        // current (non-byte-aligned) bit position.
        private void WriteBytesUnaligned(byte[] byteArray, int offset, int count)
        {
            for (int i = 0; i < count; i++)
            {
                WriteByteUnaligned(byteArray[offset + i]);
            }
        }

        private void WriteByteUnaligned(byte b) => WriteBits(8, b);

        // Upper bound (in bytes) of the bits still pending in _bitBuf.
        // NOTE(review): (_bitCount / 8) + 1 over-reports by one byte when _bitCount
        // is a multiple of 8 (including zero). CopyEncoder subtracts this as a
        // conservative padding allowance, so the over-estimate looks deliberate —
        // confirm before tightening.
        internal int BitsInBuffer => (_bitCount / 8) + 1;

        // Snapshot the writer state so a partially emitted block can be rolled
        // back with RestoreState.
        internal OutputBuffer.BufferState DumpState()
        {
            OutputBuffer.BufferState savedState;
            savedState.pos = _pos;
            savedState.bitBuf = _bitBuf;
            savedState.bitCount = _bitCount;
            return savedState;
        }

        // Roll the writer back to a state captured by DumpState.
        internal void RestoreState(OutputBuffer.BufferState state)
        {
            _pos = state.pos;
            _bitBuf = state.bitBuf;
            _bitCount = state.bitCount;
        }

        internal struct BufferState
        {
            internal int pos;      // write position
            internal uint bitBuf;  // pending (incomplete) bits
            internal int bitCount; // number of pending bits
        }
    }
}
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Diagnostics;
using System.Globalization; // NOTE(review): appears unused in this file — confirm before removing.

namespace System.IO.Compression
{
    // This class maintains a window for decompressed output.
    // We need to keep this because the decompressed information can be
    // a literal or a length/distance pair. For length/distance pair,
    // we need to look back in the output window and copy bytes from there.
    // We use a byte array of WindowSize circularly.
    //
    internal class OutputWindow
    {
        // 64 KiB (2^16) window with a power-of-two mask for cheap wrap-around.
        // (The previous comment below claimed 2^15; 65536 is 2^16 — the larger
        // window is what Deflate64 requires.)
        private const int WindowSize = 65536;
        private const int WindowMask = 65535;

        private byte[] _window = new byte[WindowSize]; // The window is 2^16 bytes
        private int _end;       // this is the position to where we should write next byte
        private int _bytesUsed; // The number of bytes in the output window which is not consumed.

        // Add a byte to output window
        public void Write(byte b)
        {
            Debug.Assert(_bytesUsed < WindowSize, "Can't add byte when window is full!");
            _window[_end++] = b;
            _end &= WindowMask; // wrap write position around the circular buffer
            ++_bytesUsed;
        }

        // Emit a length/distance back-reference: copy 'length' bytes starting
        // 'distance' bytes behind the current write position.
        public void WriteLengthDistance(int length, int distance)
        {
            Debug.Assert((_bytesUsed + length) <= WindowSize, "No Enough space");

            // move backwards distance bytes in the output stream,
            // and copy length bytes from this position to the output stream.
            _bytesUsed += length;
            int copyStart = (_end - distance) & WindowMask; // start position for copying.

            // Fast path: neither the source nor destination range wraps around
            // the end of the window.
            int border = WindowSize - length;
            if (copyStart <= border && _end < border)
            {
                if (length <= distance)
                {
                    // source and destination do not overlap: bulk copy is safe
                    System.Array.Copy(_window, copyStart, _window, _end, length);
                    _end += length;
                }
                else
                {
                    // The referenced string may overlap the current
                    // position; for example, if the last 2 bytes decoded have values
                    // X and Y, a string reference with <length = 5, distance = 2>
                    // adds X,Y,X,Y,X to the output stream, so the copy must proceed
                    // byte by byte in order.
                    while (length-- > 0)
                    {
                        _window[_end++] = _window[copyStart++];
                    }
                }
            }
            else
            { // copy byte by byte, wrapping both indices around the window
                while (length-- > 0)
                {
                    _window[_end++] = _window[copyStart++];
                    _end &= WindowMask;
                    copyStart &= WindowMask;
                }
            }
        }

        // Copy up to length of bytes from input directly into the window.
        // This is used for uncompressed blocks.
        // Returns the number of bytes actually copied (limited by free window
        // space and by the bytes available in 'input').
        public int CopyFrom(InputBuffer input, int length)
        {
            length = Math.Min(Math.Min(length, WindowSize - _bytesUsed), input.AvailableBytes);
            int copied;

            // We might need wrap around to copy all bytes.
            int tailLen = WindowSize - _end;
            if (length > tailLen)
            {
                // copy the first part, up to the physical end of the window
                copied = input.CopyTo(_window, _end, tailLen);
                if (copied == tailLen)
                {
                    // only try to copy the second part if we have enough bytes in input
                    copied += input.CopyTo(_window, 0, length - tailLen);
                }
            }
            else
            {
                // only one copy is needed if there is no wrap around.
                copied = input.CopyTo(_window, _end, length);
            }

            _end = (_end + copied) & WindowMask;
            _bytesUsed += copied;
            return copied;
        }

        // Free space in output window
        public int FreeBytes
        {
            get
            {
                return WindowSize - _bytesUsed;
            }
        }

        // bytes not consumed in output window
        public int AvailableBytes
        {
            get
            {
                return _bytesUsed;
            }
        }

        // Copy the oldest unconsumed decompressed bytes to the output array.
        // Returns the number of bytes copied (at most _bytesUsed).
        public int CopyTo(byte[] output, int offset, int length)
        {
            int copy_end;

            if (length > _bytesUsed)
            { // we can copy all the decompressed bytes out
                copy_end = _end;
                length = _bytesUsed;
            }
            else
            {
                // end position (exclusive) of the 'length' oldest unconsumed bytes
                copy_end = (_end - _bytesUsed + length) & WindowMask; // copy length of bytes
            }

            int copied = length;

            // If the range wraps around the window, copy the tail part first.
            int tailLen = length - copy_end;
            if (tailLen > 0)
            { // this means we need to copy two parts separately
                // copy tailLen bytes from the end of output window
                System.Array.Copy(_window, WindowSize - tailLen,
                                  output, offset, tailLen);
                offset += tailLen;
                length = copy_end;
            }
            System.Array.Copy(_window, copy_end - length, output, offset, length);
            _bytesUsed -= copied;
            Debug.Assert(_bytesUsed >= 0, "check this function and find why we copied more bytes than we have");
            return copied;
        }
    }
}
b/src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibException.cs similarity index 100% rename from src/System.IO.Compression/src/System/IO/Compression/ZLibException.cs rename to src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibException.cs diff --git a/src/System.IO.Compression/src/System/IO/Compression/ZLibNative.Unix.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.Unix.cs similarity index 100% rename from src/System.IO.Compression/src/System/IO/Compression/ZLibNative.Unix.cs rename to src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.Unix.cs diff --git a/src/System.IO.Compression/src/System/IO/Compression/ZLibNative.Windows.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.Windows.cs similarity index 100% rename from src/System.IO.Compression/src/System/IO/Compression/ZLibNative.Windows.cs rename to src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.Windows.cs diff --git a/src/System.IO.Compression/src/System/IO/Compression/ZLibNative.cs b/src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.cs similarity index 100% rename from src/System.IO.Compression/src/System/IO/Compression/ZLibNative.cs rename to src/System.IO.Compression/src/System/IO/Compression/DeflateZLib/ZLibNative.cs diff --git a/src/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs b/src/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs index 49ef75203a64..979148302576 100644 --- a/src/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs +++ b/src/System.IO.Compression/src/System/IO/Compression/ZipArchiveEntry.cs @@ -425,6 +425,8 @@ private CompressionMethodValues CompressionMethod { if (value == CompressionMethodValues.Deflate) VersionToExtractAtLeast(ZipVersionNeededValues.Deflate); + else if (value == CompressionMethodValues.Deflate64) + 
VersionToExtractAtLeast(ZipVersionNeededValues.Deflate64); _storedCompressionMethod = value; } } @@ -673,9 +675,12 @@ private Stream GetDataDecompressor(Stream compressedStreamToRead) case CompressionMethodValues.Deflate: uncompressedStream = new DeflateStream(compressedStreamToRead, CompressionMode.Decompress); break; + case CompressionMethodValues.Deflate64: + uncompressedStream = new DeflateManagedStream(compressedStreamToRead, CompressionMethodValues.Deflate64); + break; case CompressionMethodValues.Stored: default: - //we can assume that only deflate/stored are allowed because we assume that + //we can assume that only deflate/deflate64/stored are allowed because we assume that //IsOpenable is checked before this function is called Debug.Assert(CompressionMethod == CompressionMethodValues.Stored); @@ -747,11 +752,11 @@ private Boolean IsOpenable(Boolean needToUncompress, Boolean needToLoadIntoMemor if (needToUncompress) { if (CompressionMethod != CompressionMethodValues.Stored && - CompressionMethod != CompressionMethodValues.Deflate) + CompressionMethod != CompressionMethodValues.Deflate && + CompressionMethod != CompressionMethodValues.Deflate64) { switch (CompressionMethod) { - case CompressionMethodValues.Deflate64: case CompressionMethodValues.BZip2: case CompressionMethodValues.LZMA: message = SR.Format(SR.UnsupportedCompressionMethod, CompressionMethod.ToString()); @@ -1287,7 +1292,7 @@ protected override void Dispose(Boolean disposing) [Flags] private enum BitFlagValues : ushort { DataDescriptor = 0x8, UnicodeFileName = 0x800 } - private enum CompressionMethodValues : ushort { Stored = 0x0, Deflate = 0x8, Deflate64 = 0x9, BZip2 = 0xC, LZMA = 0xE } + internal enum CompressionMethodValues : ushort { Stored = 0x0, Deflate = 0x8, Deflate64 = 0x9, BZip2 = 0xC, LZMA = 0xE } private enum OpenableValues { Openable, FileNonExistent, FileTooLarge } #endregion Nested Types diff --git a/src/System.IO.Compression/src/System/IO/Compression/ZipVersion.cs 
b/src/System.IO.Compression/src/System/IO/Compression/ZipVersion.cs index 08d138b9ae33..ecde702eaabe 100644 --- a/src/System.IO.Compression/src/System/IO/Compression/ZipVersion.cs +++ b/src/System.IO.Compression/src/System/IO/Compression/ZipVersion.cs @@ -4,7 +4,7 @@ namespace System.IO.Compression { - internal enum ZipVersionNeededValues : ushort { Default = 10, ExplicitDirectory = 20, Deflate = 20, Zip64 = 45 } + internal enum ZipVersionNeededValues : ushort { Default = 10, ExplicitDirectory = 20, Deflate = 20, Deflate64 = 21, Zip64 = 45 } /// /// The upper byte of the "version made by" flag in the central directory header of a zip file represents the