Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

COMPRESS-692: Add support for zstd compression in zip archives #634

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdUtils;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
Expand Down Expand Up @@ -293,6 +295,8 @@ public static boolean matches(final byte[] signature, final int length) {

private int entriesRead;

private ZstdCompressorInputStream zstdInputStream;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed, removed in git master.


/**
* The factory for extra fields or null.
*/
Expand Down Expand Up @@ -444,6 +448,9 @@ public void close() throws IOException {
closed = true;
try {
in.close();
if (zstdInputStream != null) {
zstdInputStream.close();
}
} finally {
inf.end();
}
Expand Down Expand Up @@ -772,6 +779,10 @@ public ZipArchiveEntry getNextZipEntry() throws IOException {
case ENHANCED_DEFLATED:
current.inputStream = new Deflate64CompressorInputStream(bis);
break;
case ZSTD:
case ZSTD_DEPRECATED:
current.inputStream = new ZstdCompressorInputStream(bis);
break;
default:
// we should never get here as all supported methods have been covered
// will cause an error when read is invoked, don't throw an exception here so people can
Expand Down Expand Up @@ -931,6 +942,19 @@ public int read(final byte[] buffer, final int offset, final int length) throws
} else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
|| current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
read = current.inputStream.read(buffer, offset, length);
} else if (current.entry.getMethod() == ZipMethod.ZSTD.getCode() || current.entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode()) {
if (zstdInputStream == null) {
if (ZstdUtils.isZstdCompressionAvailable()) {
try {
this.zstdInputStream = new ZstdCompressorInputStream(in);
} catch (IOException e) {
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry);
}
} else {
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry);
}
}
read = zstdInputStream.read(buffer, offset, length);
} else {
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry);
}
Expand Down Expand Up @@ -1324,7 +1348,8 @@ private void skipRemainderOfArchive() throws IOException {
private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || entry.getMethod() == ZipEntry.DEFLATED
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
|| entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED;
|| entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED
|| entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode();
}

/**
Expand All @@ -1335,6 +1360,7 @@ private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
*/
private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED
|| entry.getMethod() == ZipEntry.DEFLATED || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
|| entry.getMethod() == ZipEntry.DEFLATED || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
|| entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,9 @@ private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer
} else if (zipMethod == DEFLATED || out instanceof RandomAccessOutputStream) {
System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, ZipConstants.WORD);
System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, ZipConstants.WORD);
} else if (zipMethod == ZipMethod.ZSTD.getCode() || zipMethod == ZipMethod.ZSTD_DEPRECATED.getCode()) {
ZipLong.putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET);
ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET);
} else { // Stored
ZipLong.putLong(ze.getSize(), buf, LFH_COMPRESSED_SIZE_OFFSET);
ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET);
Expand Down Expand Up @@ -1075,6 +1078,9 @@ private boolean handleSizesAndCrc(final long bytesWritten, final long crc, final
entry.entry.setCompressedSize(bytesWritten);
entry.entry.setCrc(crc);

} else if (entry.entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode()) {
entry.entry.setCompressedSize(bytesWritten);
entry.entry.setCrc(crc);
} else if (!(out instanceof RandomAccessOutputStream)) {
if (entry.entry.getCrc() != crc) {
throw new ZipException("Bad CRC checksum for entry " + entry.entry.getName() + ": " + Long.toHexString(entry.entry.getCrc()) + " instead of "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.apache.commons.compress.archivers.EntryStreamOffsets;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.IOUtils;
Expand Down Expand Up @@ -1230,6 +1231,9 @@ public void close() throws IOException {
return new BZip2CompressorInputStream(is);
case ENHANCED_DEFLATED:
return new Deflate64CompressorInputStream(is);
case ZSTD:
case ZSTD_DEPRECATED:
return new ZstdCompressorInputStream(is);
case AES_ENCRYPTED:
case EXPANDING_LEVEL_1:
case EXPANDING_LEVEL_2:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,24 @@ public enum ZipMethod {
*/
LZMA(14),

/**
* Compression Method 20 for Zstd (deprecated id; superseded by method 93, see {@link #ZSTD}).
*
* @see <a href="https://github.com/facebook/zstd">https://github.com/facebook/zstd</a>
* @see <a href="https://pkwaredownloads.blob.core.windows.net/pkware-general/Documentation/APPNOTE-6.3.7.TXT">deprecated zstd compression method id</a>
* @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression method: (2 bytes)</a>
*/
ZSTD_DEPRECATED(20),

/**
* Compression Method 93 for Zstd.
*
* @see <a href="https://github.com/facebook/zstd">https://github.com/facebook/zstd</a>
* @see <a href="https://pkwaredownloads.blob.core.windows.net/pkware-general/Documentation/APPNOTE-6.3.8.TXT">changed zstd compression method id</a>
* @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression method: (2 bytes)</a>
*/
ZSTD(93),

/**
* Compression Method 95 for XZ.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ private static boolean supportsEncryptionOf(final ZipArchiveEntry entry) {
private static boolean supportsMethodOf(final ZipArchiveEntry entry) {
return entry.getMethod() == ZipEntry.STORED || entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
|| entry.getMethod() == ZipMethod.IMPLODING.getCode() || entry.getMethod() == ZipEntry.DEFLATED
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode();
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode()
|| entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode();
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,13 @@
*/
package org.apache.commons.compress.compressors.zstandard;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.commons.compress.utils.OsgiUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.CountingOutputStream;

/**
* Utility code for the Zstandard compression format.
Expand All @@ -28,6 +34,23 @@
*/
public class ZstdUtils {

/**
 * A byte-counting output stream whose {@link #close()} is a no-op, so closing a stream
 * wrapped around it won't close the delegated output stream.
 */
private static final class InnerNotClosingOutputStream extends CountingOutputStream {

    /**
     * Wraps the given stream.
     *
     * @param target the output stream to which the output will be delegated
     */
    private InnerNotClosingOutputStream(final OutputStream target) {
        super(target);
    }

    /**
     * Does nothing; the delegated output stream stays open and remains owned by the caller.
     */
    @Override
    public void close() throws IOException {
        // Intentionally empty: the inner output stream must not be closed here.
    }
}

enum CachedAvailability {
DONT_CACHE, CACHED_AVAILABLE, CACHED_UNAVAILABLE
}
Expand Down Expand Up @@ -134,4 +157,21 @@ public static void setCacheZstdAvailablity(final boolean doCache) {
/** Private constructor to prevent instantiation of this utility class. */
private ZstdUtils() {
}

/**
* Reads uncompressed data stream and writes it compressed to the output
*
* @param input the data stream with uncompressed data
* @param output the data stream for compressed output
* @return the compressed size
* @throws IOException throws the exception which could be got from from IOUtils.copyLarge()
* or ZstdCompressorOutputStream constructor
*/
public static long readAndCompressWrite(InputStream input, OutputStream output) throws IOException {
final InnerNotClosingOutputStream outStream = new InnerNotClosingOutputStream(output);
final ZstdCompressorOutputStream outputStream = new ZstdCompressorOutputStream(outStream, 3, true);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The value 3 looks like a magic number. A comment should state the reason to use 3 instead of another number.

IOUtils.copyLarge(input, outputStream);
outputStream.flush();
return outStream.getByteCount();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers.zip;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.compress.AbstractTest;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdUtils;
import org.junit.jupiter.api.Test;

/**
 * Tests reading and writing Zstandard-compressed entries in zip archives (COMPRESS-692).
 */
public class ZstdCompressorTest extends AbstractTest {

    /** Name of the single entry written by the round-trip tests. */
    private static final String ENTRY_NAME = "Name.txt";

    /** Uncompressed content of the single entry written by the round-trip tests. */
    private static final String SIMPLE_TEXT = "This is a Simple Test File.";

    /**
     * Reads exactly {@code length} bytes; a single {@code read} call may legally return fewer.
     *
     * @param in     the stream to read from
     * @param length the number of bytes to read
     * @return a new array holding exactly {@code length} bytes
     * @throws IOException if the stream ends early or reading fails
     */
    private static byte[] readFully(final InputStream in, final int length) throws IOException {
        final byte[] data = new byte[length];
        int filled = 0;
        while (filled < length) {
            final int count = in.read(data, filled, length - filled);
            if (count < 0) {
                throw new IOException("Unexpected end of stream after " + filled + " of " + length + " bytes");
            }
            filled += count;
        }
        return data;
    }

    /**
     * Skips exactly {@code count} bytes; a single {@code skip} call may legally skip fewer.
     *
     * @param in    the stream to skip in
     * @param count the number of bytes to skip
     * @throws IOException if the bytes cannot be skipped
     */
    private static void skipFully(final InputStream in, final long count) throws IOException {
        long remaining = count;
        while (remaining > 0) {
            final long skipped = in.skip(remaining);
            if (skipped <= 0) {
                throw new IOException("Unable to skip the remaining " + remaining + " bytes");
            }
            remaining -= skipped;
        }
    }

    /**
     * Writes a zip archive containing one entry named {@link #ENTRY_NAME}, compressed with Zstandard
     * and labelled with the given method.
     *
     * @param file    the archive file to write
     * @param method  the zip method to record for the entry
     * @param content the uncompressed entry content
     * @throws IOException if writing fails
     */
    private static void writeZstdZip(final File file, final ZipMethod method, final byte[] content) throws IOException {
        try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) {
            final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(ENTRY_NAME);
            archiveEntry.setMethod(method.getCode());
            archiveEntry.setSize(content.length);
            zipOutputStream.putArchiveEntry(archiveEntry);
            ZstdUtils.readAndCompressWrite(new ByteArrayInputStream(content), zipOutputStream);
            zipOutputStream.closeArchiveEntry();
        }
    }

    @Test
    public void testZstdInputStream() throws IOException {
        final Path file = getPath("COMPRESS-692/compress-962.zip");
        try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) {
            final ZipArchiveEntry entry = zip.getEntries().nextElement();
            assertEquals("Unexpected first entry", "dolor.txt", entry.getName());
            assertTrue("entry can't be read", zip.canReadEntryData(entry));
            assertEquals("Unexpected method", ZipMethod.ZSTD.getCode(), entry.getMethod());

            try (InputStream inputStream = zip.getInputStream(entry)) {
                final byte[] buf = readFully(inputStream, (int) entry.getSize());

                final String uncompData = new String(buf, StandardCharsets.UTF_8);
                assertTrue(uncompData.startsWith("dolor sit amet"));
                assertTrue(uncompData.endsWith("ex ea commodo"));
                assertEquals(6066, uncompData.length());
            }
        }
    }

    @Test
    public void testZstdMethodInZipFile() throws IOException {
        final byte[] simpleText = SIMPLE_TEXT.getBytes(StandardCharsets.UTF_8);
        final File file = Files.createTempFile("zstd-method-", ".zip").toFile();
        try {
            // Create the Zip File
            writeZstdZip(file, ZipMethod.ZSTD, simpleText);

            // Read the Zip File
            try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
                // Find the entry
                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);

                // Check the Zstd compression method
                assertEquals(ZipMethod.ZSTD.getCode(), entry.getMethod());

                final long dataOffset = entry.getDataOffset();
                final int uncompressedSize = (int) entry.getSize();
                assertEquals(simpleText.length, uncompressedSize);

                try (InputStream inputStream = zipFile.getInputStream(entry)) {
                    assertTrue(inputStream instanceof ZstdCompressorInputStream);

                    // Check the uncompressed data
                    final byte[] uncompressedData = readFully(inputStream, uncompressedSize);
                    assertEquals(SIMPLE_TEXT, new String(uncompressedData, StandardCharsets.UTF_8));
                }

                // Check the raw entry data directly in the archive file
                try (FileInputStream fileInputStream = new FileInputStream(file)) {
                    skipFully(fileInputStream, dataOffset);
                    final byte[] compressedData = readFully(fileInputStream, 4);
                    assertTrue("Compressed data must begin with the magic bytes of Zstd", ZstdUtils.matches(compressedData, 4));
                }
            }
        } finally {
            // Don't leave the temporary archive behind.
            Files.deleteIfExists(file.toPath());
        }
    }

    @Test
    public void testZstdDeprecatedMethod() throws IOException {
        final File file = Files.createTempFile("zstd-deprecated-", ".zip").toFile();
        try {
            // Create the Zip File
            writeZstdZip(file, ZipMethod.ZSTD_DEPRECATED, SIMPLE_TEXT.getBytes(StandardCharsets.UTF_8));

            // Read the Zip File
            try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
                // Find the entry
                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);

                // Check the Zstd compression method
                assertEquals(ZipMethod.ZSTD_DEPRECATED.getCode(), entry.getMethod());

                try (InputStream inputStream = zipFile.getInputStream(entry)) {
                    assertTrue("Input stream must be a ZstdInputStream", inputStream instanceof ZstdCompressorInputStream);
                }
            }
        } finally {
            // Don't leave the temporary archive behind.
            Files.deleteIfExists(file.toPath());
        }
    }
}
Binary file added src/test/resources/COMPRESS-692/compress-962.zip
Binary file not shown.