Skip to content

Commit

Permalink
[COMPRESS-692] Add support for zstd compression in zip archives
Browse files Browse the repository at this point in the history
This is a simplified version of PR #634
  • Loading branch information
garydgregory committed Feb 3, 2025
1 parent 4e532d2 commit bf885f6
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 14 deletions.
1 change: 1 addition & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ The <action> type attribute can be add,update,fix,remove.
<action type="add" dev="ggregory" due-to="Gary Gregory">Add PMD check to default Maven goal.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add SevenZFile.Builder.setMaxMemoryLimitKiB(int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add MemoryLimitException.MemoryLimitException(long, int, Throwable) and deprecate MemoryLimitException.MemoryLimitException(long, int, Exception).</action>
<action type="add" issue="COMPRESS-692" dev="ggregory" due-to="Mehmet Karaman, Andrey Loskutov, Gary Gregory">Add support for zstd compression in zip archives.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump org.apache.commons:commons-parent from 72 to 79 #563, #567, #574, #582, #587, #595.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-9 #565, #578, #601, #616, #630.</action>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.utils.ArchiveUtils;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
Expand Down Expand Up @@ -772,6 +773,10 @@ public ZipArchiveEntry getNextZipEntry() throws IOException {
case ENHANCED_DEFLATED:
current.inputStream = new Deflate64CompressorInputStream(bis);
break;
case ZSTD:
case ZSTD_DEPRECATED:
current.inputStream = new ZstdCompressorInputStream(bis);
break;
default:
// we should never get here as all supported methods have been covered
// will cause an error when read is invoked, don't throw an exception here so people can
Expand Down Expand Up @@ -924,15 +929,17 @@ public int read(final byte[] buffer, final int offset, final int length) throws
}

final int read;
if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) {
final int method = current.entry.getMethod();
if (method == ZipArchiveOutputStream.STORED) {
read = readStored(buffer, offset, length);
} else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) {
} else if (method == ZipArchiveOutputStream.DEFLATED) {
read = readDeflated(buffer, offset, length);
} else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || current.entry.getMethod() == ZipMethod.IMPLODING.getCode()
|| current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) {
} else if (method == ZipMethod.UNSHRINKING.getCode() || method == ZipMethod.IMPLODING.getCode()
|| method == ZipMethod.ENHANCED_DEFLATED.getCode() || method == ZipMethod.BZIP2.getCode()
|| method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode()) {
read = current.inputStream.read(buffer, offset, length);
} else {
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry);
throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(method), current.entry);
}

if (read >= 0) {
Expand Down Expand Up @@ -1319,22 +1326,26 @@ private void skipRemainderOfArchive() throws IOException {
}

/**
* Whether the compressed size for the entry is either known or not required by the compression method being used.
* Tests whether the compressed size for the entry is either known or not required by the compression method being used.
*/
private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) {
return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || entry.getMethod() == ZipEntry.DEFLATED
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode()
|| entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED;
final int method = entry.getMethod();
return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || method == ZipEntry.DEFLATED
|| method == ZipMethod.ENHANCED_DEFLATED.getCode()
|| entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED
|| method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode();
}

/**
* Whether this entry requires a data descriptor this library can work with.
* Tests whether this entry requires a data descriptor this library can work with.
*
* @return true if allowStoredEntriesWithDataDescriptor is true, the entry doesn't require any data descriptor or the method is DEFLATED,
* ENHANCED_DEFLATED, ZSTD or ZSTD_DEPRECATED.
*/
private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) {
return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED
|| entry.getMethod() == ZipEntry.DEFLATED || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode();
final int method = entry.getMethod();
return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED
|| method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode()
|| method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,9 @@ private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer
} else if (zipMethod == DEFLATED || out instanceof RandomAccessOutputStream) {
System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, ZipConstants.WORD);
System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, ZipConstants.WORD);
} else if (zipMethod == ZipMethod.ZSTD.getCode() || zipMethod == ZipMethod.ZSTD_DEPRECATED.getCode()) {
ZipLong.putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET);
ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET);
} else { // Stored
ZipLong.putLong(ze.getSize(), buf, LFH_COMPRESSED_SIZE_OFFSET);
ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET);
Expand Down Expand Up @@ -1075,6 +1078,9 @@ private boolean handleSizesAndCrc(final long bytesWritten, final long crc, final
entry.entry.setCompressedSize(bytesWritten);
entry.entry.setCrc(crc);

} else if (entry.entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode()) {
entry.entry.setCompressedSize(bytesWritten);
entry.entry.setCrc(crc);
} else if (!(out instanceof RandomAccessOutputStream)) {
if (entry.entry.getCrc() != crc) {
throw new ZipException("Bad CRC checksum for entry " + entry.entry.getName() + ": " + Long.toHexString(entry.entry.getCrc()) + " instead of "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import org.apache.commons.compress.archivers.EntryStreamOffsets;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.utils.BoundedArchiveInputStream;
import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream;
import org.apache.commons.compress.utils.IOUtils;
Expand Down Expand Up @@ -1230,6 +1231,9 @@ public void close() throws IOException {
return new BZip2CompressorInputStream(is);
case ENHANCED_DEFLATED:
return new Deflate64CompressorInputStream(is);
case ZSTD:
case ZSTD_DEPRECATED:
return new ZstdCompressorInputStream(is);
case AES_ENCRYPTED:
case EXPANDING_LEVEL_1:
case EXPANDING_LEVEL_2:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,26 @@ public enum ZipMethod {
*/
LZMA(14),

/**
* Compression Method 20 for Zstd.
*
* @see <a href="https://github.com/facebook/zstd">https://github.com/facebook/zstd</a>
* @see <a href="https://pkwaredownloads.blob.core.windows.net/pkware-general/Documentation/APPNOTE-6.3.7.TXT">deprecated zstd compression method id</a>
* @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression method: (2 bytes)</a>
* @since 1.28.0
*/
ZSTD_DEPRECATED(20),

/**
* Compression Method 93 for Zstd.
*
* @see <a href="https://github.com/facebook/zstd">https://github.com/facebook/zstd</a>
* @see <a href="https://pkwaredownloads.blob.core.windows.net/pkware-general/Documentation/APPNOTE-6.3.8.TXT">changed zstd compression method id</a>
* @see <a href="https://www.pkware.com/documents/casestudies/APPNOTE.TXT">Explanation of fields: compression method: (2 bytes)</a>
* @since 1.28.0
*/
ZSTD(93),

/**
* Compression Method 95 for XZ.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,8 @@ private static boolean supportsEncryptionOf(final ZipArchiveEntry entry) {
private static boolean supportsMethodOf(final ZipArchiveEntry entry) {
return entry.getMethod() == ZipEntry.STORED || entry.getMethod() == ZipMethod.UNSHRINKING.getCode()
|| entry.getMethod() == ZipMethod.IMPLODING.getCode() || entry.getMethod() == ZipEntry.DEFLATED
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode();
|| entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode()
|| entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode();
}

/**
Expand Down
7 changes: 6 additions & 1 deletion src/site/xdoc/zip.xml
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,12 @@
See the examples section for a code sample demonstrating how to make a zip file.
</p>
</subsection>

<subsection name="Zstandard Support" id="zstd">
<p>
Starting with Compress 1.28.0, <code>org.apache.commons.compress.archivers.zip</code> supports reading and writing using the Zstandard method.
Zstandard method <code>93</code> and the deprecated <code>20</code> are supported.
</p>
</subsection>
</section>
</body>
</document>
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers.zip;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.commons.compress.AbstractTest;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
import org.apache.commons.compress.compressors.zstandard.ZstdUtils;
import org.apache.commons.io.IOUtils;
import org.junit.jupiter.api.Test;

/**
 * Tests reading and writing zip entries that use the Zstandard compression methods ({@link ZipMethod#ZSTD} and {@link ZipMethod#ZSTD_DEPRECATED}).
 */
public class ZstdCompressorTest extends AbstractTest {

    /** Name of the single entry written by the round-trip tests. */
    private static final String ENTRY_NAME = "Name.txt";

    /** Payload of the single entry written by the round-trip tests. */
    private static final byte[] SIMPLE_TEXT = "This is a Simple Test File.".getBytes(StandardCharsets.UTF_8);

    /**
     * Reads an uncompressed data stream and writes it Zstd-compressed to the output.
     *
     * @param input  the data stream with uncompressed data
     * @param output the data stream for compressed output
     * @throws IOException if thrown by {@code IOUtils.copyLarge()} or the {@code ZstdCompressorOutputStream} constructor
     */
    private static void compress(final InputStream input, final OutputStream output) throws IOException {
        // The compressor is only flushed, never closed: the caller still owns 'output' and keeps writing to it afterwards.
        // NOTE(review): the third constructor argument is presumably closeFrameOnFlush, so flush() ends the Zstd frame - confirm.
        final ZstdCompressorOutputStream outputStream = new ZstdCompressorOutputStream(output, 3, true);
        IOUtils.copyLarge(input, outputStream);
        outputStream.flush();
    }

    /**
     * Writes a zip archive holding exactly one entry, {@link #ENTRY_NAME}, whose data is {@link #SIMPLE_TEXT} compressed with Zstd.
     *
     * @param file   the archive file to write
     * @param method the zip compression method recorded for the entry
     * @throws IOException if the archive cannot be written
     */
    private static void writeSingleEntryZip(final File file, final ZipMethod method) throws IOException {
        try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) {
            final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(ENTRY_NAME);
            archiveEntry.setMethod(method.getCode());
            archiveEntry.setSize(SIMPLE_TEXT.length);
            zipOutputStream.putArchiveEntry(archiveEntry);
            compress(new ByteArrayInputStream(SIMPLE_TEXT), zipOutputStream);
            zipOutputStream.closeArchiveEntry();
        }
    }

    /**
     * Writes an entry with the deprecated Zstd method code and checks that {@link ZipFile} reports that method and hands out a Zstd input stream.
     *
     * @throws IOException if the archive cannot be written or read
     */
    @Test
    public void testZstdDeprecatedMethod() throws IOException {
        final File file = Files.createTempFile("", ".zip").toFile();
        try {
            // Create the Zip File
            writeSingleEntryZip(file, ZipMethod.ZSTD_DEPRECATED);
            // Read the Zip File
            try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
                // Find the entry
                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);
                // Check the Zstd compression method
                assertEquals(ZipMethod.ZSTD_DEPRECATED.getCode(), entry.getMethod());
                try (InputStream inputStream = zipFile.getInputStream(entry)) {
                    assertTrue("Input stream must be a ZstdInputStream", inputStream instanceof ZstdCompressorInputStream);
                }
            }
        } finally {
            // Do not leave the temporary archive behind.
            Files.deleteIfExists(file.toPath());
        }
    }

    /**
     * Reads the first entry of a pre-built Zstd-compressed test archive and checks its name, method and uncompressed content.
     *
     * @throws IOException if the archive cannot be read
     */
    @Test
    public void testZstdInputStream() throws IOException {
        final Path file = getPath("COMPRESS-692/compress-962.zip");
        try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) {
            final ZipArchiveEntry entry = zip.getEntries().nextElement();
            assertEquals("Unexpected first entry", "dolor.txt", entry.getName());
            assertTrue("entry can't be read", zip.canReadEntryData(entry));
            assertEquals("Unexpected method", ZipMethod.ZSTD.getCode(), entry.getMethod());
            try (InputStream inputStream = zip.getInputStream(entry)) {
                final long uncompSize = entry.getSize();
                final byte[] buf = new byte[(int) uncompSize];
                // A single read() may return fewer bytes than requested; read until the buffer is full.
                IOUtils.readFully(inputStream, buf);
                final String uncompData = new String(buf, StandardCharsets.UTF_8);
                assertTrue(uncompData.startsWith("dolor sit amet"));
                assertTrue(uncompData.endsWith("ex ea commodo"));
                assertEquals(6066, uncompData.length());
            }
        }
    }

    /**
     * Writes an entry with the current Zstd method code, reads it back, and checks that the raw entry data starts with the Zstd magic bytes.
     *
     * @throws IOException if the archive cannot be written or read
     */
    @Test
    public void testZstdMethodInZipFile() throws IOException {
        final File file = Files.createTempFile("", ".zip").toFile();
        try {
            // Create the Zip File
            writeSingleEntryZip(file, ZipMethod.ZSTD);
            // Read the Zip File
            try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) {
                // Find the entry
                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);
                // Check the Zstd compression method
                assertEquals(ZipMethod.ZSTD.getCode(), entry.getMethod());
                final long dataOffset = entry.getDataOffset();
                final int uncompressedSize = (int) entry.getSize();
                assertEquals(SIMPLE_TEXT.length, uncompressedSize);
                try (InputStream inputStream = zipFile.getInputStream(entry)) {
                    assertTrue(inputStream instanceof ZstdCompressorInputStream);
                    final byte[] uncompressedData = new byte[uncompressedSize];
                    IOUtils.readFully(inputStream, uncompressedData);
                    // Check the uncompressed data
                    assertEquals(new String(SIMPLE_TEXT, StandardCharsets.UTF_8), new String(uncompressedData, StandardCharsets.UTF_8));
                }
                // Look at the raw bytes stored at the entry's data offset.
                try (FileInputStream fileInputStream = new FileInputStream(file)) {
                    IOUtils.skipFully(fileInputStream, dataOffset);
                    final byte[] compressedData = new byte[4];
                    IOUtils.readFully(fileInputStream, compressedData);
                    assertTrue("Compressed data must begin with the magic bytes of Zstd", ZstdUtils.matches(compressedData, 4));
                }
            }
        } finally {
            // Do not leave the temporary archive behind.
            Files.deleteIfExists(file.toPath());
        }
    }
}
Binary file added src/test/resources/COMPRESS-692/compress-962.zip
Binary file not shown.

0 comments on commit bf885f6

Please sign in to comment.