diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 02eaad67e0b..825d67c53c8 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -84,6 +84,7 @@ The type attribute can be add,update,fix,remove. Add PMD check to default Maven goal. Add SevenZFile.Builder.setMaxMemoryLimitKiB(int). Add MemoryLimitException.MemoryLimitException(long, int, Throwable) and deprecate MemoryLimitException.MemoryLimitException(long, int, Exception). + Add support for zstd compression in zip archives. Bump org.apache.commons:commons-parent from 72 to 79 #563, #567, #574, #582, #587, #595. Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-9 #565, #578, #601, #616, #630. diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java index a59b54cebd0..a50f31e5602 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java @@ -45,6 +45,7 @@ import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.InputStreamStatistics; @@ -772,6 +773,10 @@ public ZipArchiveEntry getNextZipEntry() throws IOException { case ENHANCED_DEFLATED: current.inputStream = new Deflate64CompressorInputStream(bis); break; + case ZSTD: + case ZSTD_DEPRECATED: + current.inputStream = new ZstdCompressorInputStream(bis); + break; default: // we should never get here as all supported methods have been covered // will cause an error when read is invoked, 
don't throw an exception here so people can @@ -924,15 +929,17 @@ public int read(final byte[] buffer, final int offset, final int length) throws } final int read; - if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { + final int method = current.entry.getMethod(); + if (method == ZipArchiveOutputStream.STORED) { read = readStored(buffer, offset, length); - } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { + } else if (method == ZipArchiveOutputStream.DEFLATED) { read = readDeflated(buffer, offset, length); - } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() - || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { + } else if (method == ZipMethod.UNSHRINKING.getCode() || method == ZipMethod.IMPLODING.getCode() + || method == ZipMethod.ENHANCED_DEFLATED.getCode() || method == ZipMethod.BZIP2.getCode() + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode()) { read = current.inputStream.read(buffer, offset, length); } else { - throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry); + throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(method), current.entry); } if (read >= 0) { @@ -1319,22 +1326,26 @@ private void skipRemainderOfArchive() throws IOException { } /** - * Whether the compressed size for the entry is either known or not required by the compression method being used. + * Tests whether the compressed size for the entry is either known or not required by the compression method being used. 
*/ private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { - return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || entry.getMethod() == ZipEntry.DEFLATED - || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() - || entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED; + final int method = entry.getMethod(); + return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || method == ZipEntry.DEFLATED + || method == ZipMethod.ENHANCED_DEFLATED.getCode() + || entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode(); } /** - * Whether this entry requires a data descriptor this library can work with. + * Tests whether this entry requires a data descriptor this library can work with. * * @return true if allowStoredEntriesWithDataDescriptor is true, the entry doesn't require any data descriptor or the method is DEFLATED or * ENHANCED_DEFLATED. 
*/ private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { - return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED - || entry.getMethod() == ZipEntry.DEFLATED || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); + final int method = entry.getMethod(); + return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED + || method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode() + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode(); } } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java index 6a6ab8e391a..d945f7442ec 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -870,6 +870,9 @@ private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer } else if (zipMethod == DEFLATED || out instanceof RandomAccessOutputStream) { System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, ZipConstants.WORD); System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, ZipConstants.WORD); + } else if (zipMethod == ZipMethod.ZSTD.getCode() || zipMethod == ZipMethod.ZSTD_DEPRECATED.getCode()) { + ZipLong.putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); + ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); } else { // Stored ZipLong.putLong(ze.getSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); @@ -1075,6 +1078,9 @@ private boolean handleSizesAndCrc(final long bytesWritten, final long crc, final entry.entry.setCompressedSize(bytesWritten); entry.entry.setCrc(crc); + } else if 
(entry.entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode()) { + entry.entry.setCompressedSize(bytesWritten); + entry.entry.setCrc(crc); } else if (!(out instanceof RandomAccessOutputStream)) { if (entry.entry.getCrc() != crc) { throw new ZipException("Bad CRC checksum for entry " + entry.entry.getName() + ": " + Long.toHexString(entry.entry.getCrc()) + " instead of " diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index b99a45e10b2..378e21f4696 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -54,6 +54,7 @@ import org.apache.commons.compress.archivers.EntryStreamOffsets; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.compress.utils.BoundedArchiveInputStream; import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; import org.apache.commons.compress.utils.IOUtils; @@ -1230,6 +1231,9 @@ public void close() throws IOException { return new BZip2CompressorInputStream(is); case ENHANCED_DEFLATED: return new Deflate64CompressorInputStream(is); + case ZSTD: + case ZSTD_DEPRECATED: + return new ZstdCompressorInputStream(is); case AES_ENCRYPTED: case EXPANDING_LEVEL_1: case EXPANDING_LEVEL_2: diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java index 209b5ec3996..09f36ae39a8 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java @@ -124,6 +124,26 
@@ public enum ZipMethod { */ LZMA(14), + /** + * Compression Method 20 for Zstd. + * + * @see https://github.com/facebook/zstd + * @see deprecated zstd compression method id + * @see Explanation of fields: compression method: (2 bytes) + * @since 1.28.0 + */ + ZSTD_DEPRECATED(20), + + /** + * Compression Method 93 for Zstd. + * + * @see https://github.com/facebook/zstd + * @see changed zstd compression method id + * @see Explanation of fields: compression method: (2 bytes) + * @since 1.28.0 + */ + ZSTD(93), + /** * Compression Method 95 for XZ. * diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java index 83a4b25c9bb..f2a6f59bdbb 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java @@ -333,7 +333,8 @@ private static boolean supportsEncryptionOf(final ZipArchiveEntry entry) { private static boolean supportsMethodOf(final ZipArchiveEntry entry) { return entry.getMethod() == ZipEntry.STORED || entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || entry.getMethod() == ZipMethod.IMPLODING.getCode() || entry.getMethod() == ZipEntry.DEFLATED - || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode(); + || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode() + || entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode(); } /** diff --git a/src/site/xdoc/zip.xml b/src/site/xdoc/zip.xml index ab3f9fc1827..16ad8c59b77 100644 --- a/src/site/xdoc/zip.xml +++ b/src/site/xdoc/zip.xml @@ -634,7 +634,12 @@ See the examples section for a code sample demonstrating how to make a zip file.

- + +

+ Starting with Compress 1.28.0, org.apache.commons.compress.archivers.zip supports reading and writing zip entries that use the Zstandard compression method. + Both the current method ID 93 and the deprecated method ID 20 are supported. +

+
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java
new file mode 100644
index 00000000000..403034f8bf8
--- /dev/null
+++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.archivers.zip;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.commons.compress.AbstractTest;
+import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
+import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
+import org.apache.commons.compress.compressors.zstandard.ZstdUtils;
+import org.apache.commons.io.IOUtils;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests zip entries that use the Zstandard compression methods {@link ZipMethod#ZSTD} and {@link ZipMethod#ZSTD_DEPRECATED}.
+ */
+public class ZstdCompressorTest extends AbstractTest {
+
+    /** Name of the single entry written by the round-trip tests. */
+    private static final String ENTRY_NAME = "Name.txt";
+
+    /** Uncompressed content of the single entry written by the round-trip tests. */
+    private static final byte[] SIMPLE_TEXT = "This is a Simple Test File.".getBytes(StandardCharsets.UTF_8);
+
+    /**
+     * Reads an uncompressed data stream and writes it Zstandard-compressed to the output.
+     *
+     * @param input  the data stream with uncompressed data.
+     * @param output the data stream for compressed output.
+     * @throws IOException if thrown by {@code IOUtils.copyLarge()}, the {@code ZstdCompressorOutputStream} constructor, or the flush.
+     */
+    private static void compress(final InputStream input, final OutputStream output) throws IOException {
+        // NOTE(review): the stream is flushed but never closed - presumably so that the enclosing zip stream stays open, with the
+        // third constructor argument making flush() end the Zstandard frame; confirm against ZstdCompressorOutputStream.
+        final ZstdCompressorOutputStream outputStream = new ZstdCompressorOutputStream(output, 3, true);
+        IOUtils.copyLarge(input, outputStream);
+        outputStream.flush();
+    }
+
+    /**
+     * Writes a zip archive holding a single entry named {@link #ENTRY_NAME} whose data is compressed by {@link #compress(InputStream, OutputStream)}.
+     *
+     * @param zip     the archive file to write.
+     * @param method  the zip compression method to set on the entry.
+     * @param content the uncompressed entry data.
+     * @throws IOException if writing the archive fails.
+     */
+    private static void writeSingleEntryZip(final Path zip, final ZipMethod method, final byte[] content) throws IOException {
+        try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(zip.toFile())) {
+            final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(ENTRY_NAME);
+            archiveEntry.setMethod(method.getCode());
+            archiveEntry.setSize(content.length);
+            zipOutputStream.putArchiveEntry(archiveEntry);
+            compress(new ByteArrayInputStream(content), zipOutputStream);
+            zipOutputStream.closeArchiveEntry();
+        }
+    }
+
+    @Test
+    public void testZstdDeprecatedMethod() throws IOException {
+        final Path zip = Files.createTempFile("zstd-", ".zip");
+        try {
+            writeSingleEntryZip(zip, ZipMethod.ZSTD_DEPRECATED, SIMPLE_TEXT);
+            try (ZipFile zipFile = ZipFile.builder().setFile(zip.toFile()).get()) {
+                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);
+                assertNotNull(entry, "Entry not found: " + ENTRY_NAME);
+                assertEquals(ZipMethod.ZSTD_DEPRECATED.getCode(), entry.getMethod(), "Unexpected method");
+                try (InputStream inputStream = zipFile.getInputStream(entry)) {
+                    assertTrue(inputStream instanceof ZstdCompressorInputStream, "Input stream must be a ZstdInputStream");
+                }
+            }
+        } finally {
+            Files.deleteIfExists(zip);
+        }
+    }
+
+    @Test
+    public void testZstdInputStream() throws IOException {
+        final Path file = getPath("COMPRESS-692/compress-962.zip");
+        try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) {
+            final ZipArchiveEntry entry = zip.getEntries().nextElement();
+            assertEquals("dolor.txt", entry.getName(), "Unexpected first entry");
+            assertTrue(zip.canReadEntryData(entry), "entry can't be read");
+            assertEquals(ZipMethod.ZSTD.getCode(), entry.getMethod(), "Unexpected method");
+            try (InputStream inputStream = zip.getInputStream(entry)) {
+                final byte[] buf = new byte[(int) entry.getSize()];
+                // A single read() may return fewer bytes than requested, so read until the buffer is full.
+                IOUtils.readFully(inputStream, buf);
+                final String uncompData = new String(buf, StandardCharsets.UTF_8);
+                assertTrue(uncompData.startsWith("dolor sit amet"));
+                assertTrue(uncompData.endsWith("ex ea commodo"));
+                assertEquals(6066, uncompData.length());
+            }
+        }
+    }
+
+    @Test
+    public void testZstdMethodInZipFile() throws IOException {
+        final Path zip = Files.createTempFile("zstd-", ".zip");
+        try {
+            writeSingleEntryZip(zip, ZipMethod.ZSTD, SIMPLE_TEXT);
+            try (ZipFile zipFile = ZipFile.builder().setFile(zip.toFile()).get()) {
+                final ZipArchiveEntry entry = zipFile.getEntry(ENTRY_NAME);
+                assertNotNull(entry, "Entry not found: " + ENTRY_NAME);
+                assertEquals(ZipMethod.ZSTD.getCode(), entry.getMethod(), "Unexpected method");
+                assertEquals(SIMPLE_TEXT.length, entry.getSize(), "Unexpected uncompressed size");
+                try (InputStream inputStream = zipFile.getInputStream(entry)) {
+                    assertTrue(inputStream instanceof ZstdCompressorInputStream, "Input stream must be a ZstdInputStream");
+                    final byte[] uncompressedData = new byte[SIMPLE_TEXT.length];
+                    IOUtils.readFully(inputStream, uncompressedData);
+                    assertArrayEquals(SIMPLE_TEXT, uncompressedData, "Unexpected uncompressed data");
+                }
+                // The raw entry data stored in the archive must itself start with the Zstandard magic bytes.
+                try (InputStream rawInputStream = Files.newInputStream(zip)) {
+                    IOUtils.skipFully(rawInputStream, entry.getDataOffset());
+                    final byte[] compressedData = new byte[4];
+                    IOUtils.readFully(rawInputStream, compressedData);
+                    assertTrue(ZstdUtils.matches(compressedData, 4), "Compressed data must begin with the magic bytes of Zstd");
+                }
+            }
+        } finally {
+            Files.deleteIfExists(zip);
+        }
+    }
+}
diff --git a/src/test/resources/COMPRESS-692/compress-962.zip b/src/test/resources/COMPRESS-692/compress-962.zip
new file mode 100644
index 00000000000..3832b6eedf0
Binary files /dev/null and b/src/test/resources/COMPRESS-692/compress-962.zip differ