From bf885f62e2aaf4966b5055f9dda4773264e9ac89 Mon Sep 17 00:00:00 2001 From: "Gary D. Gregory" Date: Mon, 3 Feb 2025 09:47:02 -0500 Subject: [PATCH] [COMPRESS-692] Add support for zstd compression in zip archives This is a simplified version of PR #634 --- src/changes/changes.xml | 1 + .../archivers/zip/ZipArchiveInputStream.java | 35 +++-- .../archivers/zip/ZipArchiveOutputStream.java | 6 + .../compress/archivers/zip/ZipFile.java | 4 + .../compress/archivers/zip/ZipMethod.java | 20 +++ .../compress/archivers/zip/ZipUtil.java | 3 +- src/site/xdoc/zip.xml | 7 +- .../archivers/zip/ZstdCompressorTest.java | 137 ++++++++++++++++++ .../resources/COMPRESS-692/compress-962.zip | Bin 0 -> 3986 bytes 9 files changed, 199 insertions(+), 14 deletions(-) create mode 100644 src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java create mode 100644 src/test/resources/COMPRESS-692/compress-962.zip diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 02eaad67e0b..825d67c53c8 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -84,6 +84,7 @@ The type attribute can be add,update,fix,remove. Add PMD check to default Maven goal. Add SevenZFile.Builder.setMaxMemoryLimitKiB(int). Add MemoryLimitException.MemoryLimitException(long, int, Throwable) and deprecate MemoryLimitException.MemoryLimitException(long, int, Exception). + Add support for zstd compression in zip archives. Bump org.apache.commons:commons-parent from 72 to 79 #563, #567, #574, #582, #587, #595. Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-9 #565, #578, #601, #616, #630. 
diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java index a59b54cebd0..a50f31e5602 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveInputStream.java @@ -45,6 +45,7 @@ import org.apache.commons.compress.archivers.ArchiveInputStream; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.compress.utils.ArchiveUtils; import org.apache.commons.compress.utils.IOUtils; import org.apache.commons.compress.utils.InputStreamStatistics; @@ -772,6 +773,10 @@ public ZipArchiveEntry getNextZipEntry() throws IOException { case ENHANCED_DEFLATED: current.inputStream = new Deflate64CompressorInputStream(bis); break; + case ZSTD: + case ZSTD_DEPRECATED: + current.inputStream = new ZstdCompressorInputStream(bis); + break; default: // we should never get here as all supported methods have been covered // will cause an error when read is invoked, don't throw an exception here so people can @@ -924,15 +929,17 @@ public int read(final byte[] buffer, final int offset, final int length) throws } final int read; - if (current.entry.getMethod() == ZipArchiveOutputStream.STORED) { + final int method = current.entry.getMethod(); + if (method == ZipArchiveOutputStream.STORED) { read = readStored(buffer, offset, length); - } else if (current.entry.getMethod() == ZipArchiveOutputStream.DEFLATED) { + } else if (method == ZipArchiveOutputStream.DEFLATED) { read = readDeflated(buffer, offset, length); - } else if (current.entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || current.entry.getMethod() == ZipMethod.IMPLODING.getCode() 
- || current.entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || current.entry.getMethod() == ZipMethod.BZIP2.getCode()) { + } else if (method == ZipMethod.UNSHRINKING.getCode() || method == ZipMethod.IMPLODING.getCode() + || method == ZipMethod.ENHANCED_DEFLATED.getCode() || method == ZipMethod.BZIP2.getCode() + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode()) { read = current.inputStream.read(buffer, offset, length); } else { - throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(current.entry.getMethod()), current.entry); + throw new UnsupportedZipFeatureException(ZipMethod.getMethodByCode(method), current.entry); } if (read >= 0) { @@ -1319,22 +1326,26 @@ private void skipRemainderOfArchive() throws IOException { } /** - * Whether the compressed size for the entry is either known or not required by the compression method being used. + * Tests whether the compressed size for the entry is either known or not required by the compression method being used. */ private boolean supportsCompressedSizeFor(final ZipArchiveEntry entry) { - return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || entry.getMethod() == ZipEntry.DEFLATED - || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() - || entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED; + final int method = entry.getMethod(); + return entry.getCompressedSize() != ArchiveEntry.SIZE_UNKNOWN || method == ZipEntry.DEFLATED + || method == ZipMethod.ENHANCED_DEFLATED.getCode() + || entry.getGeneralPurposeBit().usesDataDescriptor() && allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode(); } /** - * Whether this entry requires a data descriptor this library can work with. + * Tests whether this entry requires a data descriptor this library can work with. 
* * @return true if allowStoredEntriesWithDataDescriptor is true, the entry doesn't require any data descriptor or the method is DEFLATED or * ENHANCED_DEFLATED. */ private boolean supportsDataDescriptorFor(final ZipArchiveEntry entry) { - return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && entry.getMethod() == ZipEntry.STORED - || entry.getMethod() == ZipEntry.DEFLATED || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode(); + final int method = entry.getMethod(); + return !entry.getGeneralPurposeBit().usesDataDescriptor() || allowStoredEntriesWithDataDescriptor && method == ZipEntry.STORED + || method == ZipEntry.DEFLATED || method == ZipMethod.ENHANCED_DEFLATED.getCode() + || method == ZipMethod.ZSTD.getCode() || method == ZipMethod.ZSTD_DEPRECATED.getCode(); } } diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java index 6a6ab8e391a..d945f7442ec 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipArchiveOutputStream.java @@ -870,6 +870,9 @@ private byte[] createLocalFileHeader(final ZipArchiveEntry ze, final ByteBuffer } else if (zipMethod == DEFLATED || out instanceof RandomAccessOutputStream) { System.arraycopy(LZERO, 0, buf, LFH_COMPRESSED_SIZE_OFFSET, ZipConstants.WORD); System.arraycopy(LZERO, 0, buf, LFH_ORIGINAL_SIZE_OFFSET, ZipConstants.WORD); + } else if (zipMethod == ZipMethod.ZSTD.getCode() || zipMethod == ZipMethod.ZSTD_DEPRECATED.getCode()) { + ZipLong.putLong(ze.getCompressedSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); + ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); } else { // Stored ZipLong.putLong(ze.getSize(), buf, LFH_COMPRESSED_SIZE_OFFSET); ZipLong.putLong(ze.getSize(), buf, LFH_ORIGINAL_SIZE_OFFSET); @@ -1075,6 +1078,9 @@ private boolean 
handleSizesAndCrc(final long bytesWritten, final long crc, final entry.entry.setCompressedSize(bytesWritten); entry.entry.setCrc(crc); + } else if (entry.entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode()) { + entry.entry.setCompressedSize(bytesWritten); + entry.entry.setCrc(crc); } else if (!(out instanceof RandomAccessOutputStream)) { if (entry.entry.getCrc() != crc) { throw new ZipException("Bad CRC checksum for entry " + entry.entry.getName() + ": " + Long.toHexString(entry.entry.getCrc()) + " instead of " diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java index b99a45e10b2..378e21f4696 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipFile.java @@ -54,6 +54,7 @@ import org.apache.commons.compress.archivers.EntryStreamOffsets; import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; import org.apache.commons.compress.compressors.deflate64.Deflate64CompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; import org.apache.commons.compress.utils.BoundedArchiveInputStream; import org.apache.commons.compress.utils.BoundedSeekableByteChannelInputStream; import org.apache.commons.compress.utils.IOUtils; @@ -1230,6 +1231,9 @@ public void close() throws IOException { return new BZip2CompressorInputStream(is); case ENHANCED_DEFLATED: return new Deflate64CompressorInputStream(is); + case ZSTD: + case ZSTD_DEPRECATED: + return new ZstdCompressorInputStream(is); case AES_ENCRYPTED: case EXPANDING_LEVEL_1: case EXPANDING_LEVEL_2: diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java index 209b5ec3996..09f36ae39a8 100644 --- 
a/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipMethod.java @@ -124,6 +124,26 @@ public enum ZipMethod { */ LZMA(14), + /** + * Compression Method 20 for Zstd. + * + * @see https://github.com/facebook/zstd + * @see deprecated zstd compression method id + * @see Explanation of fields: compression method: (2 bytes) + * @since 1.28.0 + */ + ZSTD_DEPRECATED(20), + + /** + * Compression Method 93 for Zstd. + * + * @see https://github.com/facebook/zstd + * @see changed zstd compression method id + * @see Explanation of fields: compression method: (2 bytes) + * @since 1.28.0 + */ + ZSTD(93), + /** * Compression Method 95 for XZ. * diff --git a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java index 83a4b25c9bb..f2a6f59bdbb 100644 --- a/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java +++ b/src/main/java/org/apache/commons/compress/archivers/zip/ZipUtil.java @@ -333,7 +333,8 @@ private static boolean supportsEncryptionOf(final ZipArchiveEntry entry) { private static boolean supportsMethodOf(final ZipArchiveEntry entry) { return entry.getMethod() == ZipEntry.STORED || entry.getMethod() == ZipMethod.UNSHRINKING.getCode() || entry.getMethod() == ZipMethod.IMPLODING.getCode() || entry.getMethod() == ZipEntry.DEFLATED - || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode(); + || entry.getMethod() == ZipMethod.ENHANCED_DEFLATED.getCode() || entry.getMethod() == ZipMethod.BZIP2.getCode() + || entry.getMethod() == ZipMethod.ZSTD.getCode() || entry.getMethod() == ZipMethod.ZSTD_DEPRECATED.getCode(); } /** diff --git a/src/site/xdoc/zip.xml b/src/site/xdoc/zip.xml index ab3f9fc1827..16ad8c59b77 100644 --- a/src/site/xdoc/zip.xml +++ b/src/site/xdoc/zip.xml @@ -634,7 +634,12 @@ See the examples section for a code sample 
demonstrating how to make a zip file.

- + +

+ Starting with Compress 1.28.0, org.apache.commons.compress.archivers.zip supports reading and writing entries that use the Zstandard compression method. + Both Zstandard method 93 and the deprecated method 20 are supported. +

+
diff --git a/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java b/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java new file mode 100644 index 00000000000..403034f8bf8 --- /dev/null +++ b/src/test/java/org/apache/commons/compress/archivers/zip/ZstdCompressorTest.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.commons.compress.archivers.zip; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.apache.commons.compress.AbstractTest; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream; +import org.apache.commons.compress.compressors.zstandard.ZstdUtils; +import org.apache.commons.io.IOUtils; +import org.junit.jupiter.api.Test; + +public class ZstdCompressorTest extends AbstractTest { + + /** + * Reads an uncompressed data stream and writes it compressed to the output. + * + * @param input the data stream with uncompressed data + * @param output the data stream for compressed output + * @throws IOException if thrown by IOUtils.copyLarge() or the ZstdCompressorOutputStream constructor + */ + private static void compress(final InputStream input, final OutputStream output) throws IOException { + final ZstdCompressorOutputStream outputStream = new ZstdCompressorOutputStream(output, 3, true); + IOUtils.copyLarge(input, outputStream); + outputStream.flush(); + } + + @Test + public void testZstdDeprecatedMethod() throws IOException { + final String zipContentFile = "Name.txt"; + final byte[] simpleText = "This is a Simple Test File.".getBytes(); + final File file = Files.createTempFile("", ".zip").toFile(); + // Create the Zip File + try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) { + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(zipContentFile); + archiveEntry.setMethod(ZipMethod.ZSTD_DEPRECATED.getCode()); + archiveEntry.setSize(simpleText.length); + 
zipOutputStream.putArchiveEntry(archiveEntry); + ZstdCompressorTest.compress(new ByteArrayInputStream(simpleText), zipOutputStream); + zipOutputStream.closeArchiveEntry(); + } + // Read the Zip File + try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) { + // Find the entry + final ZipArchiveEntry entry = zipFile.getEntry(zipContentFile); + // Check the Zstd compression method + assertEquals(entry.getMethod(), ZipMethod.ZSTD_DEPRECATED.getCode()); + final InputStream inputStream = zipFile.getInputStream(entry); + assertTrue("Input stream must be a ZstdInputStream", inputStream instanceof ZstdCompressorInputStream); + } + } + + @Test + public void testZstdInputStream() throws IOException { + final Path file = getPath("COMPRESS-692/compress-962.zip"); + try (ZipFile zip = ZipFile.builder().setFile(file.toFile()).get()) { + final ZipArchiveEntry entry = zip.getEntries().nextElement(); + assertEquals("Unexpected first entry", "dolor.txt", entry.getName()); + assertTrue("entry can't be read", zip.canReadEntryData(entry)); + assertEquals("Unexpected method", ZipMethod.ZSTD.getCode(), entry.getMethod()); + try (InputStream inputStream = zip.getInputStream(entry)) { + final long uncompSize = entry.getSize(); + final byte[] buf = new byte[(int) uncompSize]; + inputStream.read(buf); + final String uncompData = new String(buf); + assertTrue(uncompData.startsWith("dolor sit amet")); + assertTrue(uncompData.endsWith("ex ea commodo")); + assertEquals(6066, uncompData.length()); + } + } + } + + @Test + public void testZstdMethodInZipFile() throws IOException { + final String zipContentFile = "Name.txt"; + final byte[] simpleText = "This is a Simple Test File.".getBytes(); + final File file = Files.createTempFile("", ".zip").toFile(); + // Create the Zip File + try (ZipArchiveOutputStream zipOutputStream = new ZipArchiveOutputStream(file)) { + final ZipArchiveEntry archiveEntry = new ZipArchiveEntry(zipContentFile); + archiveEntry.setMethod(ZipMethod.ZSTD.getCode()); + 
archiveEntry.setSize(simpleText.length); + zipOutputStream.putArchiveEntry(archiveEntry); + ZstdCompressorTest.compress(new ByteArrayInputStream(simpleText), zipOutputStream); + zipOutputStream.closeArchiveEntry(); + } + // Read the Zip File + try (ZipFile zipFile = ZipFile.builder().setFile(file).get()) { + // Find the entry + final ZipArchiveEntry entry = zipFile.getEntry(zipContentFile); + // Check the Zstd compression method + assertEquals(entry.getMethod(), ZipMethod.ZSTD.getCode()); + final InputStream inputStream = zipFile.getInputStream(entry); + assertTrue(inputStream instanceof ZstdCompressorInputStream); + final long dataOffset = entry.getDataOffset(); + final int uncompressedSize = (int) entry.getSize(); + assertEquals(simpleText.length, uncompressedSize); + final byte[] uncompressedData = new byte[uncompressedSize]; + inputStream.read(uncompressedData, 0, uncompressedSize); + // Check the uncompressed data + assertEquals(new String(simpleText), new String(uncompressedData)); + try (FileInputStream fileInputStream = new FileInputStream(file)) { + fileInputStream.skip(dataOffset); + final byte[] compressedData = new byte[4]; + fileInputStream.read(compressedData); + assertTrue("Compressed data must begin with the magic bytes of Zstd", ZstdUtils.matches(compressedData, 4)); + } + } + } +} diff --git a/src/test/resources/COMPRESS-692/compress-962.zip b/src/test/resources/COMPRESS-692/compress-962.zip new file mode 100644 index 0000000000000000000000000000000000000000..3832b6eedf02385108839f357475671d14f27a78 GIT binary patch literal 3986 zcmdUyXH-*Z9*1v2K#C$mF+ylY2u(zK8Bux>fsi0Vlp+C)AtFNP;$Wd0484pXiIh+V z1ql`nv2{^k z^G#@S>6u8!h7O7>-euMe5CKHkf(u-k+0$AVChN0p(C$Jnn`k$lRnSn>2S~TnR?A8TRZ=>0UcB;iaIS=lNnTNE}&k3cV#izEu%R zHxt`a;iUr`J4lNRFo$(Jq1o98Tz59i#z5vKN?D`+6D{`Voh_Fk9wMk+rZb^0_EA-3=UO3B;$%!QKqpe`0)FQi*u~aF zHX4@xA*{W5Pf<>5;ntDg677lE@c9v0Lm|SSG(AM1XYMu_quIEYx*5=9x#|oYdwGu; zKzk_Gxs}p2ccZzj|G&g=TFzJtcN@+Av3TqnFRg{YEvJsoc 
zwtJ>asu@Rg@`MFr1l94P5-*c$%^ol{9laFWI&W5EpVWtQtSGS&IVADsw_Il`JZUDw zan^Q()sOFeVJrkd0VwXAtdh2JK~y4ah>L{s9e zR}r1cwWR^=UEn1XQkjm7KT4o0-%DcldfGJ+O*-SQj5kg~;>4llhED%|XAYJ0H0_SNu(M2t3iLpf4z0;;?!+zh0ar zqrLtLakrhYw%{6>0DuL_1Sk6X3kH4)K0(%4Tj35kfh+P9_wS7a*``Qa=sm();5t!T$TAeTagwOOtLFG;xd_{Lp(BMFqwz1duaD~OvY7|Ap+RP0+1FUu-b zB2-uV)kMsI$hlLjx7xd1bwr@|c=`1MbD9#L-=UmaQGq`|6&PJtfg*r@U`Yio4D4dy zEpWjOPi0Wsy=*jNP35W+7kzY&z2R8>@_ZP> zlv@mo_coK6zS#B7_%Uz0|+UXkh+jw+DWf{TU+T{@0d8G*nxFuT+U$#|;C0l(s zdv=?KrN*@7;-Fjk__7U<+3MG#2^r6PTjs0S{XQ7MqF$#c3pb$yRy~adKW1~=JYL6M zHG*$SV6A~O}blfK8vI0F(saW*~CftT^aV~(>qRA}zx<@6r^`Baf= zEmRbn=WE7P+r@)9PRpfmD0)Zyk0zG7T0+ov5a@p3-#t?)tQN5R>UAAiL15(;1lSI! zAl_$oAA1B@U9*1K82zyfZ;Ymc|BQ7t-WYw=DwFcL=#M34W3(kN0IVy`#%RLtUNL_i z{i!Lm7A*pP%O{&wX41w~+=yS*U#EU*a;>E<{iYE9mDGQpV;lFl4ntIYZjX;UXX73f gf-8G`@;H-TBk7-x=jB^ETR!lu3LY3{smFkS003?;CIA2c literal 0 HcmV?d00001