Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add optional FHCRC to GZIP header #627

Merged
merged 3 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ private boolean init(final boolean isFirstMember) throws IOException {
// doesn't support this field, but zlib seems to be able to at least
// skip over it.
if ((flg & FHCRC) != 0) {
parameters.setHeaderCRC(true);
inData.readShort();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ public class GzipCompressorOutputStream extends CompressorOutputStream<OutputStr
/** Header flag indicating a comment follows the header */
private static final int FCOMMENT = 1 << 4;

/** Header flag indicating a header CRC follows the header */
private static final int FHCRC = 1 << 1;

/** Deflater used to compress the data */
private final Deflater deflater;

Expand Down Expand Up @@ -161,8 +164,11 @@ public void write(final int b) throws IOException {
*/
private void writeC(final String value, final Charset charset) throws IOException {
    // A null value means the optional header field is absent: nothing is written and the CRC is left untouched.
    if (value == null) {
        return;
    }
    // Encode once and reuse the array so the bytes sent to the stream and the bytes fed to the CRC are identical.
    final byte[] ba = value.getBytes(charset);
    // The field is emitted exactly once, followed by the single 0 byte that terminates it.
    out.write(ba);
    out.write(0);
    // Mirror the same bytes, terminator included, into the running CRC.
    crc.update(ba);
    crc.update(0);
}

Expand All @@ -174,7 +180,11 @@ private void writeHeader(final GzipParameters parameters) throws IOException {
buffer.order(ByteOrder.LITTLE_ENDIAN);
buffer.putShort((short) GZIPInputStream.GZIP_MAGIC);
buffer.put((byte) Deflater.DEFLATED); // compression method (8: deflate)
buffer.put((byte) ((extra != null ? FEXTRA : 0) | (fileName != null ? FNAME : 0) | (comment != null ? FCOMMENT : 0))); // flags
buffer.put((byte) ((extra != null ? FEXTRA : 0)
| (fileName != null ? FNAME : 0)
| (comment != null ? FCOMMENT : 0)
| (parameters.hasHeaderCRC() ? FHCRC : 0)
)); // flags
buffer.putInt((int) parameters.getModificationInstant().getEpochSecond());
// extra flags
final int compressionLevel = parameters.getCompressionLevel();
Expand All @@ -187,13 +197,23 @@ private void writeHeader(final GzipParameters parameters) throws IOException {
}
buffer.put((byte) parameters.getOperatingSystem());
out.write(buffer.array());
crc.update(buffer.array());
if (extra != null) {
out.write(extra.length & 0xff); // little endian
out.write(extra.length >>> 8 & 0xff);
out.write(extra);
crc.update(extra.length & 0xff);
crc.update(extra.length >>> 8 & 0xff);
crc.update(extra);
}
writeC(fileName, parameters.getFileNameCharset());
writeC(comment, parameters.getFileNameCharset());
if (parameters.hasHeaderCRC()) {
final int v = (int) crc.getValue() & 0xffff;
out.write(v & 0xff);
out.write((v >>> 8) & 0xff);
}
crc.reset();
}

private void writeTrailer() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ public int type() {
private OS operatingSystem = OS.UNKNOWN; // Unknown OS by default
private int bufferSize = 512;
private int deflateStrategy = Deflater.DEFAULT_STRATEGY;
private boolean headerCRC;

/**
* Gets size of the buffer used to retrieve compressed data.
Expand Down Expand Up @@ -432,6 +433,16 @@ public OS getOS() {
return operatingSystem;
}

/**
 * Tells whether a header CRC is part of the gzip header: it is to be added when writing, or it was present when reading.
 *
 * @return {@code true} if the header CRC will be added (on write) or was found (after read).
 * @since 1.28.0
 */
public boolean hasHeaderCRC() {
    return this.headerCRC;
}

private String requireNonNulByte(final String text) {
if (StringUtils.isNotEmpty(text) && ArrayUtils.contains(text.getBytes(fileNameCharset), (byte) 0)) {
throw new IllegalArgumentException("String encoded in Charset '" + fileNameCharset + "' contains the nul byte 0 which is not supported in gzip.");
Expand Down Expand Up @@ -489,6 +500,7 @@ public void setDeflateStrategy(final int deflateStrategy) {
this.deflateStrategy = deflateStrategy;
}


/**
* Sets the extra subfields. Note that a non-null extra will appear in the gzip header regardless of the presence of subfields, while a null extra will not
* appear at all.
Expand Down Expand Up @@ -539,6 +551,17 @@ public void setFileNameCharset(final Charset charset) {
this.fileNameCharset = Charsets.toCharset(charset, GzipUtils.GZIP_ENCODING);
}

/**
 * Establishes the presence of the header flag FLG.FHCRC and its header CRC16 value.
 *
 * @param headerCRC when true, the header CRC16 (actually the low 16 bits of a CRC32) is calculated and inserted
 *                  in the gzip header on write; on read it means the field was present.
 * @since 1.28.0
 */
public void setHeaderCRC(final boolean headerCRC) {
    this.headerCRC = headerCRC;
}

/**
* Sets the modification time (MTIME) of the compressed file.
*
Expand Down Expand Up @@ -599,6 +622,7 @@ public void setOS(final OS os) {
this.operatingSystem = os != null ? os : OS.UNKNOWN;
}


@Override
public String toString() {
final StringBuilder builder = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.apache.commons.compress.compressors.gzip;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
Expand All @@ -28,6 +29,8 @@
import static org.junit.jupiter.api.Assertions.fail;
import static org.junit.jupiter.api.Assumptions.assumeTrue;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
Expand All @@ -36,8 +39,13 @@
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipException;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.compress.compressors.gzip.ExtraField.SubField;
import org.apache.commons.compress.compressors.gzip.GzipParameters.OS;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
Expand Down Expand Up @@ -214,4 +222,66 @@ public void testFileNameChinesePercentEncoded() throws IOException {
testFileName("??????.xml", EXPECTED_FILE_NAME);
}


/**
 * Tests the gzip header CRC: writes an empty gzip member with FLG.FHCRC requested, compares the produced bytes
 * with a fixed hex dump, reads the result back with both {@link GZIPInputStream} and
 * {@link GzipCompressorInputStream}, and finally checks that a corrupted header CRC is rejected.
 *
 * @throws IOException      When the test has issues with the underlying streams or unexpected gzip operations.
 * @throws DecoderException When the expected hex dump cannot be decoded.
 */
@Test
public void testHcrc() throws IOException, DecoderException {
    final ExtraField extraField = new ExtraField();
    extraField.addSubField("BB", "CCCC".getBytes(StandardCharsets.ISO_8859_1));

    final GzipParameters writeParams = new GzipParameters();
    writeParams.setHeaderCRC(true);
    writeParams.setModificationTime(0x66554433); // fixed mtime keeps the header bytes reproducible
    writeParams.setFileName("AAAA");
    writeParams.setComment("ZZZZ");
    writeParams.setOS(OS.UNIX);
    writeParams.setExtraField(extraField);

    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    try (GzipCompressorOutputStream gzipOut = new GzipCompressorOutputStream(sink, writeParams)) {
        // no payload is written: only the header and the trailer matter for this test.
    }
    final byte[] actual = sink.toByteArray();

    final String expectedHex = String.join("",
            "1f8b", // id1 id2
            "08", // cm
            "1e", // flg(FEXTRA|FNAME|FCOMMENT|FHCRC)
            "33445566", // mtime little endian
            "00", "03", // xfl os
            "0800", "4242", "0400", "43434343", // xlen sfid sflen "CCCC"
            "4141414100", // "AAAA" with \0
            "5a5a5a5a00", // "ZZZZ" with \0
            "d842", // header crc: low 16 bits of crc32 = 839242d8, little endian
            "0300", // empty deflate stream
            "00000000", // crc32
            "00000000" // isize
    );
    assertArrayEquals(Hex.decodeHex(expectedHex), actual);

    // the JDK reader must accept the header as written
    assertDoesNotThrow(() -> {
        try (GZIPInputStream jdkIn = new GZIPInputStream(new ByteArrayInputStream(actual))) {
            // if opening the stream does not fail, the header CRC is good.
        }
    });

    // the Commons Compress reader must report every header field that was written
    try (GzipCompressorInputStream commonsIn = new GzipCompressorInputStream(new ByteArrayInputStream(actual))) {
        final GzipParameters readBack = commonsIn.getMetaData();
        assertTrue(readBack.hasHeaderCRC());
        assertEquals(0x66554433, readBack.getModificationTime());
        assertEquals(1, readBack.getExtraField().size());
        final SubField subField = readBack.getExtraField().iterator().next();
        assertEquals("BB", subField.getId());
        assertEquals("CCCC", new String(subField.getPayload(), StandardCharsets.ISO_8859_1));
        assertEquals("AAAA", readBack.getFileName());
        assertEquals("ZZZZ", readBack.getComment());
        assertEquals(OS.UNIX, readBack.getOS());
    }

    // verify that the JDK constructor fails on a bad header CRC
    assertThrows(ZipException.class, () -> {
        actual[30] = 0x77; // corrupt the low byte of the header CRC
        try (GZIPInputStream jdkIn = new GZIPInputStream(new ByteArrayInputStream(actual))) {
            // expected to be unreachable: opening the stream should throw on the corrupted header CRC.
        }
    }, "Header CRC verification is no longer feasible with JDK classes. The earlier assertion would have passed despite a bad header CRC.");
}

}
Loading