Skip to content

Commit

Permalink
Use a larger buffer size for java.util.zip.*Stream classes
Browse files Browse the repository at this point in the history
`DeflaterInputStream`, `GZIPInputStream`, `GZIPOutputStream`, and `InflaterInputStream` all use an internal byte buffer of 512 bytes by default.

Whenever the wrapped stream exceeds this size, a full copy to a new buffer will occur, which will increase at increments of the same size. For example, a stream of length 2K will be copied four times. Increasing the size of the buffer we use can result in significant reductions in CPU usage (read: copies).

Examples in the repository
--------------------------

There are already two places where we increase the default size of these buffers:

- `//src/main/java/com/google/devtools/build/lib/bazel/repository/TarGzFunction.java`
- `//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader/HttpStream.java`

Prior art
---------

There is an open enhancement issue in the OpenJDK tracker on this which contains a benchmark for `InflaterOutputStream`:

> Increase the default, internal buffer size of the Streams in `java.util.zip`
> https://bugs.openjdk.org/browse/JDK-8242864

A similar change was merged for JDK 15+ in 2020:

> Improve performance of `InflaterOutputStream.write()`
> https://bugs.openjdk.org/browse/JDK-8242848

Providing a simple benchmark
----------------------------

I'm inlining a simple `jmh` benchmark, with its results underneath, for one `GZIPInputStream` case.

The benchmark:

```
/**
 * JMH micro-benchmark measuring how the internal buffer size passed to {@link GZIPInputStream}
 * affects the time needed to fully decompress gzip-compressed random payloads.
 *
 * <p>Every combination of {@link #inputLength} and {@link #bufferSize} is run as a separate
 * benchmark case. Imports are omitted from this snippet; the annotations and {@code Blackhole}
 * are the JMH ones.
 */
@Fork(1)
@Threads(1)
@Warmup(iterations = 2)
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
public class GZIPInputStreamBenchmark {
    /** Number of uncompressed bytes generated for each iteration. */
    @Param({"1024", "3072", "9216"})
    long inputLength;

    /** Buffer size handed to the {@link GZIPInputStream} constructor under test. */
    @Param({"512", "1024", "4096", "8192"})
    int bufferSize;

    /** Gzip-compressed payload produced by {@link #setup()} and read by the benchmark method. */
    private byte[] content;

    /**
     * Regenerates the compressed payload before each iteration.
     *
     * @throws IOException if compressing the generated bytes fails
     */
    @Setup(Level.Iteration)
    public void setup() throws IOException {
        var baos = new ByteArrayOutputStream();
        // No need to set the buffer size on this as it's a one-time cost for setup and not counted
        // in the result. Closing the stream finishes the gzip data and releases the deflater
        // instead of leaving it to be reclaimed later.
        try (var gzip = new GZIPOutputStream(baos)) {
            gzip.write(generateRandomByteArrayOfLength(inputLength));
        }

        this.content = baos.toByteArray();
    }

    /**
     * Decompresses the prepared payload in full using a {@link GZIPInputStream} constructed with
     * {@link #bufferSize}.
     *
     * @param bh sink that consumes the decompressed bytes so the read cannot be optimized away
     * @throws IOException if reading the compressed payload fails
     */
    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    public void getGzipInputStream(Blackhole bh) throws IOException {
        try (var is = new ByteArrayInputStream(this.content);
             var gzip = new GZIPInputStream(is, bufferSize)) {
            bh.consume(gzip.readAllBytes());
        }
    }

    /**
     * Builds an array of {@code length} pseudo-random bytes.
     *
     * @param length number of bytes to generate
     * @return a freshly generated byte array of the requested length
     */
    byte[] generateRandomByteArrayOfLength(long length) {
        var random = new Random();
        var intStream = random.ints(0, 5000).limit(length).boxed();

        // ByteArrayOutputStream.write(int) stores only the low-order eight bits of each value.
        return intStream.collect(
                ByteArrayOutputStream::new,
                (baos, i) -> baos.write(i.intValue()),
                (baos1, baos2) -> baos1.write(baos2.toByteArray(), 0, baos2.size())
        ).toByteArray();
    }
}
```

The results:

```
Benchmark                                    (bufferSize)  (inputLength)  Mode  Cnt      Score    Error  Units
GZIPInputStreamBenchmark.getGzipInputStream           512           1024  avgt    5   3207.217 ± 24.919  ns/op
GZIPInputStreamBenchmark.getGzipInputStream           512           3072  avgt    5   5874.191 ±  5.827  ns/op
GZIPInputStreamBenchmark.getGzipInputStream           512           9216  avgt    5  15567.345 ± 93.281  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          1024           1024  avgt    5   2580.566 ± 14.566  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          1024           3072  avgt    5   4154.582 ± 16.016  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          1024           9216  avgt    5   9942.521 ± 61.215  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          4096           1024  avgt    5   2150.255 ± 52.770  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          4096           3072  avgt    5   2289.185 ± 71.396  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          4096           9216  avgt    5   5656.891 ± 28.499  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          8192           1024  avgt    5   2177.427 ± 30.896  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          8192           3072  avgt    5   2517.390 ± 21.296  ns/op
GZIPInputStreamBenchmark.getGzipInputStream          8192           9216  avgt    5   5227.932 ± 55.525  ns/op
```

Co-authored-by: Kushal Pisavadia <[email protected]>

Closes #20316.

PiperOrigin-RevId: 588444920
Change-Id: I1fb47f0b08dcb8d72f3e2c43534c33d60efb87f2
  • Loading branch information
EdSchouten authored and copybara-github committed Dec 6, 2023
1 parent 020b85e commit 75a6693
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
* <a href="http://www.pkware.com/documents/casestudies/APPNOTE.TXT">ZIP format</a>
*/
public class ZipCombiner implements AutoCloseable {
private static final int INFLATER_BUFFER_BYTES = 8192;
public static final Date DOS_EPOCH = new Date(ZipUtil.DOS_EPOCH);
/**
* Whether to compress or decompress entries.
Expand Down Expand Up @@ -440,7 +441,7 @@ public void addZip(File zipFile) throws IOException {
entries.put(filename, null);
InputStream in = zip.getRawInputStream(entry);
if (entry.getMethod() == Compression.DEFLATED) {
in = new InflaterInputStream(in, getInflater());
in = new InflaterInputStream(in, getInflater(), INFLATER_BUFFER_BYTES);
}
action.getStrategy().merge(in, action.getMergeBuffer());
break;
Expand Down Expand Up @@ -492,7 +493,9 @@ private void writeEntryFromBuffer(ZipFileEntry entry, byte[] uncompressed) throw
writeEntry(entry, new ByteArrayInputStream(uncompressed));
} else {
ByteArrayOutputStream compressed = new ByteArrayOutputStream();
copyStream(new DeflaterInputStream(new ByteArrayInputStream(uncompressed), getDeflater()),
copyStream(
new DeflaterInputStream(
new ByteArrayInputStream(uncompressed), getDeflater(), INFLATER_BUFFER_BYTES),
compressed);
entry.setMethod(Compression.DEFLATED);
entry.setCompressedSize(compressed.size());
Expand Down Expand Up @@ -529,14 +532,19 @@ private void writeEntry(ZipReader zip, ZipFileEntry entry, EntryAction action)
// from the raw file data and deflate to a temporary byte array to determine the deflated
// size. Then use this byte array as the input stream for writing the entry.
ByteArrayOutputStream tmp = new ByteArrayOutputStream();
copyStream(new DeflaterInputStream(zip.getRawInputStream(entry), getDeflater()), tmp);
copyStream(
new DeflaterInputStream(
zip.getRawInputStream(entry), getDeflater(), INFLATER_BUFFER_BYTES),
tmp);
data = new ByteArrayInputStream(tmp.toByteArray());
outEntry.setMethod(Compression.DEFLATED);
outEntry.setCompressedSize(tmp.size());
} else if (mode == OutputMode.FORCE_STORED && entry.getMethod() != Compression.STORED) {
// The output mode is stored, but the entry compression is not; create an inflater stream
// from the raw file data.
data = new InflaterInputStream(zip.getRawInputStream(entry), getInflater());
// from the raw file data.
data =
new InflaterInputStream(
zip.getRawInputStream(entry), getInflater(), INFLATER_BUFFER_BYTES);
outEntry.setMethod(Compression.STORED);
outEntry.setCompressedSize(entry.getSize());
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package com.google.devtools.build.zip;

import com.google.devtools.build.zip.ZipFileEntry.Compression;

import java.io.IOException;
import java.io.InputStream;
import java.util.zip.Inflater;
Expand All @@ -24,6 +23,7 @@

/** An input stream for reading the file data of a ZIP file entry. */
class ZipEntryInputStream extends InputStream {
private static final int INFLATER_BUFFER_BYTES = 8192;
private InputStream stream;
private long rem;

Expand Down Expand Up @@ -61,7 +61,7 @@ class ZipEntryInputStream extends InputStream {
rem = zipEntry.getSize();
}
if (!raw && zipEntry.getMethod() == Compression.DEFLATED) {
stream = new InflaterInputStream(stream, new Inflater(true));
stream = new InflaterInputStream(stream, new Inflater(true), INFLATER_BUFFER_BYTES);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ protected void computeKey(

private static final class CompressedFileWriteAction extends FileWriteAction {
private static final String GUID = "5bfba914-2251-11ee-be56-0242ac120002";
private static final int GZIP_BYTES_BUFFER = 8192;

private final byte[] compressedBytes;
private final int uncompressedSize;
Expand All @@ -252,7 +253,7 @@ private static final class CompressedFileWriteAction extends FileWriteAction {
// Presize on the small end to avoid over-allocating memory.
ByteArrayOutputStream byteStream = new ByteArrayOutputStream(dataToCompress.length / 100);

try (GZIPOutputStream zipStream = new GZIPOutputStream(byteStream)) {
try (GZIPOutputStream zipStream = new GZIPOutputStream(byteStream, GZIP_BYTES_BUFFER)) {
zipStream.write(dataToCompress);
} catch (IOException e) {
// This should be impossible since we're writing to a byte array.
Expand All @@ -268,7 +269,7 @@ private static final class CompressedFileWriteAction extends FileWriteAction {
public String getFileContents() {
byte[] uncompressedBytes = new byte[uncompressedSize];
try (GZIPInputStream zipStream =
new GZIPInputStream(new ByteArrayInputStream(compressedBytes))) {
new GZIPInputStream(new ByteArrayInputStream(compressedBytes), GZIP_BYTES_BUFFER)) {
int read;
int totalRead = 0;
while (totalRead < uncompressedSize
Expand All @@ -293,7 +294,7 @@ public String getFileContents() {
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> {
try (GZIPInputStream gzipIn =
new GZIPInputStream(new ByteArrayInputStream(compressedBytes))) {
new GZIPInputStream(new ByteArrayInputStream(compressedBytes), GZIP_BYTES_BUFFER)) {
ByteStreams.copy(gzipIn, out);
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class GenQueryOutputStream extends OutputStream {
*/
private static final int COMPRESSION_THRESHOLD = 1 << 20;

private static final int GZIP_BYTES_BUFFER = 8192;

/**
* Encapsulates the output of a {@link GenQuery}'s query. CPU and memory overhead of individual
* methods depends on the underlying content and settings.
Expand Down Expand Up @@ -83,7 +85,7 @@ interface GenQueryResult {
GenQueryOutputStream(boolean compressedOutputRequested) throws IOException {
this.compressedOutputRequested = compressedOutputRequested;
if (compressedOutputRequested) {
this.out = new GZIPOutputStream(bytesOut);
this.out = new GZIPOutputStream(bytesOut, GZIP_BYTES_BUFFER);
this.outputWasCompressed = true;
} else {
this.out = bytesOut;
Expand Down Expand Up @@ -138,7 +140,7 @@ private void maybeStartCompression(int additionalBytes) throws IOException {
}

ByteString.Output compressedBytesOut = ByteString.newOutput();
GZIPOutputStream gzipOut = new GZIPOutputStream(compressedBytesOut);
GZIPOutputStream gzipOut = new GZIPOutputStream(compressedBytesOut, GZIP_BYTES_BUFFER);
bytesOut.writeTo(gzipOut);
bytesOut = compressedBytesOut;
out = gzipOut;
Expand Down

0 comments on commit 75a6693

Please sign in to comment.