Skip to content

Commit

Permalink
Merge branch 'master' of github.com:lemire/JavaFastPFOR
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Jun 19, 2024
2 parents 82a4d19 + d75c85f commit 182c6d2
Show file tree
Hide file tree
Showing 26 changed files with 616 additions and 292 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/basic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ jobs:
strategy:
fail-fast: false
matrix:
java: [ 11, 16 ]
java: [ 17, 21 ]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4.1.1
- name: Set up JDK
uses: actions/setup-java@v2.5.0
uses: actions/setup-java@v4.1.0
with:
java-version: ${{ matrix.java }}
distribution: 'adopt'
Expand All @@ -21,4 +21,4 @@ jobs:
- name: Build example
run: javac -cp target/classes/:. example.java
- name: Run example
run: java -cp target/classes/:. example
run: java -cp target/classes/:. example
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
JavaFastPFOR: A simple integer compression library in Java
==========================================================
[![][maven img]][maven] [![][license img]][license] [![docs-badge][]][docs]
[![Code Quality: Cpp](https://img.shields.io/lgtm/grade/java/g/lemire/JavaFastPFOR.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/lemire/JavaFastPFOR/context:java)
[![Java CI](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml/badge.svg)](https://github.com/lemire/JavaFastPFOR/actions/workflows/basic.yml)


Expand Down Expand Up @@ -82,9 +81,9 @@ the following code in your pom.xml file:
```xml
<dependencies>
<dependency>
<groupId>me.lemire.integercompression</groupId>
<artifactId>JavaFastPFOR</artifactId>
<version>[0.1,)</version>
<groupId>me.lemire.integercompression</groupId>
<artifactId>JavaFastPFOR</artifactId>
<version>[0.2,)</version>
</dependency>
</dependencies>
```
Expand Down Expand Up @@ -222,6 +221,7 @@ He also posted his slides online: http://www.slideshare.net/ikhtearSharif/ikhtea
Other recommended libraries
-----------------------------

* Fast integer compression in Go: https://github.com/ronanh/intcomp
* Encoding: Integer Compression Libraries for Go https://github.com/zhenjl/encoding
* CSharpFastPFOR: A C# integer compression library https://github.com/Genbox/CSharpFastPFOR
* TurboPFor is a C library that offers lots of interesting optimizations and Java wrappers. Well worth checking! (Uses a GPL license.) https://github.com/powturbo/TurboPFor
Expand Down
10 changes: 5 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>me.lemire.integercompression</groupId>
<artifactId>JavaFastPFOR</artifactId>
<version>0.1.13-SNAPSHOT</version>
<version>0.2.2-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
Expand Down Expand Up @@ -69,10 +69,10 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
<version>3.12.1</version>
<configuration>
<source>11</source>
<target>11</target>
<source>17</source>
<target>17</target>
</configuration>
<executions>
<execution>
Expand Down Expand Up @@ -138,7 +138,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.4.1</version>
<version>3.6.3</version>
<configuration>
<excludePackageNames>me.lemire.integercompression.vector;com.kamikaze.pfordelta:me.lemire.integercompression.benchmarktools</excludePackageNames>
</configuration>
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/me/lemire/integercompression/BitPacking.java
Original file line number Diff line number Diff line change
Expand Up @@ -1690,7 +1690,7 @@ protected static void fastpack9(final int[] in, int inpos,
}

/**
* Unpack 32 integers
* Pack without mask 32 integers
*
* @param in
* source array
Expand Down Expand Up @@ -3005,7 +3005,7 @@ protected static void fastpackwithoutmask9(final int[] in, int inpos,
}

/**
* Pack the 32 integers
* Unpack the 32 integers
*
* @param in
* source array
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,9 @@ public void uncompress(int[] inBuf, IntWrapper inPos, int inLen,
* In case you need a different way to allocate buffers, you can override this method
* with a custom behavior. The default implementation allocates a new Java direct
* {@link ByteBuffer} on each invocation.
*
* @param sizeInBytes
* @return
*/
protected ByteBuffer makeBuffer(int sizeInBytes) {
return ByteBuffer.allocateDirect(sizeInBytes);
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/me/lemire/integercompression/FastPFOR.java
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ public String toString() {
* In case you need a different way to allocate buffers, you can override this method
* with a custom behavior. The default implementation allocates a new Java direct
* {@link ByteBuffer} on each invocation.
*
* @param sizeInBytes
* @return
*/
protected ByteBuffer makeBuffer(int sizeInBytes) {
return ByteBuffer.allocateDirect(sizeInBytes);
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/me/lemire/integercompression/FastPFOR128.java
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ public String toString() {
* In case you need a different way to allocate buffers, you can override this method
* with a custom behavior. The default implementation allocates a new Java direct
* {@link ByteBuffer} on each invocation.
*
* @param sizeInBytes
* @return
*/
protected ByteBuffer makeBuffer(int sizeInBytes) {
return ByteBuffer.allocateDirect(sizeInBytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public IntCompressor() {
* @throws UncompressibleInputException if the data is too poorly compressible
*/
public int[] compress(int[] input) {
int [] compressed = new int[input.length + input.length / 100 + 1024];
int[] compressed = new int[input.length + input.length / 100 + 1024];
// Store at index=0 the length of the input, hence enabling .headlessCompress
compressed[0] = input.length;
IntWrapper outpos = new IntWrapper(1);
try {
Expand All @@ -58,6 +59,7 @@ public int[] compress(int[] input) {
* @return uncompressed array
*/
public int[] uncompress(int[] compressed) {
// Read at index=0 the length of the input, hence enabling .headlessUncompress
int[] decompressed = new int[compressed[0]];
IntWrapper inpos = new IntWrapper(1);
codec.headlessUncompress(compressed, inpos,
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/me/lemire/integercompression/IntegerCODEC.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public interface IntegerCODEC {
* @param in
* input array
* @param inpos
* location in the input array
* where to start reading in the array
* @param inlength
* how many integers to compress
* @param out
Expand All @@ -52,7 +52,7 @@ public void compress(int[] in, IntWrapper inpos, int inlength,
* @param out
* array where to write the compressed output
* @param outpos
* where to write the compressed output in out
* where to start writing the uncompressed output in out
*/
public void uncompress(int[] in, IntWrapper inpos, int inlength,
int[] out, IntWrapper outpos);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
* variation on the IntegerCODEC interface meant to be used for random access
* (i.e., given a large array, you can segment it and decode just the subarray you need).
*
* The main difference is that we must specify the number of integers we wish to
* decode. This information should be stored elsewhere.
* The main difference is that you must specify the number of integers you wish to
* uncompress. This information should be stored elsewhere.
*
* This interface was designed by the Terrier team for their search engine.
*
Expand All @@ -30,10 +30,13 @@ public interface SkippableIntegerCODEC {
* inpos will be incremented by 12 while outpos will be incremented by 3. We
* use IntWrapper to pass the values by reference.
*
* Implementation note: contrary to {@link IntegerCODEC#compress},
* this may skip writing information about the number of encoded integers.
*
* @param in
* input array
* @param inpos
* location in the input array
* where to start reading in the array
* @param inlength
* how many integers to compress
* @param out
Expand All @@ -57,11 +60,11 @@ public void headlessCompress(int[] in, IntWrapper inpos, int inlength, int[] out
* @param inlength
* length of the compressed data (ignored by some schemes)
* @param out
* array where to write the compressed output
* array where to write the uncompressed output
* @param outpos
* where to write the compressed output in out
* where to start writing the uncompressed output in out
* @param num
* number of integers we want to decode, the actual number of integers decoded can be less
* number of integers we want to decode. May be less than the actual number of compressed integers
*/
public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] out,
IntWrapper outpos, int num);
Expand Down
3 changes: 3 additions & 0 deletions src/main/java/me/lemire/integercompression/VariableByte.java
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,9 @@ public void headlessUncompress(int[] in, IntWrapper inpos, int inlength, int[] o
* In case you need a different way to allocate buffers, you can override this method
* with a custom behavior. The default implementation allocates a new Java direct
* {@link ByteBuffer} on each invocation.
*
* @param sizeInBytes
* @return
*/
protected ByteBuffer makeBuffer(int sizeInBytes) {
return ByteBuffer.allocateDirect(sizeInBytes);
Expand Down
143 changes: 143 additions & 0 deletions src/main/java/me/lemire/longcompression/LongBinaryPacking.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package me.lemire.longcompression;

import me.lemire.integercompression.BinaryPacking;
import me.lemire.integercompression.IntWrapper;
import me.lemire.integercompression.Util;

/**
 * Binary packing for {@code long} values; a commonly used idea that can be
 * extremely fast. Data is handled in blocks of 64 longs, each block being
 * packed with the bit width reported by {@code LongUtil.maxbits} for that
 * block. Only whole blocks are processed, so for arrays containing an
 * arbitrary number of longs, you should use it in conjunction with another
 * CODEC:
 *
 * <pre>LongCODEC ic =
 *  new Composition(new LongBinaryPacking(), new LongVariableByte()).</pre>
 *
 * Note that this does not use differential coding: if you are working on sorted
 * lists, you must compute the deltas separately.
 *
 * <p>
 * For details, please see {@link BinaryPacking}
 * </p>
 *
 * @author Benoit Lacelle
 */
public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC {
    final static int BLOCK_SIZE = 64;

    /** Number of blocks whose bit widths share a single header long. */
    private static final int BLOCKS_PER_GROUP = 8;

    /** Number of longs covered by one full group of blocks. */
    private static final int GROUP_SIZE = BLOCK_SIZE * BLOCKS_PER_GROUP;

    /** Bit offset of the first block's width inside a group header. */
    private static final int TOP_LANE_SHIFT = 56;

    /**
     * Compresses whole blocks from {@code in}, preceded by one long holding
     * the number of values that were compressed. Nothing is written and no
     * position is moved when the rounded length is zero.
     *
     * @param in
     *            input array
     * @param inpos
     *            where to start reading in the array (advanced by the number
     *            of values consumed)
     * @param inlength
     *            how many longs to compress (rounded by
     *            {@code Util.greatestMultiple} with the block size)
     * @param out
     *            output array
     * @param outpos
     *            where to start writing in the output array (advanced past
     *            the written data)
     */
    @Override
    public void compress(long[] in, IntWrapper inpos, int inlength,
            long[] out, IntWrapper outpos) {
        final int rounded = Util.greatestMultiple(inlength, BLOCK_SIZE);
        if (rounded != 0) {
            // Length header, read back by uncompress.
            out[outpos.get()] = rounded;
            outpos.increment();
            headlessCompress(in, inpos, rounded, out, outpos);
        }
    }

    /**
     * Compresses whole blocks from {@code in} without writing any length
     * header. Full groups of eight blocks share one header long that holds
     * the eight bit widths, one per byte, first block in the top byte. Any
     * remaining blocks each get their own header long holding a single width.
     *
     * @param in
     *            input array
     * @param inpos
     *            where to start reading in the array (advanced by the number
     *            of values consumed)
     * @param inlength
     *            how many longs to compress (rounded by
     *            {@code Util.greatestMultiple} with the block size)
     * @param out
     *            output array
     * @param outpos
     *            where to start writing in the output array (set to the
     *            position following the written data)
     */
    @Override
    public void headlessCompress(long[] in, IntWrapper inpos, int inlength,
            long[] out, IntWrapper outpos) {
        final int length = Util.greatestMultiple(inlength, BLOCK_SIZE);
        final int end = inpos.get() + length;
        int readAt = inpos.get();
        int writeAt = outpos.get();

        // Full groups: one header long carries the widths of 8 blocks.
        while (readAt + GROUP_SIZE - 1 < end) {
            // Every width is 0..64 and therefore fits in its own byte lane.
            long header = 0L;
            for (int block = 0; block < BLOCKS_PER_GROUP; block++) {
                final int width = LongUtil.maxbits(in, readAt + block * BLOCK_SIZE, BLOCK_SIZE);
                header |= ((long) width) << (TOP_LANE_SHIFT - 8 * block);
            }
            out[writeAt++] = header;

            // A block packed with width w occupies w output longs.
            for (int block = 0; block < BLOCKS_PER_GROUP; block++) {
                final int width = (int) ((header >>> (TOP_LANE_SHIFT - 8 * block)) & 0xFF);
                LongBitPacking.fastpackwithoutmask(in, readAt + block * BLOCK_SIZE, out, writeAt, width);
                writeAt += width;
            }
            readAt += GROUP_SIZE;
        }

        // Leftover blocks (at most 7): each one has a dedicated header long.
        while (readAt < end) {
            final int width = LongUtil.maxbits(in, readAt, BLOCK_SIZE);
            out[writeAt++] = width;
            LongBitPacking.fastpackwithoutmask(in, readAt, out, writeAt, width);
            writeAt += width;
            readAt += BLOCK_SIZE;
        }

        inpos.add(length);
        outpos.set(writeAt);
    }

    /**
     * Uncompresses data produced by {@link #compress}: reads the leading
     * long as the number of values to decode, then decodes the blocks. Does
     * nothing when {@code inlength} is zero.
     *
     * @param in
     *            array containing the compressed data
     * @param inpos
     *            where to start reading in the array (advanced past the
     *            consumed data)
     * @param inlength
     *            length of the compressed data; only tested against zero
     *            here, then forwarded
     * @param out
     *            array where to write the uncompressed output
     * @param outpos
     *            where to start writing the uncompressed output in out
     *            (advanced by the number of values written)
     */
    @Override
    public void uncompress(long[] in, IntWrapper inpos, int inlength,
            long[] out, IntWrapper outpos) {
        if (inlength != 0) {
            final int count = (int) in[inpos.get()];
            inpos.increment();
            headlessUncompress(in, inpos, inlength, out, outpos, count);
        }
    }

    /**
     * Uncompresses whole blocks written by {@link #headlessCompress}. The
     * number of values decoded is {@code num} rounded by
     * {@code Util.greatestMultiple} with the block size.
     *
     * @param in
     *            array containing the compressed data
     * @param inpos
     *            where to start reading in the array (set to the position
     *            following the consumed data)
     * @param inlength
     *            length of the compressed data (ignored by this scheme)
     * @param out
     *            array where to write the uncompressed output
     * @param outpos
     *            where to start writing the uncompressed output in out
     *            (advanced by the number of values written)
     * @param num
     *            number of longs we want to decode
     */
    @Override
    public void headlessUncompress(long[] in, IntWrapper inpos, int inlength,
            long[] out, IntWrapper outpos, int num) {
        final int length = Util.greatestMultiple(num, BLOCK_SIZE);
        final int end = outpos.get() + length;
        int writeAt = outpos.get();
        int readAt = inpos.get();

        // Full groups: split the shared header back into its 8 byte lanes.
        while (writeAt + GROUP_SIZE - 1 < end) {
            final long header = in[readAt++];
            for (int block = 0; block < BLOCKS_PER_GROUP; block++) {
                final int width = (int) ((header >>> (TOP_LANE_SHIFT - 8 * block)) & 0xFF);
                LongBitPacking.fastunpack(in, readAt, out, writeAt + block * BLOCK_SIZE, width);
                readAt += width;
            }
            writeAt += GROUP_SIZE;
        }

        // Leftover blocks: one header long per block.
        while (writeAt < end) {
            final int width = (int) in[readAt++];
            LongBitPacking.fastunpack(in, readAt, out, writeAt, width);
            readAt += width;
            writeAt += BLOCK_SIZE;
        }

        outpos.add(length);
        inpos.set(readAt);
    }

    /**
     * @return the simple name of this class
     */
    @Override
    public String toString() {
        return getClass().getSimpleName();
    }
}
Loading

0 comments on commit 182c6d2

Please sign in to comment.