Skip to content

Commit

Permalink
Fix the range check for range index on raw column (apache#9453)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jackie-Jiang authored and Yao Liu committed Oct 3, 2022
1 parent 838422c commit 0238b54
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 54 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ public class BitSlicedRangeIndexReader implements RangeIndexReader<ImmutableRoar
private final long _offset;
private final long _min;
private final long _max;
private final long _numDocs;
private final int _numDocs;

public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer,
ColumnMetadata metadata) {
public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer, ColumnMetadata metadata) {
_dataBuffer = dataBuffer;
long offset = 0;
int version = dataBuffer.getInt(offset);
Expand All @@ -49,49 +48,85 @@ public BitSlicedRangeIndexReader(PinotDataBuffer dataBuffer,
_min = dataBuffer.getLong(offset);
offset += Long.BYTES;
_offset = offset;
_max = metadata.hasDictionary()
? metadata.getCardinality() - 1
: ((Number) metadata.getMaxValue()).longValue();
// TODO: Read max from header to prevent cases where max value is not available in the column metadata
if (metadata.hasDictionary()) {
_max = metadata.getCardinality() - 1;
} else {
Number maxValue = (Number) metadata.getMaxValue();
_max = maxValue != null ? maxValue.longValue() : Long.MAX_VALUE;
}
_numDocs = metadata.getTotalDocs();
}

@Override
public int getNumMatchingDocs(int min, int max) {
// TODO: Handle this before reading the range index
if (min > max || min > _max || max < _min) {
return 0;
}
return queryRangeBitmapCardinality(Math.max(min, _min) - _min, max - _min, _max - _min);
}

@Override
public int getNumMatchingDocs(long min, long max) {
// TODO: Handle this before reading the range index
if (min > max || min > _max || max < _min) {
return 0;
}
return queryRangeBitmapCardinality(Math.max(min, _min) - _min, max - _min, _max - _min);
}

@Override
public int getNumMatchingDocs(float min, float max) {
// TODO: Handle this before reading the range index
if (min > max) {
return 0;
}
return queryRangeBitmapCardinality(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFL);
}

@Override
public int getNumMatchingDocs(double min, double max) {
// TODO: Handle this before reading the range index
if (min > max) {
return 0;
}
return queryRangeBitmapCardinality(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL);
}

@Override
public ImmutableRoaringBitmap getMatchingDocIds(int min, int max) {
// TODO: Handle this before reading the range index
if (min > max || min > _max || max < _min) {
return new MutableRoaringBitmap();
}
return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min);
}

@Override
public ImmutableRoaringBitmap getMatchingDocIds(long min, long max) {
// TODO: Handle this before reading the range index
if (min > max || min > _max || max < _min) {
return new MutableRoaringBitmap();
}
return queryRangeBitmap(Math.max(min, _min) - _min, max - _min, _max - _min);
}

@Override
public ImmutableRoaringBitmap getMatchingDocIds(float min, float max) {
// TODO: Handle this before reading the range index
if (min > max) {
return new MutableRoaringBitmap();
}
return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFL);
}

@Override
public ImmutableRoaringBitmap getMatchingDocIds(double min, double max) {
// TODO: Handle this before reading the range index
if (min > max) {
return new MutableRoaringBitmap();
}
return queryRangeBitmap(FPOrdering.ordinalOf(min), FPOrdering.ordinalOf(max), 0xFFFFFFFFFFFFFFFFL);
}

Expand Down Expand Up @@ -125,7 +160,14 @@ private ImmutableRoaringBitmap queryRangeBitmap(long min, long max, long columnM
RangeBitmap rangeBitmap = mapRangeBitmap();
if (Long.compareUnsigned(max, columnMax) < 0) {
if (Long.compareUnsigned(min, 0) > 0) {
return rangeBitmap.between(min, max).toMutableRoaringBitmap();
// TODO: RangeBitmap has a bug in version 0.9.28 which gives wrong result computing between for 2 numbers with
// different sign. The bug is tracked here: https://github.com/RoaringBitmap/RoaringBitmap/issues/586.
// This is a work-around for the bug.
if (columnMax > 0) {
return rangeBitmap.between(min, max).toMutableRoaringBitmap();
} else {
return rangeBitmap.gte(min, rangeBitmap.lte(max)).toMutableRoaringBitmap();
}
}
return rangeBitmap.lte(max).toMutableRoaringBitmap();
} else {
Expand All @@ -142,14 +184,21 @@ private int queryRangeBitmapCardinality(long min, long max, long columnMax) {
RangeBitmap rangeBitmap = mapRangeBitmap();
if (Long.compareUnsigned(max, columnMax) < 0) {
if (Long.compareUnsigned(min, 0) > 0) {
return (int) rangeBitmap.betweenCardinality(min, max);
// TODO: RangeBitmap has a bug in version 0.9.28 which gives wrong result computing between for 2 numbers with
// different sign. The bug is tracked here: https://github.com/RoaringBitmap/RoaringBitmap/issues/586.
// This is a work-around for the bug.
if (columnMax > 0) {
return (int) rangeBitmap.betweenCardinality(min, max);
} else {
return (int) rangeBitmap.gteCardinality(min, rangeBitmap.lte(max));
}
}
return (int) rangeBitmap.lteCardinality(max);
} else {
if (Long.compareUnsigned(min, 0) > 0) {
return (int) rangeBitmap.gteCardinality(min);
}
return (int) _numDocs;
return _numDocs;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import static org.apache.pinot.segment.spi.V1Constants.Indexes.BITMAP_RANGE_INDEX_FILE_EXTENSION;
import static org.apache.pinot.spi.data.FieldSpec.DataType.*;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;


public class BitSlicedIndexCreatorTest {
Expand Down Expand Up @@ -160,24 +159,39 @@ private void testInt(Dataset<int[]> dataset)
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata);
int prev = Integer.MIN_VALUE;
for (int quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile);
int resultCount = reader.getNumMatchingDocs(prev, quantile);
assertEquals(result, reference);
assertEquals(resultCount, result == null ? 0 : result.getCardinality());
testRange(reader, dataset, Integer.MIN_VALUE, Integer.MIN_VALUE);
int[] quantiles = dataset.quantiles();
int prev = quantiles[0] - 1;
testRange(reader, dataset, Integer.MIN_VALUE, prev);
testRange(reader, dataset, prev, prev);
for (int quantile : quantiles) {
testRange(reader, dataset, prev, quantile);
testRange(reader, dataset, quantile, quantile);
prev = quantile;
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Integer.MAX_VALUE);
assertTrue(result != null && result.isEmpty());
assertEquals(reader.getMatchingDocIds(Integer.MIN_VALUE, Integer.MAX_VALUE),
dataset.scan(Integer.MIN_VALUE, Integer.MAX_VALUE));
testRange(reader, dataset, prev, prev + 1);
testRange(reader, dataset, prev + 1, prev + 1);
testRange(reader, dataset, prev + 1, Integer.MAX_VALUE);
testRange(reader, dataset, Integer.MAX_VALUE, Integer.MAX_VALUE);
testRange(reader, dataset, Integer.MIN_VALUE, Integer.MAX_VALUE);
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
}

private static void testRange(BitSlicedRangeIndexReader reader, Dataset<int[]> dataset, int min, int max) {
ImmutableRoaringBitmap reference = dataset.scan(min, max);
assertEquals(reader.getMatchingDocIds(min, max), reference);
assertEquals(reader.getNumMatchingDocs(min, max), reference.getCardinality());

// Also test reversed min/max value
if (min != max) {
reference = dataset.scan(max, min);
assertEquals(reader.getMatchingDocIds(max, min), reference);
assertEquals(reader.getNumMatchingDocs(max, min), reference.getCardinality());
}
}

private void testLong(Dataset<long[]> dataset)
throws IOException {
ColumnMetadata metadata = dataset.toColumnMetadata();
Expand All @@ -190,24 +204,39 @@ private void testLong(Dataset<long[]> dataset)
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata);
long prev = Long.MIN_VALUE;
for (long quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile);
int resultCount = reader.getNumMatchingDocs(prev, quantile);
assertEquals(result, reference);
assertEquals(resultCount, result == null ? 0 : result.getCardinality());
testRange(reader, dataset, Long.MIN_VALUE, Long.MIN_VALUE);
long[] quantiles = dataset.quantiles();
long prev = quantiles[0] - 1;
testRange(reader, dataset, Long.MIN_VALUE, prev);
testRange(reader, dataset, prev, prev);
for (long quantile : quantiles) {
testRange(reader, dataset, prev, quantile);
testRange(reader, dataset, quantile, quantile);
prev = quantile;
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Long.MAX_VALUE);
assertTrue(result != null && result.isEmpty());
assertEquals(reader.getMatchingDocIds(Long.MIN_VALUE, Long.MAX_VALUE),
dataset.scan(Long.MIN_VALUE, Long.MAX_VALUE));
testRange(reader, dataset, prev, prev + 1);
testRange(reader, dataset, prev + 1, prev + 1);
testRange(reader, dataset, prev + 1, Long.MAX_VALUE);
testRange(reader, dataset, Long.MAX_VALUE, Long.MAX_VALUE);
testRange(reader, dataset, Long.MIN_VALUE, Long.MAX_VALUE);
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
}

private static void testRange(BitSlicedRangeIndexReader reader, Dataset<long[]> dataset, long min, long max) {
ImmutableRoaringBitmap reference = dataset.scan(min, max);
assertEquals(reader.getMatchingDocIds(min, max), reference);
assertEquals(reader.getNumMatchingDocs(min, max), reference.getCardinality());

// Also test reversed min/max value
if (min != max) {
reference = dataset.scan(max, min);
assertEquals(reader.getMatchingDocIds(max, min), reference);
assertEquals(reader.getNumMatchingDocs(max, min), reference.getCardinality());
}
}

private void testFloat(Dataset<float[]> dataset)
throws IOException {
ColumnMetadata metadata = dataset.toColumnMetadata();
Expand All @@ -220,24 +249,39 @@ private void testFloat(Dataset<float[]> dataset)
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata);
float prev = Float.NEGATIVE_INFINITY;
for (float quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile);
int resultCount = reader.getNumMatchingDocs(prev, quantile);
assertEquals(result, reference);
assertEquals(resultCount, result == null ? 0 : result.getCardinality());
testRange(reader, dataset, Float.NEGATIVE_INFINITY, Float.NEGATIVE_INFINITY);
float[] quantiles = dataset.quantiles();
float prev = quantiles[0] - 1;
testRange(reader, dataset, Float.NEGATIVE_INFINITY, prev);
testRange(reader, dataset, prev, prev);
for (float quantile : quantiles) {
testRange(reader, dataset, prev, quantile);
testRange(reader, dataset, quantile, quantile);
prev = quantile;
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Float.POSITIVE_INFINITY);
assertTrue(result != null && result.isEmpty());
assertEquals(reader.getMatchingDocIds(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY),
dataset.scan(Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY));
testRange(reader, dataset, prev, prev + 1);
testRange(reader, dataset, prev + 1, prev + 1);
testRange(reader, dataset, prev + 1, Float.POSITIVE_INFINITY);
testRange(reader, dataset, Float.POSITIVE_INFINITY, Float.POSITIVE_INFINITY);
testRange(reader, dataset, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
}

private static void testRange(BitSlicedRangeIndexReader reader, Dataset<float[]> dataset, float min, float max) {
ImmutableRoaringBitmap reference = dataset.scan(min, max);
assertEquals(reader.getMatchingDocIds(min, max), reference);
assertEquals(reader.getNumMatchingDocs(min, max), reference.getCardinality());

// Also test reversed min/max value
if (min != max) {
reference = dataset.scan(max, min);
assertEquals(reader.getMatchingDocIds(max, min), reference);
assertEquals(reader.getNumMatchingDocs(max, min), reference.getCardinality());
}
}

private void testDouble(Dataset<double[]> dataset)
throws IOException {
ColumnMetadata metadata = dataset.toColumnMetadata();
Expand All @@ -250,24 +294,39 @@ private void testDouble(Dataset<double[]> dataset)
File rangeIndexFile = new File(INDEX_DIR, metadata.getColumnName() + BITMAP_RANGE_INDEX_FILE_EXTENSION);
try (PinotDataBuffer dataBuffer = PinotDataBuffer.mapReadOnlyBigEndianFile(rangeIndexFile)) {
BitSlicedRangeIndexReader reader = new BitSlicedRangeIndexReader(dataBuffer, metadata);
double prev = Double.NEGATIVE_INFINITY;
for (double quantile : dataset.quantiles()) {
ImmutableRoaringBitmap reference = dataset.scan(prev, quantile);
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev, quantile);
int resultCount = reader.getNumMatchingDocs(prev, quantile);
assertEquals(result, reference);
assertEquals(resultCount, result == null ? 0 : result.getCardinality());
testRange(reader, dataset, Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY);
double[] quantiles = dataset.quantiles();
double prev = quantiles[0] - 1;
testRange(reader, dataset, Double.NEGATIVE_INFINITY, prev);
testRange(reader, dataset, prev, prev);
for (double quantile : quantiles) {
testRange(reader, dataset, prev, quantile);
testRange(reader, dataset, quantile, quantile);
prev = quantile;
}
ImmutableRoaringBitmap result = reader.getMatchingDocIds(prev + 1, Double.POSITIVE_INFINITY);
assertTrue(result != null && result.isEmpty());
assertEquals(reader.getMatchingDocIds(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY),
dataset.scan(Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY));
testRange(reader, dataset, prev, prev + 1);
testRange(reader, dataset, prev + 1, prev + 1);
testRange(reader, dataset, prev + 1, Double.POSITIVE_INFINITY);
testRange(reader, dataset, Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY);
testRange(reader, dataset, Double.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);
} finally {
FileUtils.forceDelete(rangeIndexFile);
}
}

private static void testRange(BitSlicedRangeIndexReader reader, Dataset<double[]> dataset, double min, double max) {
ImmutableRoaringBitmap reference = dataset.scan(min, max);
assertEquals(reader.getMatchingDocIds(min, max), reference);
assertEquals(reader.getNumMatchingDocs(min, max), reference.getCardinality());

// Also test reversed min/max value
if (min != max) {
reference = dataset.scan(max, min);
assertEquals(reader.getMatchingDocIds(max, min), reference);
assertEquals(reader.getNumMatchingDocs(max, min), reference.getCardinality());
}
}

private static BitSlicedRangeIndexCreator newBitSlicedIndexCreator(ColumnMetadata metadata) {
return metadata.hasDictionary() ? new BitSlicedRangeIndexCreator(INDEX_DIR,
metadata.getFieldSpec(), metadata.getCardinality()) : new BitSlicedRangeIndexCreator(INDEX_DIR,
Expand Down

0 comments on commit 0238b54

Please sign in to comment.