Skip to content

Commit

Permalink
Merge pull request #423 from bearrito/feature/container-validate
Browse files Browse the repository at this point in the history
Start validation
  • Loading branch information
lemire authored Jun 7, 2024
2 parents 0742966 + c8b1e79 commit e18a7ce
Show file tree
Hide file tree
Showing 11 changed files with 787 additions and 153 deletions.
47 changes: 35 additions & 12 deletions arraycontainer.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package roaring

import (
"errors"
"fmt"
)

type arrayContainer struct {
content []uint16
}

var (
ErrArrayIncorrectSort = errors.New("incorrectly sorted array")
ErrArrayInvalidSize = errors.New("invalid array size")
)

func (ac *arrayContainer) String() string {
s := "{"
for it := ac.getShortIterator(); it.hasNext(); {
Expand All @@ -26,8 +32,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
_ = x[len(ac.content)-1+i]
_ = ac.content[len(ac.content)-1]
for k := 0; k < len(ac.content); k++ {
x[k+i] =
uint32(ac.content[k]) | mask
x[k+i] = uint32(ac.content[k]) | mask
}
return i + len(ac.content)
}
Expand Down Expand Up @@ -168,7 +173,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure

copy(answer.content, ac.content[:startIndex])
outPos := startIndex
Expand All @@ -194,11 +199,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
}
answer.content = answer.content[:newCardinality]
return answer

}

func (ac *arrayContainer) equals(o container) bool {

srb, ok := o.(*arrayContainer)
if ok {
// Check if the containers are the same object.
Expand Down Expand Up @@ -239,8 +242,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
bc := newBitmapContainer()
bc.loadData(ac)
return bc

}

func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
// Special case adding to the end of the container.
l := len(ac.content)
Expand Down Expand Up @@ -352,7 +355,7 @@ func (ac *arrayContainer) ior(a container) container {
return ac.iorArray(x)
case *bitmapContainer:
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
// return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
Expand Down Expand Up @@ -589,7 +592,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
}
ac.content = ac.content[:pos]
return ac

}

func (ac *arrayContainer) xor(a container) container {
Expand Down Expand Up @@ -630,7 +632,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
answer.content = answer.content[:length]
return answer

}

func (ac *arrayContainer) andNot(a container) container {
Expand Down Expand Up @@ -822,7 +823,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
} else { // no expansion needed
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
if cardinalityChange < 0 {

for i := startIndex + newValuesInRange; i < newCardinality; i++ {
ac.content[i] = ac.content[i-cardinalityChange]
}
Expand Down Expand Up @@ -915,7 +915,6 @@ func (ac *arrayContainer) rank(x uint16) int {
return answer + 1
}
return -answer - 1

}

func (ac *arrayContainer) selectInt(x uint16) int {
Expand Down Expand Up @@ -1039,7 +1038,6 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {

// convert to run or array *if needed*
func (ac *arrayContainer) toEfficientContainer() container {

numRuns := ac.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand Down Expand Up @@ -1099,3 +1097,28 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks cardinality and sort order of the array container
func (ac *arrayContainer) validate() error {
cardinality := ac.getCardinality()

if cardinality <= 0 {
return ErrArrayInvalidSize
}

if cardinality > arrayDefaultMaxSize {
return ErrArrayInvalidSize
}

previous := ac.content[0]
for i := 1; i < len(ac.content); i++ {
next := ac.content[i]
if previous > next {
return ErrArrayIncorrectSort
}
previous = next

}

return nil
}
50 changes: 46 additions & 4 deletions arraycontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,11 @@ func TestArrayContainerNumberOfRuns025(t *testing.T) {
}

func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)

ac2 := rc.toArrayContainer()
Expand All @@ -262,9 +264,11 @@ func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
}

func TestArrayContainerEtc070(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)
ac := rc.toArrayContainer()

Expand Down Expand Up @@ -431,6 +435,44 @@ func TestArrayContainerResetTo(t *testing.T) {
})
}

func TestArrayContainerValidation(t *testing.T) {
array := newArrayContainer()
upperBound := arrayDefaultMaxSize

err := array.validate()
assert.Error(t, err)

for i := 0; i < upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.NoError(t, err)

// Introduce a sort error
// We know that upperbound is unsorted because we populated up to upperbound
array.content[500] = uint16(upperBound + upperBound)

err = array.validate()
assert.Error(t, err)

array = newArrayContainer()

// Technically a run, but make sure the incorrect sort detection handles equal elements
for i := 0; i < upperBound; i++ {
array.iadd(uint16(1))
}
err = array.validate()
assert.NoError(t, err)

array = newArrayContainer()

for i := 0; i < 2*upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.Error(t, err)
}

// go test -bench BenchmarkShortIteratorAdvance -run -
func BenchmarkShortIteratorAdvanceArray(b *testing.B) {
benchmarkContainerIteratorAdvance(b, newArrayContainer())
Expand Down
38 changes: 28 additions & 10 deletions bitmapcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
return uint16(j)
}

func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
Expand Down Expand Up @@ -241,7 +242,7 @@ func (bc *bitmapContainer) getSizeInBytes() int {
}

func (bc *bitmapContainer) serializedSizeInBytes() int {
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
// return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
return len(bc.bitmap) * 8
}

Expand Down Expand Up @@ -441,7 +442,7 @@ func (bc *bitmapContainer) ior(a container) container {
if bc.isFull() {
return newRunContainer16Range(0, MaxUint16)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}
panic(fmt.Errorf("unsupported container type %T", a))
Expand Down Expand Up @@ -819,9 +820,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
}
ac := newArrayContainerSize(newcardinality)
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
ac.content = ac.content[:newcardinality] //not sure why i need this
ac.content = ac.content[:newcardinality] // not sure why i need this
return ac

}

func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
Expand All @@ -842,7 +842,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
}
}
return false

}

func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
Expand Down Expand Up @@ -995,7 +994,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
return bc
}

func (bc *bitmapContainer) contains(i uint16) bool { //testbit
func (bc *bitmapContainer) contains(i uint16) bool { // testbit
x := uint(i)
w := bc.bitmap[x>>6]
mask := uint64(1) << (x & 63)
Expand Down Expand Up @@ -1051,7 +1050,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
}

func (bc *bitmapContainer) fillArray(container []uint16) {
//TODO: rewrite in assembly
// TODO: rewrite in assembly
pos := 0
base := 0
for k := 0; k < len(bc.bitmap); k++ {
Expand Down Expand Up @@ -1141,7 +1140,6 @@ func (bc *bitmapContainer) numberOfRuns() int {

// convert to run or array *if needed*
func (bc *bitmapContainer) toEfficientContainer() container {

numRuns := bc.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand All @@ -1159,7 +1157,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
}

func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {

if len(rc.iv) == 1 {
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
}
Expand All @@ -1169,7 +1166,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}

Expand Down Expand Up @@ -1234,3 +1231,24 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks that the container size is non-negative
func (bc *bitmapContainer) validate() error {
if bc.cardinality < arrayDefaultMaxSize {
return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
}

if maxCapacity < len(bc.bitmap)*64 {
return fmt.Errorf("bitmap slize size %d exceeded max capacity %d", maxCapacity, len(bc.bitmap)*64)
}

if bc.cardinality > maxCapacity {
return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
}

if bc.cardinality != int(popcntSlice(bc.bitmap)) {
return fmt.Errorf("bitmap container size %d did not match underlying slice length: %d", bc.cardinality, int(popcntSlice(bc.bitmap)))
}

return nil
}
43 changes: 43 additions & 0 deletions bitmapcontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,46 @@ func TestBitmapContainerIAndNot(t *testing.T) {
require.ElementsMatch(t, []uint16{12279, 12282, 12285}, bc.(*arrayContainer).content)
require.Equal(t, 3, bc.getCardinality())
}

func TestBitMapContainerValidate(t *testing.T) {
bc := newBitmapContainer()

for i := 0; i < arrayDefaultMaxSize-1; i++ {
bc.iadd(uint16(i * 3))
}
// bitmap containers should have size arrayDefaultMaxSize or larger
assert.Error(t, bc.validate())
bc.iadd(math.MaxUint16)

assert.NoError(t, bc.validate())

// Break the max cardinality invariant
bc.cardinality = maxCapacity + 1

assert.Error(t, bc.validate())

// violate cardinality underlying container size invariant
bc = newBitmapContainer()
for i := 0; i < arrayDefaultMaxSize+1; i++ {
bc.iadd(uint16(i * 3))
}
assert.NoError(t, bc.validate())
bc.cardinality += 1
assert.Error(t, bc.validate())

// check that underlying packed slice doesn't exceed maxCapacity
bc = newBitmapContainer()
orginalSize := (1 << 16) / 64
for i := 0; i < orginalSize; i++ {
bc.cardinality += 1
bc.bitmap[i] = uint64(1)
}

appendSize := ((1 << 16) - orginalSize) + 1
for i := 0; i < appendSize; i++ {
bc.cardinality += 1
bc.bitmap = append(bc.bitmap, uint64(1))
}

assert.Error(t, bc.validate())
}
Loading

0 comments on commit e18a7ce

Please sign in to comment.