Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start validation #423

Merged
merged 11 commits into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 33 additions & 12 deletions arraycontainer.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
package roaring

import (
"errors"
"fmt"
)

type arrayContainer struct {
content []uint16
}

var ErrArrayIncorrectSort = errors.New("incorrectly sorted array")

func (ac *arrayContainer) String() string {
s := "{"
for it := ac.getShortIterator(); it.hasNext(); {
Expand All @@ -26,8 +29,7 @@ func (ac *arrayContainer) fillLeastSignificant16bits(x []uint32, i int, mask uin
_ = x[len(ac.content)-1+i]
_ = ac.content[len(ac.content)-1]
for k := 0; k < len(ac.content); k++ {
x[k+i] =
uint32(ac.content[k]) | mask
x[k+i] = uint32(ac.content[k]) | mask
}
return i + len(ac.content)
}
Expand Down Expand Up @@ -168,7 +170,7 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
return ac.toBitmapContainer().not(firstOfRange, lastOfRange+1)
}
answer := newArrayContainer()
answer.content = make([]uint16, newCardinality, newCardinality) //a hack for sure
answer.content = make([]uint16, newCardinality, newCardinality) // a hack for sure

copy(answer.content, ac.content[:startIndex])
outPos := startIndex
Expand All @@ -194,11 +196,9 @@ func (ac *arrayContainer) notClose(firstOfRange, lastOfRange int) container {
}
answer.content = answer.content[:newCardinality]
return answer

}

func (ac *arrayContainer) equals(o container) bool {

srb, ok := o.(*arrayContainer)
if ok {
// Check if the containers are the same object.
Expand Down Expand Up @@ -239,8 +239,8 @@ func (ac *arrayContainer) toBitmapContainer() *bitmapContainer {
bc := newBitmapContainer()
bc.loadData(ac)
return bc

}

func (ac *arrayContainer) iadd(x uint16) (wasNew bool) {
// Special case adding to the end of the container.
l := len(ac.content)
Expand Down Expand Up @@ -352,7 +352,7 @@ func (ac *arrayContainer) ior(a container) container {
return ac.iorArray(x)
case *bitmapContainer:
return a.(*bitmapContainer).orArray(ac)
//return ac.iorBitmap(x) // note: this does not make sense
// return ac.iorBitmap(x) // note: this does not make sense
case *runContainer16:
if x.isFull() {
return x.clone()
Expand Down Expand Up @@ -589,7 +589,6 @@ func (ac *arrayContainer) iandBitmap(bc *bitmapContainer) container {
}
ac.content = ac.content[:pos]
return ac

}

func (ac *arrayContainer) xor(a container) container {
Expand Down Expand Up @@ -630,7 +629,6 @@ func (ac *arrayContainer) xorArray(value2 *arrayContainer) container {
length := exclusiveUnion2by2(value1.content, value2.content, answer.content)
answer.content = answer.content[:length]
return answer

}

func (ac *arrayContainer) andNot(a container) container {
Expand Down Expand Up @@ -822,7 +820,6 @@ func (ac *arrayContainer) inotClose(firstOfRange, lastOfRange int) container {
} else { // no expansion needed
ac.negateRange(buffer, startIndex, lastIndex, firstOfRange, lastOfRange+1)
if cardinalityChange < 0 {

for i := startIndex + newValuesInRange; i < newCardinality; i++ {
ac.content[i] = ac.content[i-cardinalityChange]
}
Expand Down Expand Up @@ -915,7 +912,6 @@ func (ac *arrayContainer) rank(x uint16) int {
return answer + 1
}
return -answer - 1

}

func (ac *arrayContainer) selectInt(x uint16) int {
Expand Down Expand Up @@ -1039,7 +1035,6 @@ func (ac *arrayContainer) numberOfRuns() (nr int) {

// convert to run or array *if needed*
func (ac *arrayContainer) toEfficientContainer() container {

numRuns := ac.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand Down Expand Up @@ -1099,3 +1094,29 @@ func (ac *arrayContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks cardinality and sort order of the array container
func (ac *arrayContainer) validate() error {
cardinality := ac.getCardinality()

// TODO use ERR consts
if cardinality <= 0 {
return errors.New("zero or negative size")
}

if cardinality > arrayDefaultMaxSize {
return errors.New("exceeds default max size")
}

previous := ac.content[0]
for i := 1; i < len(ac.content); i++ {
next := ac.content[i]
if previous > next {
return ErrArrayIncorrectSort
}
previous = next

}

return nil
}
52 changes: 48 additions & 4 deletions arraycontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,11 @@ func TestArrayContainerNumberOfRuns025(t *testing.T) {
}

func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)

ac2 := rc.toArrayContainer()
Expand All @@ -262,9 +264,11 @@ func TestArrayContainerIaddRangeNearMax068(t *testing.T) {
}

func TestArrayContainerEtc070(t *testing.T) {
iv := []interval16{newInterval16Range(65525, 65527),
iv := []interval16{
newInterval16Range(65525, 65527),
newInterval16Range(65530, 65530),
newInterval16Range(65534, 65535)}
newInterval16Range(65534, 65535),
}
rc := newRunContainer16TakeOwnership(iv)
ac := rc.toArrayContainer()

Expand Down Expand Up @@ -431,6 +435,46 @@ func TestArrayContainerResetTo(t *testing.T) {
})
}

func TestArrayContainerValidation(t *testing.T) {
// TODO Use table based testing

array := newArrayContainer()
upperBound := arrayDefaultMaxSize

err := array.validate()
assert.Error(t, err)

for i := 0; i < upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.NoError(t, err)

// Introduce a sort error
// We know that upperbound is unsorted because we populated up to upperbound
array.content[500] = uint16(upperBound + upperBound)

err = array.validate()
assert.Error(t, err)

array = newArrayContainer()

// Technically a run, but make sure the incorrect sort detection handles equal elements
for i := 0; i < upperBound; i++ {
array.iadd(uint16(1))
}
err = array.validate()
assert.NoError(t, err)

array = newArrayContainer()

for i := 0; i < 2*upperBound; i++ {
array.iadd(uint16(i))
}
err = array.validate()
assert.Error(t, err)
}

// go test -bench BenchmarkShortIteratorAdvance -run -
func BenchmarkShortIteratorAdvanceArray(b *testing.B) {
benchmarkContainerIteratorAdvance(b, newArrayContainer())
Expand Down
29 changes: 19 additions & 10 deletions bitmapcontainer.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ func (bcsi *bitmapContainerShortIterator) next() uint16 {
bcsi.i = bcsi.ptr.NextSetBit(uint(bcsi.i) + 1)
return uint16(j)
}

func (bcsi *bitmapContainerShortIterator) hasNext() bool {
return bcsi.i >= 0
}
Expand Down Expand Up @@ -241,7 +242,7 @@ func (bc *bitmapContainer) getSizeInBytes() int {
}

func (bc *bitmapContainer) serializedSizeInBytes() int {
//return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
// return bc.Msgsize()// NOO! This breaks GetSerializedSizeInBytes
return len(bc.bitmap) * 8
}

Expand Down Expand Up @@ -441,7 +442,7 @@ func (bc *bitmapContainer) ior(a container) container {
if bc.isFull() {
return newRunContainer16Range(0, MaxUint16)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}
panic(fmt.Errorf("unsupported container type %T", a))
Expand Down Expand Up @@ -819,9 +820,8 @@ func (bc *bitmapContainer) andBitmap(value2 *bitmapContainer) container {
}
ac := newArrayContainerSize(newcardinality)
fillArrayAND(ac.content, bc.bitmap, value2.bitmap)
ac.content = ac.content[:newcardinality] //not sure why i need this
ac.content = ac.content[:newcardinality] // not sure why i need this
return ac

}

func (bc *bitmapContainer) intersectsArray(value2 *arrayContainer) bool {
Expand All @@ -842,7 +842,6 @@ func (bc *bitmapContainer) intersectsBitmap(value2 *bitmapContainer) bool {
}
}
return false

}

func (bc *bitmapContainer) iandBitmap(value2 *bitmapContainer) container {
Expand Down Expand Up @@ -995,7 +994,7 @@ func (bc *bitmapContainer) iandNotBitmapSurely(value2 *bitmapContainer) containe
return bc
}

func (bc *bitmapContainer) contains(i uint16) bool { //testbit
func (bc *bitmapContainer) contains(i uint16) bool { // testbit
x := uint(i)
w := bc.bitmap[x>>6]
mask := uint64(1) << (x & 63)
Expand Down Expand Up @@ -1051,7 +1050,7 @@ func (bc *bitmapContainer) toArrayContainer() *arrayContainer {
}

func (bc *bitmapContainer) fillArray(container []uint16) {
//TODO: rewrite in assembly
// TODO: rewrite in assembly
pos := 0
base := 0
for k := 0; k < len(bc.bitmap); k++ {
Expand Down Expand Up @@ -1141,7 +1140,6 @@ func (bc *bitmapContainer) numberOfRuns() int {

// convert to run or array *if needed*
func (bc *bitmapContainer) toEfficientContainer() container {

numRuns := bc.numberOfRuns()

sizeAsRunContainer := runContainer16SerializedSizeInBytes(numRuns)
Expand All @@ -1159,7 +1157,6 @@ func (bc *bitmapContainer) toEfficientContainer() container {
}

func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {

if len(rc.iv) == 1 {
return newBitmapContainerwithRange(int(rc.iv[0].start), int(rc.iv[0].last()))
}
Expand All @@ -1169,7 +1166,7 @@ func newBitmapContainerFromRun(rc *runContainer16) *bitmapContainer {
setBitmapRange(bc.bitmap, int(rc.iv[i].start), int(rc.iv[i].last())+1)
bc.cardinality += int(rc.iv[i].last()) + 1 - int(rc.iv[i].start)
}
//bc.computeCardinality()
// bc.computeCardinality()
return bc
}

Expand Down Expand Up @@ -1234,3 +1231,15 @@ func (bc *bitmapContainer) addOffset(x uint16) (container, container) {

return low, high
}

// validate checks that the container size is non-negative
func (bc *bitmapContainer) validate() error {
if bc.cardinality < arrayDefaultMaxSize {
return fmt.Errorf("bitmap container size was less than: %d", arrayDefaultMaxSize)
}
if bc.cardinality > maxCapacity {
return fmt.Errorf("bitmap container size was greater than: %d", maxCapacity)
}

return nil
}
18 changes: 18 additions & 0 deletions bitmapcontainer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,3 +328,21 @@ func TestBitmapContainerIAndNot(t *testing.T) {
require.ElementsMatch(t, []uint16{12279, 12282, 12285}, bc.(*arrayContainer).content)
require.Equal(t, 3, bc.getCardinality())
}

func TestBitMapContainerValidate(t *testing.T) {
bc := newBitmapContainer()

for i := 0; i < arrayDefaultMaxSize-1; i++ {
bc.iadd(uint16(i * 3))
}
// bitmap containers should have size arrayDefaultMaxSize or larger
assert.Error(t, bc.validate())
bc.iadd(math.MaxUint16)

assert.NoError(t, bc.validate())

// Break the max cardinality invariant
bc.cardinality = maxCapacity + 1

assert.Error(t, bc.validate())
}
Loading
Loading