Skip to content

Commit

Permalink
Implement roaring64_bitmap_to_uint64_array (#569)
Browse files Browse the repository at this point in the history
* Implement roaring64_bitmap_to_uint64_array

Currently implemented naively, could probably be sped up with specialized
container functions for 64-bit integers.

Also added a benchmark.

* Fix container_iterator_read_into_uint64 when uint32(count) == 0

* Use roaring64_iterator_read for roaring64_bitmap_to_uint64_array

This is about 4x faster according to the microbenchmark on my machine.
  • Loading branch information
SLieve authored Jan 26, 2024
1 parent 152d4a4 commit e34a82a
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 8 deletions.
2 changes: 1 addition & 1 deletion include/roaring/containers/containers.h
Original file line number Diff line number Diff line change
Expand Up @@ -2583,7 +2583,7 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
uint64_t high48, uint64_t *buf,
uint64_t count, uint32_t *consumed,
uint32_t count, uint32_t *consumed,
uint16_t *value_out);

#ifdef __cplusplus
Expand Down
11 changes: 11 additions & 0 deletions include/roaring/roaring64.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,17 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf,
bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
roaring_iterator64 iterator, void *ptr);

/**
 * Convert the bitmap to a sorted array `out`.
 *
 * Every value stored in `r` is written to `out`. The function takes no
 * capacity argument, so it cannot detect a destination that is too small.
 *
 * The caller is responsible for ensuring that enough memory is allocated, e.g.
 * ```
 * out = malloc(roaring64_bitmap_get_cardinality(r) * sizeof(uint64_t));
 * ```
 *
 * @param r    bitmap whose values are copied out
 * @param out  destination buffer with room for at least
 *             `roaring64_bitmap_get_cardinality(r)` elements
 */
void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
uint64_t *out);

/**
* Create an iterator object that can be used to iterate through the values.
* Caller is responsible for calling `roaring64_iterator_free()`.
Expand Down
13 changes: 13 additions & 0 deletions microbenchmarks/bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,19 @@ struct to_array {
auto ToArray = BasicBench<to_array>;
BENCHMARK(ToArray);

// Benchmark: convert every 64-bit bitmap in `bitmaps64` into a flat array of
// uint64_t values, reusing `array_buffer64` as the destination each time.
struct to_array64 {
    // Returns a value derived from the converted output.
    static uint64_t run() {
        uint64_t marker = 0;
        for (size_t i = 0; i < count; ++i) {
            roaring64_bitmap_to_uint64_array(bitmaps64[i], array_buffer64);
            // Fold in an element of the buffer that was just written. The
            // 32-bit `array_buffer` is not touched by this benchmark, so
            // reading it would make the marker independent of the work done.
            marker += array_buffer64[0];
        }
        return marker;
    }
};
auto ToArray64 = BasicBench<to_array64>;
BENCHMARK(ToArray64);

struct iterate_all {
static uint64_t run() {
uint64_t marker = 0;
Expand Down
2 changes: 2 additions & 0 deletions microbenchmarks/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ roaring_bitmap_t **bitmaps = NULL;
roaring64_bitmap_t **bitmaps64 = NULL;
Roaring64Map **bitmaps64cpp = NULL;
// Allocated in create_all_bitmaps() with room for `maxcard` uint32_t elements.
uint32_t *array_buffer;
// Allocated in create_all_bitmaps() with room for `maxcard` uint64_t elements;
// the to_array64 benchmark writes its converted output here.
uint64_t *array_buffer64;
uint32_t maxvalue = 0;
uint32_t maxcard = 0;

Expand Down Expand Up @@ -194,6 +195,7 @@ static roaring_bitmap_t **create_all_bitmaps(size_t *howmany,
roaring_bitmap_set_copy_on_write(answer[i], copy_on_write);
}
array_buffer = (uint32_t *)malloc(maxcard * sizeof(uint32_t));
array_buffer64 = (uint64_t *)malloc(maxcard * sizeof(uint64_t));
return answer;
}

Expand Down
2 changes: 1 addition & 1 deletion src/containers/containers.c
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ bool container_iterator_read_into_uint32(const container_t *c, uint8_t typecode,
bool container_iterator_read_into_uint64(const container_t *c, uint8_t typecode,
roaring_container_iterator_t *it,
uint64_t high48, uint64_t *buf,
uint64_t count, uint32_t *consumed,
uint32_t count, uint32_t *consumed,
uint16_t *value_out) {
*consumed = 0;
if (count == 0) {
Expand Down
13 changes: 12 additions & 1 deletion src/roaring64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1906,6 +1906,13 @@ bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r,
return true;
}

/*
 * Copies the values of `r` into `out`, in the order produced by an iterator
 * positioned at the first element. `out` must be large enough to hold every
 * value; no capacity is passed in, so none is checked here.
 */
void roaring64_bitmap_to_uint64_array(const roaring64_bitmap_t *r,
                                      uint64_t *out) {
    /* Stack-allocated iterator: nothing to free afterwards. */
    roaring64_iterator_t cursor = {0};
    roaring64_iterator_init_at(r, &cursor, /*first=*/true);

    /* Ask for the largest possible count so the read only stops once the
     * iterator is exhausted. The number of values read is not needed. */
    (void)roaring64_iterator_read(&cursor, out, UINT64_MAX);
}

roaring64_iterator_t *roaring64_iterator_create(const roaring64_bitmap_t *r) {
roaring64_iterator_t *it =
(roaring64_iterator_t *)roaring_malloc(sizeof(roaring64_iterator_t));
Expand Down Expand Up @@ -2028,9 +2035,13 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf,
uint32_t container_consumed;
leaf_t *leaf = (leaf_t *)it->art_it.value;
uint16_t low16 = (uint16_t)it->value;
uint32_t container_count = UINT32_MAX;
if (count - consumed < (uint64_t)UINT32_MAX) {
container_count = count - consumed;
}
bool has_value = container_iterator_read_into_uint64(
leaf->container, leaf->typecode, &it->container_it, it->high48, buf,
count - consumed, &container_consumed, &low16);
container_count, &container_consumed, &low16);
consumed += container_consumed;
buf += container_consumed;
if (has_value) {
Expand Down
58 changes: 53 additions & 5 deletions tests/roaring64_unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@ using namespace roaring::api;

namespace {

// Asserts that `lhs` and `rhs` have the same length and the same elements.
// For a differing element, the index is printed before the cmocka assertion
// on the two values so the offending position is visible in the test output.
void assert_vector_equal(const std::vector<uint64_t>& lhs,
                         const std::vector<uint64_t>& rhs) {
    assert_int_equal(lhs.size(), rhs.size());

    for (size_t pos = 0; pos != lhs.size(); ++pos) {
        if (lhs[pos] == rhs[pos]) {
            continue;
        }
        printf("Mismatch at %zu\n", pos);
        assert_int_equal(lhs[pos], rhs[pos]);
    }
}

DEFINE_TEST(test_copy) {
roaring64_bitmap_t* r1 = roaring64_bitmap_create();

Expand Down Expand Up @@ -1186,6 +1197,21 @@ DEFINE_TEST(test_iterate) {
roaring64_bitmap_free(r);
}

// Adds a handful of values to a bitmap and checks that
// roaring64_bitmap_to_uint64_array() writes exactly those values back out.
DEFINE_TEST(test_to_uint64_array) {
    // The expected values are listed in ascending order, so they can be
    // compared element by element against the converted output.
    const uint64_t base = 1ULL << 35;
    const std::vector<uint64_t> expected = {0, base, base + 1, base + 2,
                                            1ULL << 36};

    roaring64_bitmap_t* bitmap = roaring64_bitmap_create();
    for (size_t i = 0; i < expected.size(); ++i) {
        roaring64_bitmap_add(bitmap, expected[i]);
    }

    // Destination sized to the number of inserted values.
    std::vector<uint64_t> actual(expected.size(), 0);
    roaring64_bitmap_to_uint64_array(bitmap, actual.data());
    assert_vector_equal(actual, expected);

    roaring64_bitmap_free(bitmap);
}

DEFINE_TEST(test_iterator_create) {
roaring64_bitmap_t* r = roaring64_bitmap_create();
{
Expand Down Expand Up @@ -1439,18 +1465,39 @@ DEFINE_TEST(test_iterator_read) {
roaring64_bitmap_add_bulk(r, &context, v);
}

// Check that a zero count results in zero elements read.
roaring64_iterator_t* it = roaring64_iterator_create(r);
uint64_t buf[1];
assert_int_equal(roaring64_iterator_read(it, buf, 0), 0);
roaring64_iterator_free(it);
{
// Check that a zero count results in zero elements read.
roaring64_iterator_t* it = roaring64_iterator_create(r);
uint64_t buf[1];
assert_int_equal(roaring64_iterator_read(it, buf, 0), 0);
roaring64_iterator_free(it);
}

readCompare(values, r, 1);
readCompare(values, r, 2);
readCompare(values, r, values.size() - 1);
readCompare(values, r, values.size());
readCompare(values, r, values.size() + 1);

{
// A count of UINT64_MAX.
roaring64_iterator_t* it = roaring64_iterator_create(r);
std::vector<uint64_t> buf(values.size(), 0);
assert_int_equal(roaring64_iterator_read(it, buf.data(), UINT64_MAX),
1000);
assert_vector_equal(buf, values);
roaring64_iterator_free(it);
}
{
// A count that becomes zero if cast to uint32.
roaring64_iterator_t* it = roaring64_iterator_create(r);
std::vector<uint64_t> buf(values.size(), 0);
assert_int_equal(
roaring64_iterator_read(it, buf.data(), 0xFFFFFFFF00000000), 1000);
assert_vector_equal(buf, values);
roaring64_iterator_free(it);
}

roaring64_bitmap_free(r);
}

Expand Down Expand Up @@ -1504,6 +1551,7 @@ int main() {
cmocka_unit_test(test_flip_inplace),
cmocka_unit_test(test_portable_serialize),
cmocka_unit_test(test_iterate),
cmocka_unit_test(test_to_uint64_array),
cmocka_unit_test(test_iterator_create),
cmocka_unit_test(test_iterator_create_last),
cmocka_unit_test(test_iterator_reinit),
Expand Down

0 comments on commit e34a82a

Please sign in to comment.