From b8aac3027c2f79db1bee29724604a37d4be40c5a Mon Sep 17 00:00:00 2001 From: Soerian Date: Wed, 1 Jan 2025 14:23:01 +0000 Subject: [PATCH 01/16] Array-backed ART --- include/roaring/art/art.h | 103 ++- src/art/art.c | 1500 +++++++++++++++++++++++-------------- tests/art_unit.cpp | 219 +++--- 3 files changed, 1130 insertions(+), 692 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index e191c1eed..e3c0257a6 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -19,8 +19,8 @@ * chunks _differ_. This means that if there are two entries with different * high 48 bits, then there is only one inner node containing the common key * prefix, and two leaves. - * * Intrusive leaves: the leaf struct is included in user values. This removes - * a layer of indirection. + * * Mostly pointer-free: nodes are referred to by index rather than pointer, + * so that the structure can be deserialized with a backing buffer. */ // Fixed length of keys in the ART. All keys are assumed to be of this length. @@ -33,25 +33,42 @@ namespace internal { #endif typedef uint8_t art_key_chunk_t; -typedef struct art_node_s art_node_t; + +// Internal node reference type. Contains the node typecode in the low 8 bits, +// and the index in the relevant node array in the high 48 bits. Has a value of +// CROARING_ART_NULL_REF when pointing to a non-existent node. +typedef uint64_t art_ref_t; + +typedef struct art_leaf_s art_leaf_t; +typedef struct art_node4_s art_node4_t; +typedef struct art_node16_s art_node16_t; +typedef struct art_node48_s art_node48_t; +typedef struct art_node256_s art_node256_t; /** - * Wrapper to allow an empty tree. + * The ART is empty when root is a null ref. + * + * Each node type has its own dynamic array of node structs, indexed by + * art_ref_t. The arrays are expanded as needed, and shrink only when + * `shrink_to_fit` is called. */ typedef struct art_s { - art_node_t *root; + art_ref_t root; + + // Indexed by node typecode, thus 1 larger than it needs to be for + // convenience. `first_free` indicates the index where the first free node + // lives, which may be equal to the capacity. + size_t first_free[6]; + size_t capacities[6]; + + art_leaf_t *leaves; + art_node4_t *node4s; + art_node16_t *node16s; + art_node48_t *node48s; + art_node256_t *node256s; } art_t; -/** - * Values inserted into the tree have to be cast-able to art_val_t. This - * improves performance by reducing indirection. - * - * NOTE: Value pointers must be unique! This is because each value struct - * contains the key corresponding to the value. - */ -typedef struct art_val_s { - art_key_chunk_t key[ART_KEY_BYTES]; -} art_val_t; +typedef uint64_t art_val_t; /** * Compares two keys, returns their relative order: @@ -63,14 +80,21 @@ int art_compare_keys(const art_key_chunk_t key1[], const art_key_chunk_t key2[]); /** - * Inserts the given key and value. + * Initializes the ART. */ -void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val); +void art_init_cleared(art_t *art); /** - * Returns the value erased, NULL if not found. + * Inserts the given key and value. Returns a pointer to the value inserted, + * valid as long as the ART is not modified. */ -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key); +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val); + +/** + * Returns true if a value was erased. Sets `*erased_val` to the value erased, + * if any. 
+ */ +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val); /** * Returns the value associated with the given key, NULL if not found. @@ -83,42 +107,39 @@ art_val_t *art_find(const art_t *art, const art_key_chunk_t *key); bool art_is_empty(const art_t *art); /** - * Frees the nodes of the ART except the values, which the user is expected to - * free. + * Frees the contents of the ART. Should not be called when using + * `art_deserialize_frozen_safe`. */ void art_free(art_t *art); -/** - * Returns the size in bytes of the ART. Includes size of pointers to values, - * but not the values themselves. - */ -size_t art_size_in_bytes(const art_t *art); - /** * Prints the ART using printf, useful for debugging. */ void art_printf(const art_t *art); /** - * Callback for validating the value stored in a leaf. + * Callback for validating the value stored in a leaf. `context` is a + * user-provided value passed to the callback without modification. * * Should return true if the value is valid, false otherwise * If false is returned, `*reason` should be set to a static string describing * the reason for the failure. */ -typedef bool (*art_validate_cb_t)(const art_val_t *val, const char **reason); +typedef bool (*art_validate_cb_t)(const art_val_t val, const char **reason, + void *context); /** - * Validate the ART tree, ensuring it is internally consistent. + * Validate the ART tree, ensuring it is internally consistent. `context` is a + * user-provided value passed to the callback without modification. */ bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb); + art_validate_cb_t validate_cb, void *context); /** * ART-internal iterator bookkeeping. Users should treat this as an opaque type. */ typedef struct art_iterator_frame_s { - art_node_t *node; + art_ref_t ref; uint8_t index_in_node; } art_iterator_frame_t; @@ -130,6 +151,8 @@ typedef struct art_iterator_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t *value; + art_t *art; + uint8_t depth; // Key depth uint8_t frame; // Node depth @@ -143,19 +166,19 @@ typedef struct art_iterator_s { * depending on `first`. The iterator is not valid if there are no entries in * the ART. */ -art_iterator_t art_init_iterator(const art_t *art, bool first); +art_iterator_t art_init_iterator(art_t *art, bool first); /** * Returns an initialized iterator positioned at a key equal to or greater than * the given key, if it exists. */ -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key); /** * Returns an initialized iterator positioned at a key greater than the given * key, if it exists. */ -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key); +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key); /** * The following iterator movement functions return true if a new entry was @@ -174,14 +197,16 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, /** * Insert the value and positions the iterator at the key. */ -void art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val); +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val); /** * Erase the value pointed at by the iterator. Moves the iterator to the next - * leaf. Returns the value erased or NULL if nothing was erased. + * leaf. + * Returns true if a value was erased. 
Sets `*erased_val` to the value erased, + * if any. */ -art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator); +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); #ifdef __cplusplus } // extern "C" diff --git a/src/art/art.c b/src/art/art.c index 7bca7eb2c..6a96531a4 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -6,33 +6,26 @@ #include #include -#define CROARING_ART_NODE4_TYPE 0 -#define CROARING_ART_NODE16_TYPE 1 -#define CROARING_ART_NODE48_TYPE 2 -#define CROARING_ART_NODE256_TYPE 3 -#define CROARING_ART_NUM_TYPES 4 +#define CROARING_ART_NULL_REF 0 + +#define CROARING_ART_LEAF_TYPE 1 +#define CROARING_ART_NODE4_TYPE 2 +#define CROARING_ART_NODE16_TYPE 3 +#define CROARING_ART_NODE48_TYPE 4 +#define CROARING_ART_NODE256_TYPE 5 // Node48 placeholder value to indicate no child is present at this key index. #define CROARING_ART_NODE48_EMPTY_VAL 48 +#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) -// We use the least significant bit of node pointers to indicate whether a node -// is a leaf or an inner node. This is never surfaced to the user. -// -// Using pointer tagging to indicate leaves not only saves a bit of memory by -// sparing the typecode, but also allows us to use an intrusive leaf struct. -// Using an intrusive leaf struct leaves leaf allocation up to the user. Upon -// deallocation of the ART, we know not to free the leaves without having to -// dereference the leaf pointers. -// -// All internal operations on leaves should use CROARING_CAST_LEAF before using -// the leaf. The only places that use CROARING_SET_LEAF are locations where a -// field is directly assigned to a leaf pointer. After using CROARING_SET_LEAF, -// the leaf should be treated as a node of unknown type. -#define CROARING_IS_LEAF(p) (((uintptr_t)(p) & 1)) -#define CROARING_SET_LEAF(p) ((art_node_t *)((uintptr_t)(p) | 1)) -#define CROARING_CAST_LEAF(p) ((art_leaf_t *)((void *)((uintptr_t)(p) & ~1))) +#define CROARING_ART_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) -#define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) +#define CROARING_ART_ALIGN_RELATIVE(buf_cur, buf_start, alignment) \ + (char *)((buf_start) + \ + (((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1)))) #ifdef __cplusplus extern "C" { @@ -42,30 +35,20 @@ namespace internal { typedef uint8_t art_typecode_t; -// Aliasing with a "leaf" naming so that its purpose is clearer in the context -// of the trie internals. -typedef art_val_t art_leaf_t; +// All node types should count as unoccupied if zeroed with memset. -typedef struct art_internal_validate_s { - const char **reason; - art_validate_cb_t validate_cb; +typedef void art_node_t; - int depth; - art_key_chunk_t current_key[ART_KEY_BYTES]; -} art_internal_validate_t; - -// Set the reason message, and return false for convenience. -static inline bool art_validate_fail(const art_internal_validate_t *validate, - const char *msg) { - *validate->reason = msg; - return false; -} +typedef struct art_leaf_s { + bool occupied; + art_key_chunk_t key[ART_KEY_BYTES]; + art_val_t val; +} art_leaf_t; // Inner node, with prefix. // // We use a fixed-length array as a pointer would be larger than the array. 
typedef struct art_inner_node_s { - art_typecode_t typecode; uint8_t prefix_size; uint8_t prefix[ART_KEY_BYTES - 1]; } art_inner_node_t; @@ -77,7 +60,7 @@ typedef struct art_node4_s { art_inner_node_t base; uint8_t count; uint8_t keys[4]; - art_node_t *children[4]; + art_ref_t children[4]; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. @@ -85,7 +68,7 @@ typedef struct art_node16_s { art_inner_node_t base; uint8_t count; uint8_t keys[16]; - art_node_t *children[16]; + art_ref_t children[16]; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != @@ -98,7 +81,7 @@ typedef struct art_node48_s { // Because there are at most 48 children, only the bottom 48 bits are used. uint64_t available_children; uint8_t keys[256]; - art_node_t *children[48]; + art_ref_t children[48]; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if @@ -106,87 +89,283 @@ typedef struct art_node48_s { typedef struct art_node256_s { art_inner_node_t base; uint16_t count; - art_node_t *children[256]; + art_ref_t children[256]; } art_node256_t; // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { - art_node_t *child; + art_ref_t child; uint8_t index; art_key_chunk_t key_chunk; } art_indexed_child_t; -static inline bool art_is_leaf(const art_node_t *node) { - return CROARING_IS_LEAF(node); +typedef struct art_internal_validate_s { + const char **reason; + art_validate_cb_t validate_cb; + void *context; + + int depth; + art_key_chunk_t current_key[ART_KEY_BYTES]; +} art_internal_validate_t; + +// Set the reason message, and return false for convenience. +static inline bool art_validate_fail(const art_internal_validate_t *validate, + const char *msg) { + *validate->reason = msg; + return false; +} + +static inline art_ref_t art_to_ref(uint64_t index, art_typecode_t typecode) { + return ((art_ref_t)index) << 16 | typecode; } -static void art_leaf_populate(art_leaf_t *leaf, const art_key_chunk_t key[]) { - memcpy(leaf->key, key, ART_KEY_BYTES); +static inline uint64_t art_ref_index(art_ref_t ref) { + return ((uint64_t)ref) >> 16; +} + +static inline art_typecode_t art_ref_typecode(art_ref_t ref) { + return (art_typecode_t)ref; +} + +/** + * Gets a pointer to a node from its reference. The pointer only remains valid + * under non-mutating operations. If any mutating operations occur, this + * function should be called again to get a valid pointer to the node. 
+ */ +static art_node_t *art_deref(const art_t *art, art_ref_t ref) { + assert(ref != CROARING_ART_NULL_REF); + uint64_t index = ref >> 16; + switch (art_ref_typecode(ref)) { + case CROARING_ART_LEAF_TYPE: + return (art_node_t *)&art->leaves[index]; + case CROARING_ART_NODE4_TYPE: + return (art_node_t *)&art->node4s[index]; + case CROARING_ART_NODE16_TYPE: + return (art_node_t *)&art->node16s[index]; + case CROARING_ART_NODE48_TYPE: + return (art_node_t *)&art->node48s[index]; + case CROARING_ART_NODE256_TYPE: + return (art_node_t *)&art->node256s[index]; + default: + assert(false); + return NULL; + } +} + +static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_leaf_t *)node - art->leaves; + case CROARING_ART_NODE4_TYPE: + return (art_node4_t *)node - art->node4s; + case CROARING_ART_NODE16_TYPE: + return (art_node16_t *)node - art->node16s; + case CROARING_ART_NODE48_TYPE: + return (art_node48_t *)node - art->node48s; + case CROARING_ART_NODE256_TYPE: + return (art_node256_t *)node - art->node256s; + default: + assert(false); + return 0; + } +} + +/** + * Creates a reference from a pointer. + */ +static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, + art_typecode_t typecode) { + return art_to_ref(art_get_index(art, node, typecode), typecode); +} + +/** + * Extends the array of nodes of the given typecode by `items`. Invalidates + * pointers into the array obtained by `art_deref`. + */ +static void art_extend(art_t *art, art_typecode_t typecode, size_t items) { + size_t size = art->first_free[typecode]; + size_t desired_size = size + items; + size_t capacity = art->capacities[typecode]; + if (desired_size <= capacity) { + return; + } + size_t new_capacity = + (size < 1024) ? 2 * desired_size : 5 * desired_size / 4; + art->capacities[typecode] = new_capacity; + size_t increase = new_capacity - capacity; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + art->leaves = + roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); + memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); + break; + case CROARING_ART_NODE4_TYPE: + art->node4s = roaring_realloc(art->node4s, + new_capacity * sizeof(art_node4_t)); + memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); + break; + case CROARING_ART_NODE16_TYPE: + art->node16s = roaring_realloc(art->node16s, + new_capacity * sizeof(art_node16_t)); + memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); + break; + case CROARING_ART_NODE48_TYPE: + art->node48s = roaring_realloc(art->node48s, + new_capacity * sizeof(art_node48_t)); + memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); + break; + case CROARING_ART_NODE256_TYPE: + art->node256s = roaring_realloc( + art->node256s, new_capacity * sizeof(art_node256_t)); + memset(art->node256s + capacity, 0, + increase * sizeof(art_node256_t)); + break; + default: + assert(false); + } +} + +/** + * Returns the next free index for the given typecode, may be equal to the + * capacity of the array. 
+ */ +static size_t art_next_free(const art_t *art, art_typecode_t typecode, + size_t start_index) { + size_t capacity = art->capacities[typecode]; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + for (size_t i = start_index; i < capacity; ++i) { + if (!art->leaves[i].occupied) { + return i; + } + } + break; + } + case CROARING_ART_NODE4_TYPE: { + for (size_t i = start_index; i < capacity; ++i) { + if (art->node4s[i].count == 0) { + return i; + } + } + break; + } + case CROARING_ART_NODE16_TYPE: { + for (size_t i = start_index; i < capacity; ++i) { + if (art->node16s[i].count == 0) { + return i; + } + } + break; + } + case CROARING_ART_NODE48_TYPE: { + for (size_t i = start_index; i < capacity; ++i) { + if (art->node48s[i].count == 0) { + return i; + } + } + break; + } + case CROARING_ART_NODE256_TYPE: { + for (size_t i = start_index; i < capacity; ++i) { + if (art->node256s[i].count == 0) { + return i; + } + } + break; + } + default: + assert(false); + return 0; + } + return capacity; +} + +/** + * Marks an index for the given typecode as used, expanding the relevant node + * array if necessary. + */ +static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { + size_t first_free = art->first_free[typecode]; + if (first_free == art->capacities[typecode]) { + art_extend(art, typecode, 1); + } + art->first_free[typecode] = art_next_free(art, typecode, first_free + 1); + return first_free; } -static inline uint8_t art_get_type(const art_inner_node_t *node) { - return node->typecode; +static inline bool art_is_leaf(art_ref_t ref) { + return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; } static inline void art_init_inner_node(art_inner_node_t *node, - art_typecode_t typecode, const art_key_chunk_t prefix[], uint8_t prefix_size) { - node->typecode = typecode; node->prefix_size = prefix_size; memcpy(node->prefix, prefix, prefix_size * sizeof(art_key_chunk_t)); } -static void art_free_node(art_node_t *node); +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode); // ===================== Start of node-specific functions ====================== -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], + art_val_t val) { + uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); + art_leaf_t *leaf = art->leaves + index; + leaf->occupied = true; + memcpy(leaf->key, key, ART_KEY_BYTES); + leaf->val = val; + return art_to_ref(index, CROARING_ART_LEAF_TYPE); +} + +static inline void art_leaf_clear(art_leaf_t *leaf) { leaf->occupied = false; } + +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size); -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key); -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key); 
-static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key); +static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key); +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key); -static art_node4_t *art_node4_create(const art_key_chunk_t prefix[], +static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node4_t *node = (art_node4_t *)roaring_malloc(sizeof(art_node4_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE4_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); + art_node4_t *node = art->node4s + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node4(art_node4_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); -} +static inline void art_node4_clear(art_node4_t *node) { node->count = 0; } -static inline art_node_t *art_node4_find_child(const art_node4_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node4_find_child(const art_node4_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node4_insert(art_t *art, art_node4_t *node, + art_ref_t child, uint8_t key) { if (node->count < 4) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -199,26 +378,26 @@ static art_node_t *art_node4_insert(art_node4_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. 
for (size_t i = 0; i < 4; ++i) { - art_node16_insert(new_node, node->children[i], node->keys[i]); + art_node16_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node16_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); + return art_node16_insert(art, new_node, child, key); } -static inline art_node_t *art_node4_erase(art_node4_t *node, - art_key_chunk_t key_chunk) { +static inline art_ref_t art_node4_erase(art_t *art, art_node4_t *node, + art_key_chunk_t key_chunk) { int idx = -1; for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { @@ -226,17 +405,18 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, } } if (idx == -1) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } if (node->count == 2) { // Only one child remains after erasing, so compress the path by // removing this node. uint8_t other_idx = idx ^ 1; - art_node_t *remaining_child = node->children[other_idx]; + art_ref_t remaining_child = node->children[other_idx]; art_key_chunk_t remaining_child_key = node->keys[other_idx]; if (!art_is_leaf(remaining_child)) { // Correct the prefix of the child node. - art_inner_node_t *inner_node = (art_inner_node_t *)remaining_child; + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(art, remaining_child); memmove(inner_node->prefix + node->base.prefix_size + 1, inner_node->prefix, inner_node->prefix_size); memcpy(inner_node->prefix, node->base.prefix, @@ -244,7 +424,7 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, inner_node->prefix[node->base.prefix_size] = remaining_child_key; inner_node->prefix_size += node->base.prefix_size + 1; } - roaring_free(node); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); return remaining_child; } // Shift other keys to maintain sorted order. 
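/*
 * A minimal sketch (illustrative only) of the ref discipline used above: an
 * art_ref_t packs the node-array index in its high 48 bits and the typecode in
 * its low bits, so the ref stays stable across reallocation of the backing
 * arrays, while a raw pointer obtained from art_deref() must be re-derived
 * after any call that can grow an array (art_extend() uses roaring_realloc).
 */
static void art_ref_sketch(art_t *art, const art_key_chunk_t key[ART_KEY_BYTES]) {
    art_ref_t ref = art_leaf_create(art, key, (art_val_t)1);
    assert(art_is_leaf(ref));
    assert(art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE);
    assert(art_deref(art, ref) == (art_node_t *)&art->leaves[art_ref_index(ref)]);

    art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref);
    assert(leaf->val == 1);

    // This allocation may realloc art->leaves and invalidate `leaf`, but the
    // ref remains valid: deref it again rather than holding the pointer.
    art_ref_t other = art_leaf_create(art, key, (art_val_t)2);
    leaf = (art_leaf_t *)art_deref(art, ref);
    assert(leaf->val == 1);
    (void)other;
}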
@@ -252,14 +432,14 @@ static inline art_node_t *art_node4_erase(art_node4_t *node, memmove(node->keys + idx, node->keys + idx + 1, after_next * sizeof(art_key_chunk_t)); memmove(node->children + idx, node->children + idx + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE4_TYPE); } static inline void art_node4_replace(art_node4_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -273,7 +453,7 @@ static inline art_indexed_child_t art_node4_next_child(const art_node4_t *node, art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -290,7 +470,7 @@ static inline art_indexed_child_t art_node4_prev_child(const art_node4_t *node, index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -303,7 +483,7 @@ static inline art_indexed_child_t art_node4_child_at(const art_node4_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -323,14 +503,15 @@ static inline art_indexed_child_t art_node4_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator); -static bool art_node4_internal_validate(const art_node4_t *node, +static bool art_node4_internal_validate(const art_t *art, + const art_node4_t *node, art_internal_validate_t validator) { if (node->count == 0) { return art_validate_fail(&validator, "Node4 has no children"); @@ -357,41 +538,37 @@ static bool art_node4_internal_validate(const art_node4_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node16_t *art_node16_create(const art_key_chunk_t prefix[], +static art_node16_t *art_node16_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node16_t *node = (art_node16_t *)roaring_malloc(sizeof(art_node16_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE16_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); + art_node16_t *node = art->node16s + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; } -static void art_free_node16(art_node16_t *node) { - for (size_t i = 0; i < node->count; ++i) { - art_free_node(node->children[i]); - } - roaring_free(node); -} +static inline void art_node16_clear(art_node16_t *node) { node->count = 0; } -static inline art_node_t *art_node16_find_child(const art_node16_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node16_find_child(const art_node16_t *node, + art_key_chunk_t key) { for (size_t i = 0; i < 
node->count; ++i) { if (node->keys[i] == key) { return node->children[i]; } } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node16_insert(art_t *art, art_node16_t *node, + art_ref_t child, uint8_t key) { if (node->count < 16) { size_t idx = 0; for (; idx < node->count; ++idx) { @@ -404,24 +581,24 @@ static art_node_t *art_node16_insert(art_node16_t *node, art_node_t *child, memmove(node->keys + idx + 1, node->keys + idx, after * sizeof(art_key_chunk_t)); memmove(node->children + idx + 1, node->children + idx, - after * sizeof(art_node_t *)); + after * sizeof(art_ref_t)); node->children[idx] = child; node->keys[idx] = key; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 16; ++i) { - art_node48_insert(new_node, node->children[i], node->keys[i]); + art_node48_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return art_node48_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_node48_insert(art, new_node, child, key); } -static inline art_node_t *art_node16_erase(art_node16_t *node, - uint8_t key_chunk) { +static inline art_ref_t art_node16_erase(art_t *art, art_node16_t *node, + uint8_t key_chunk) { for (size_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { // Shift other keys to maintain sorted order. @@ -429,28 +606,28 @@ static inline art_node_t *art_node16_erase(art_node16_t *node, memmove(node->keys + i, node->keys + i + 1, after_next * sizeof(key_chunk)); memmove(node->children + i, node->children + i + 1, - after_next * sizeof(art_node_t *)); + after_next * sizeof(art_ref_t)); node->count--; break; } } if (node->count > 4) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); } art_node4_t *new_node = - art_node4_create(node->base.prefix, node->base.prefix_size); + art_node4_create(art, node->base.prefix, node->base.prefix_size); // Instead of calling insert, this could be specialized to 2x memcpy and // setting the count. 
for (size_t i = 0; i < 4; ++i) { - art_node4_insert(new_node, node->children[i], node->keys[i]); + art_node4_insert(art, new_node, node->children[i], node->keys[i]); } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE16_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE4_TYPE); } static inline void art_node16_replace(art_node16_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { for (uint8_t i = 0; i < node->count; ++i) { if (node->keys[i] == key_chunk) { node->children[i] = new_child; @@ -464,7 +641,7 @@ static inline art_indexed_child_t art_node16_next_child( art_indexed_child_t indexed_child; index++; if (index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -481,7 +658,7 @@ static inline art_indexed_child_t art_node16_prev_child( index--; art_indexed_child_t indexed_child; if (index < 0) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -494,7 +671,7 @@ static inline art_indexed_child_t art_node16_child_at(const art_node16_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= node->count) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -514,11 +691,12 @@ static inline art_indexed_child_t art_node16_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node16_internal_validate(const art_node16_t *node, +static bool art_node16_internal_validate(const art_t *art, + const art_node16_t *node, art_internal_validate_t validator) { if (node->count <= 4) { return art_validate_fail(&validator, "Node16 has too few children"); @@ -541,18 +719,19 @@ static bool art_node16_internal_validate(const art_node16_t *node, } } validator.current_key[validator.depth - 1] = node->keys[i]; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } return true; } -static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], +static art_node48_t *art_node48_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node48_t *node = (art_node48_t *)roaring_malloc(sizeof(art_node48_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE48_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); + art_node48_t *node = art->node48s + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; for (size_t i = 0; i < 256; ++i) { @@ -561,29 +740,19 @@ static art_node48_t *art_node48_create(const art_key_chunk_t prefix[], return node; } -static void art_free_node48(art_node48_t *node) { - uint64_t used_children = - (node->available_children) ^ CROARING_NODE48_AVAILABLE_CHILDREN_MASK; - while (used_children != 0) { - // We checked above that used_children is not zero - uint8_t child_idx = roaring_trailing_zeroes(used_children); - art_free_node(node->children[child_idx]); - used_children &= ~(UINT64_C(1) << child_idx); - } - roaring_free(node); -} +static inline void art_node48_clear(art_node48_t *node) { node->count = 0; } -static inline 
art_node_t *art_node48_find_child(const art_node48_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node48_find_child(const art_node48_t *node, + art_key_chunk_t key) { uint8_t val_idx = node->keys[key]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { return node->children[val_idx]; } - return NULL; + return CROARING_ART_NULL_REF; } -static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node48_insert(art_t *art, art_node48_t *node, + art_ref_t child, uint8_t key) { if (node->count < 48) { // node->available_children is only zero when the node is full (count == // 48), we just checked count < 48 @@ -592,48 +761,48 @@ static art_node_t *art_node48_insert(art_node48_t *node, art_node_t *child, node->children[val_idx] = child; node->count++; node->available_children &= ~(UINT64_C(1) << val_idx); - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node256_t *new_node = - art_node256_create(node->base.prefix, node->base.prefix_size); + art_node256_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { uint8_t val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node256_insert(new_node, node->children[val_idx], i); + art_node256_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return art_node256_insert(new_node, child, key); + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return art_node256_insert(art, new_node, child, key); } -static inline art_node_t *art_node48_erase(art_node48_t *node, - uint8_t key_chunk) { +static inline art_ref_t art_node48_erase(art_t *art, art_node48_t *node, + uint8_t key_chunk) { uint8_t val_idx = node->keys[key_chunk]; if (val_idx == CROARING_ART_NODE48_EMPTY_VAL) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } node->keys[key_chunk] = CROARING_ART_NODE48_EMPTY_VAL; node->available_children |= UINT64_C(1) << val_idx; node->count--; if (node->count > 16) { - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); } art_node16_t *new_node = - art_node16_create(node->base.prefix, node->base.prefix_size); + art_node16_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { val_idx = node->keys[i]; if (val_idx != CROARING_ART_NODE48_EMPTY_VAL) { - art_node16_insert(new_node, node->children[val_idx], i); + art_node16_insert(art, new_node, node->children[val_idx], i); } } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE48_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE16_TYPE); } static inline void art_node48_replace(art_node48_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { uint8_t val_idx = node->keys[key_chunk]; assert(val_idx != CROARING_ART_NODE48_EMPTY_VAL); node->children[val_idx] = new_child; @@ -651,7 +820,7 @@ static inline art_indexed_child_t art_node48_next_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -670,7 +839,7 @@ static inline art_indexed_child_t art_node48_prev_child( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -678,7 +847,7 @@ static inline art_indexed_child_t 
art_node48_child_at(const art_node48_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -698,11 +867,12 @@ static inline art_indexed_child_t art_node48_lower_bound( return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node48_internal_validate(const art_node48_t *node, +static bool art_node48_internal_validate(const art_t *art, + const art_node48_t *node, art_internal_validate_t validator) { if (node->count <= 16) { return art_validate_fail(&validator, "Node48 has too few children"); @@ -719,8 +889,8 @@ static bool art_node48_internal_validate(const art_node48_t *node, &validator, "Node48 keys point to the same child index"); } - art_node_t *child = node->children[child_idx]; - if (child == NULL) { + art_ref_t child = node->children[child_idx]; + if (child == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "Node48 has a NULL child"); } used_children |= UINT64_C(1) << child_idx; @@ -752,7 +922,7 @@ static bool art_node48_internal_validate(const art_node48_t *node, for (int i = 0; i < 256; ++i) { if (node->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[node->keys[i]], + if (!art_internal_validate_at(art, node->children[node->keys[i]], validator)) { return false; } @@ -761,62 +931,55 @@ static bool art_node48_internal_validate(const art_node48_t *node, return true; } -static art_node256_t *art_node256_create(const art_key_chunk_t prefix[], +static art_node256_t *art_node256_create(art_t *art, + const art_key_chunk_t prefix[], uint8_t prefix_size) { - art_node256_t *node = - (art_node256_t *)roaring_malloc(sizeof(art_node256_t)); - art_init_inner_node(&node->base, CROARING_ART_NODE256_TYPE, prefix, - prefix_size); + uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); + art_node256_t *node = art->node256s + index; + art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { - node->children[i] = NULL; + node->children[i] = CROARING_ART_NULL_REF; } return node; } -static void art_free_node256(art_node256_t *node) { - for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_free_node(node->children[i]); - } - } - roaring_free(node); -} +static inline void art_node256_clear(art_node256_t *node) { node->count = 0; } -static inline art_node_t *art_node256_find_child(const art_node256_t *node, - art_key_chunk_t key) { +static inline art_ref_t art_node256_find_child(const art_node256_t *node, + art_key_chunk_t key) { return node->children[key]; } -static art_node_t *art_node256_insert(art_node256_t *node, art_node_t *child, - uint8_t key) { +static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, + art_ref_t child, uint8_t key) { node->children[key] = child; node->count++; - return (art_node_t *)node; + return art_get_ref(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); } -static inline art_node_t *art_node256_erase(art_node256_t *node, - uint8_t key_chunk) { - node->children[key_chunk] = NULL; +static inline art_ref_t art_node256_erase(art_t *art, art_node256_t *node, + uint8_t key_chunk) { + node->children[key_chunk] = CROARING_ART_NULL_REF; node->count--; if (node->count > 48) { - return (art_node_t *)node; + return art_get_ref(art, 
(art_node_t *)node, CROARING_ART_NODE256_TYPE); } art_node48_t *new_node = - art_node48_create(node->base.prefix, node->base.prefix_size); + art_node48_create(art, node->base.prefix, node->base.prefix_size); for (size_t i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { - art_node48_insert(new_node, node->children[i], i); + if (node->children[i] != CROARING_ART_NULL_REF) { + art_node48_insert(art, new_node, node->children[i], i); } } - roaring_free(node); - return (art_node_t *)new_node; + art_node_free(art, (art_node_t *)node, CROARING_ART_NODE256_TYPE); + return art_get_ref(art, (art_node_t *)new_node, CROARING_ART_NODE48_TYPE); } static inline void art_node256_replace(art_node256_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { + art_ref_t new_child) { node->children[key_chunk] = new_child; } @@ -825,14 +988,14 @@ static inline art_indexed_child_t art_node256_next_child( art_indexed_child_t indexed_child; index++; for (size_t i = index; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -844,14 +1007,14 @@ static inline art_indexed_child_t art_node256_prev_child( index--; art_indexed_child_t indexed_child; for (int i = index; i >= 0; --i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } @@ -859,7 +1022,7 @@ static inline art_indexed_child_t art_node256_child_at( const art_node256_t *node, int index) { art_indexed_child_t indexed_child; if (index < 0 || index >= 256) { - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } indexed_child.index = index; @@ -872,18 +1035,19 @@ static inline art_indexed_child_t art_node256_lower_bound( art_node256_t *node, art_key_chunk_t key_chunk) { art_indexed_child_t indexed_child; for (size_t i = key_chunk; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { indexed_child.index = i; indexed_child.child = node->children[i]; indexed_child.key_chunk = i; return indexed_child; } } - indexed_child.child = NULL; + indexed_child.child = CROARING_ART_NULL_REF; return indexed_child; } -static bool art_node256_internal_validate(const art_node256_t *node, +static bool art_node256_internal_validate(const art_t *art, + const art_node256_t *node, art_internal_validate_t validator) { if (node->count <= 48) { return art_validate_fail(&validator, "Node256 has too few children"); @@ -894,7 +1058,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, validator.depth++; int actual_count = 0; for (int i = 0; i < 256; ++i) { - if (node->children[i] != NULL) { + if (node->children[i] != CROARING_ART_NULL_REF) { actual_count++; for (int j = i + 1; j < 256; ++j) { @@ -905,7 +1069,7 @@ static bool art_node256_internal_validate(const art_node256_t *node, } validator.current_key[validator.depth - 1] = i; - if (!art_internal_validate_at(node->children[i], validator)) { + if (!art_internal_validate_at(art, node->children[i], validator)) { return false; } } @@ -919,9 +1083,10 @@ static bool 
art_node256_internal_validate(const art_node256_t *node, // Finds the child with the given key chunk in the inner node, returns NULL if // no such child is found. -static art_node_t *art_find_child(const art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_find_child(const art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: return art_node4_find_child((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -932,14 +1097,14 @@ static art_node_t *art_find_child(const art_inner_node_t *node, return art_node256_find_child((art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Replaces the child with the given key chunk in the inner node. -static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, - art_node_t *new_child) { - switch (art_get_type(node)) { +static void art_replace(art_inner_node_t *node, art_typecode_t typecode, + art_key_chunk_t key_chunk, art_ref_t new_child) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: art_node4_replace((art_node4_t *)node, key_chunk, new_child); break; @@ -959,62 +1124,70 @@ static void art_replace(art_inner_node_t *node, art_key_chunk_t key_chunk, // Erases the child with the given key chunk from the inner node, returns the // updated node (the same as the initial node if it was not shrunk). -static art_node_t *art_node_erase(art_inner_node_t *node, - art_key_chunk_t key_chunk) { - switch (art_get_type(node)) { +static art_ref_t art_node_erase(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_erase((art_node4_t *)node, key_chunk); + return art_node4_erase(art, (art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_erase((art_node16_t *)node, key_chunk); + return art_node16_erase(art, (art_node16_t *)node, key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_erase((art_node48_t *)node, key_chunk); + return art_node48_erase(art, (art_node48_t *)node, key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_erase((art_node256_t *)node, key_chunk); + return art_node256_erase(art, (art_node256_t *)node, key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } // Inserts the leaf with the given key chunk in the inner node, returns a // pointer to the (possibly expanded) node. 
-static art_node_t *art_node_insert_leaf(art_inner_node_t *node, - art_key_chunk_t key_chunk, - art_leaf_t *leaf) { - art_node_t *child = (art_node_t *)(CROARING_SET_LEAF(leaf)); - switch (art_get_type(node)) { +static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, + art_typecode_t typecode, + art_key_chunk_t key_chunk, + art_ref_t leaf) { + switch (typecode) { case CROARING_ART_NODE4_TYPE: - return art_node4_insert((art_node4_t *)node, child, key_chunk); + return art_node4_insert(art, (art_node4_t *)node, leaf, key_chunk); case CROARING_ART_NODE16_TYPE: - return art_node16_insert((art_node16_t *)node, child, key_chunk); + return art_node16_insert(art, (art_node16_t *)node, leaf, + key_chunk); case CROARING_ART_NODE48_TYPE: - return art_node48_insert((art_node48_t *)node, child, key_chunk); + return art_node48_insert(art, (art_node48_t *)node, leaf, + key_chunk); case CROARING_ART_NODE256_TYPE: - return art_node256_insert((art_node256_t *)node, child, key_chunk); + return art_node256_insert(art, (art_node256_t *)node, leaf, + key_chunk); default: assert(false); - return NULL; + return CROARING_ART_NULL_REF; } } -// Frees the node and its children. Leaves are freed by the user. -static void art_free_node(art_node_t *node) { - if (art_is_leaf(node)) { - // We leave it up to the user to free leaves. - return; +// Marks the node as unoccopied and frees its index. +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode) { + uint64_t index = art_get_index(art, node, typecode); + if (index < art->first_free[typecode]) { + art->first_free[typecode] = index; } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + art_leaf_clear((art_leaf_t *)node); + break; case CROARING_ART_NODE4_TYPE: - art_free_node4((art_node4_t *)node); + art_node4_clear((art_node4_t *)node); break; case CROARING_ART_NODE16_TYPE: - art_free_node16((art_node16_t *)node); + art_node16_clear((art_node16_t *)node); break; case CROARING_ART_NODE48_TYPE: - art_free_node48((art_node48_t *)node); + art_node48_clear((art_node48_t *)node); break; case CROARING_ART_NODE256_TYPE: - art_free_node256((art_node256_t *)node); + art_node256_clear((art_node256_t *)node); break; default: assert(false); @@ -1024,13 +1197,15 @@ static void art_free_node(art_node_t *node) { // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255]. static art_indexed_child_t art_node_next_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_next_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -1048,13 +1223,15 @@ static art_indexed_child_t art_node_next_child(const art_node_t *node, // Returns the previous child in key order, or NULL if called on a leaf. // Provided index may be in the range [0, 256]. 
static art_indexed_child_t art_node_prev_child(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_prev_child((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -1069,16 +1246,19 @@ static art_indexed_child_t art_node_prev_child(const art_node_t *node, } } -// Returns the child found at the provided index, or NULL if called on a leaf. -// Provided index is only valid if returned by art_node_(next|prev)_child. +// Returns the child found at the provided index, or NULL if called on a +// leaf. Provided index is only valid if returned by +// art_node_(next|prev)_child. static art_indexed_child_t art_node_child_at(const art_node_t *node, + art_typecode_t typecode, int index) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_child_at((art_node4_t *)node, index); case CROARING_ART_NODE16_TYPE: @@ -1093,16 +1273,18 @@ static art_indexed_child_t art_node_child_at(const art_node_t *node, } } -// Returns the child with the smallest key equal to or greater than the given -// key chunk, NULL if called on a leaf or no such child was found. +// Returns the child with the smallest key equal to or greater than the +// given key chunk, NULL if called on a leaf or no such child was found. static art_indexed_child_t art_node_lower_bound(const art_node_t *node, + art_typecode_t typecode, art_key_chunk_t key_chunk) { - if (art_is_leaf(node)) { - art_indexed_child_t indexed_child; - indexed_child.child = NULL; - return indexed_child; - } - switch (art_get_type((art_inner_node_t *)node)) { + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return (art_indexed_child_t){ + .child = CROARING_ART_NULL_REF, + .index = 0, + .key_chunk = 0, + }; case CROARING_ART_NODE4_TYPE: return art_node4_lower_bound((art_node4_t *)node, key_chunk); case CROARING_ART_NODE16_TYPE: @@ -1117,7 +1299,7 @@ static art_indexed_child_t art_node_lower_bound(const art_node_t *node, } } -// ====================== End of node-specific functions ======================= +// ====================== End of node-specific functions ====================== // Compares the given ranges of two keys, returns their relative order: // * Key range 1 < key range 2: a negative value @@ -1155,45 +1337,59 @@ static uint8_t art_common_prefix(const art_key_chunk_t key1[], return offset; } -// Returns a pointer to the rootmost node where the value was inserted, may not -// be equal to `node`. -static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], - uint8_t depth, art_leaf_t *new_leaf) { - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); +// Returns a pointer to the rootmost node where the value was inserted, may +// not be equal to `node`. 
+static art_ref_t art_insert_at(art_t *art, art_ref_t ref, + const art_key_chunk_t key[], uint8_t depth, + art_ref_t new_leaf) { + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix( leaf->key, depth, ART_KEY_BYTES, key, depth, ART_KEY_BYTES); - // Previously this was a leaf, create an inner node instead and add both - // the existing and new leaf to it. + // Previously this was a leaf, create an inner node instead and add + // both the existing and new leaf to it. art_node_t *new_node = - (art_node_t *)art_node4_create(key + depth, common_prefix); + (art_node_t *)art_node4_create(art, key + depth, common_prefix); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - leaf->key[depth + common_prefix], leaf); - new_node = art_node_insert_leaf((art_inner_node_t *)new_node, - key[depth + common_prefix], new_leaf); + art_ref_t new_ref = art_node_insert_leaf( + art, (art_inner_node_t *)new_node, CROARING_ART_NODE4_TYPE, + leaf->key[depth + common_prefix], ref); + new_ref = art_node_insert_leaf(art, (art_inner_node_t *)new_node, + CROARING_ART_NODE4_TYPE, + key[depth + common_prefix], new_leaf); // The new inner node is now the rootmost node. - return new_node; + return new_ref; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); // Not a leaf: inner node uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { - // Partial prefix match. Create a new internal node to hold the common + // Partial prefix match. Create a new internal node to hold the common // prefix. - art_node4_t *node4 = - art_node4_create(inner_node->prefix, common_prefix); + // We create a copy of the node's prefix as the creation of a new + // node may invalidate the prefix pointer. + art_key_chunk_t *prefix_copy = (art_key_chunk_t *)roaring_malloc( + common_prefix * sizeof(art_key_chunk_t)); + memcpy(prefix_copy, inner_node->prefix, + common_prefix * sizeof(art_key_chunk_t)); + art_node4_t *node4 = art_node4_create(art, prefix_copy, common_prefix); + roaring_free(prefix_copy); + + // Deref as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Make the existing internal node a child of the new internal node. - node4 = (art_node4_t *)art_node4_insert( - node4, node, inner_node->prefix[common_prefix]); + art_node4_insert(art, node4, ref, inner_node->prefix[common_prefix]); + + // Deref again as a new node was created. + inner_node = (art_inner_node_t *)art_deref(art, ref); - // Correct the prefix of the moved internal node, trimming off the chunk - // inserted into the new internal node. + // Correct the prefix of the moved internal node, trimming off the + // chunk inserted into the new internal node. inner_node->prefix_size = inner_node->prefix_size - common_prefix - 1; if (inner_node->prefix_size > 0) { // Move the remaining prefix to the correct position. @@ -1202,55 +1398,67 @@ static art_node_t *art_insert_at(art_node_t *node, const art_key_chunk_t key[], } // Insert the value in the new internal node. - return art_node_insert_leaf(&node4->base, key[common_prefix + depth], - new_leaf); + return art_node_insert_leaf(art, (art_inner_node_t *)node4, + CROARING_ART_NODE4_TYPE, + key[common_prefix + depth], new_leaf); } // Prefix matches entirely or node has no prefix. Look for an existing // child. 
art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child != NULL) { - art_node_t *new_child = - art_insert_at(child, key, depth + common_prefix + 1, new_leaf); + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child != CROARING_ART_NULL_REF) { + art_ref_t new_child = + art_insert_at(art, child, key, depth + common_prefix + 1, new_leaf); if (new_child != child) { + // Deref again as a new node may have been created. + inner_node = (art_inner_node_t *)art_deref(art, ref); // Node type changed. - art_replace(inner_node, key_chunk, new_child); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + new_child); } - return node; + return ref; } - return art_node_insert_leaf(inner_node, key_chunk, new_leaf); + return art_node_insert_leaf(art, inner_node, art_ref_typecode(ref), + key_chunk, new_leaf); } // Erase helper struct. typedef struct art_erase_result_s { - // The rootmost node where the value was erased, may not be equal to `node`. - // If no value was removed, this is null. - art_node_t *rootmost_node; + // The rootmost node where the value was erased, may not be equal to + // the original node. If no value was removed, this is + // CROARING_ART_NULL_REF. + art_ref_t rootmost_node; - // Value removed, null if not removed. - art_val_t *value_erased; + // True if a value was erased. + bool erased; + + // Value removed, if any. + art_val_t value_erased; } art_erase_result_t; // Searches for the given key starting at `node`, erases it if found. -static art_erase_result_t art_erase_at(art_node_t *node, +static art_erase_result_t art_erase_at(art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { art_erase_result_t result; - result.rootmost_node = NULL; - result.value_erased = NULL; + result.rootmost_node = CROARING_ART_NULL_REF; + result.erased = false; - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix != ART_KEY_BYTES) { // Leaf key mismatch. return result; } - result.value_erased = (art_val_t *)leaf; + result.erased = true; + result.value_erased = leaf->val; + art_node_free(art, (art_node_t *)leaf, CROARING_ART_LEAF_TYPE); return result; } - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); @@ -1259,101 +1467,76 @@ static art_erase_result_t art_erase_at(art_node_t *node, return result; } art_key_chunk_t key_chunk = key[depth + common_prefix]; - art_node_t *child = art_find_child(inner_node, key_chunk); - if (child == NULL) { + art_ref_t child = + art_find_child(inner_node, art_ref_typecode(ref), key_chunk); + if (child == CROARING_ART_NULL_REF) { // No child with key chunk. return result; } - // Try to erase the key further down. Skip the key chunk associated with the - // child in the node. + // Try to erase the key further down. Skip the key chunk associated with + // the child in the node. 
art_erase_result_t child_result = - art_erase_at(child, key, depth + common_prefix + 1); - if (child_result.value_erased == NULL) { + art_erase_at(art, child, key, depth + common_prefix + 1); + if (!child_result.erased) { return result; } + result.erased = true; result.value_erased = child_result.value_erased; - result.rootmost_node = node; - if (child_result.rootmost_node == NULL) { + result.rootmost_node = ref; + + // Deref again as nodes may have changed location. + inner_node = (art_inner_node_t *)art_deref(art, ref); + if (child_result.rootmost_node == CROARING_ART_NULL_REF) { // Child node was fully erased, erase it from this node's children. - result.rootmost_node = art_node_erase(inner_node, key_chunk); + result.rootmost_node = + art_node_erase(art, inner_node, art_ref_typecode(ref), key_chunk); } else if (child_result.rootmost_node != child) { // Child node was not fully erased, update the pointer to it in this // node. - art_replace(inner_node, key_chunk, child_result.rootmost_node); + art_replace(inner_node, art_ref_typecode(ref), key_chunk, + child_result.rootmost_node); } return result; } -// Searches for the given key starting at `node`, returns NULL if the key was -// not found. -static art_val_t *art_find_at(const art_node_t *node, +// Searches for the given key starting at `node`, returns NULL if the key +// was not found. +static art_val_t *art_find_at(const art_t *art, art_ref_t ref, const art_key_chunk_t *key, uint8_t depth) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); uint8_t common_prefix = art_common_prefix(inner_node->prefix, 0, inner_node->prefix_size, key, depth, ART_KEY_BYTES); if (common_prefix != inner_node->prefix_size) { return NULL; } - art_node_t *child = - art_find_child(inner_node, key[depth + inner_node->prefix_size]); - if (child == NULL) { + art_ref_t child = art_find_child(inner_node, art_ref_typecode(ref), + key[depth + inner_node->prefix_size]); + if (child == CROARING_ART_NULL_REF) { return NULL; } - node = child; + ref = child; // Include both the prefix and the child key chunk in the depth. depth += inner_node->prefix_size + 1; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (depth >= ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } uint8_t common_prefix = art_common_prefix(leaf->key, 0, ART_KEY_BYTES, key, 0, ART_KEY_BYTES); if (common_prefix == ART_KEY_BYTES) { - return (art_val_t *)leaf; + return &leaf->val; } return NULL; } -// Returns the size in bytes of the subtrie. 
-static size_t art_size_in_bytes_at(const art_node_t *node) { - if (art_is_leaf(node)) { - return 0; - } - size_t size = 0; - switch (art_get_type((art_inner_node_t *)node)) { - case CROARING_ART_NODE4_TYPE: { - size += sizeof(art_node4_t); - } break; - case CROARING_ART_NODE16_TYPE: { - size += sizeof(art_node16_t); - } break; - case CROARING_ART_NODE48_TYPE: { - size += sizeof(art_node48_t); - } break; - case CROARING_ART_NODE256_TYPE: { - size += sizeof(art_node256_t); - } break; - default: - assert(false); - break; - } - art_indexed_child_t indexed_child = art_node_next_child(node, -1); - while (indexed_child.child != NULL) { - size += art_size_in_bytes_at(indexed_child.child); - indexed_child = art_node_next_child(node, indexed_child.index); - } - return size; -} - -static void art_node_print_type(const art_node_t *node) { - if (art_is_leaf(node)) { - printf("Leaf"); - return; - } - switch (art_get_type((art_inner_node_t *)node)) { +static void art_node_print_type(art_ref_t ref) { + switch (art_ref_typecode(ref)) { + case CROARING_ART_LEAF_TYPE: + printf("Leaf"); + return; case CROARING_ART_NODE4_TYPE: printf("Node4"); return; @@ -1372,10 +1555,10 @@ static void art_node_print_type(const art_node_t *node) { } } -static void art_node_printf(const art_node_t *node, uint8_t depth) { - if (art_is_leaf(node)) { +void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { + if (art_is_leaf(ref)) { printf("{ type: Leaf, key: "); - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); for (size_t i = 0; i < ART_KEY_BYTES; ++i) { printf("%02x", leaf->key[i]); } @@ -1387,10 +1570,10 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { printf("%*s", depth, ""); printf("type: "); - art_node_print_type(node); + art_node_print_type(ref); printf("\n"); - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); printf("%*s", depth, ""); printf("prefix_size: %d\n", inner_node->prefix_size); @@ -1401,41 +1584,42 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { } printf("\n"); - switch (art_get_type(inner_node)) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: { - art_node4_t *node4 = (art_node4_t *)node; + art_node4_t *node4 = (art_node4_t *)inner_node; for (uint8_t i = 0; i < node4->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node4->keys[i]); - art_node_printf(node4->children[i], depth); + art_node_printf(art, node4->children[i], depth); } } break; case CROARING_ART_NODE16_TYPE: { - art_node16_t *node16 = (art_node16_t *)node; + art_node16_t *node16 = (art_node16_t *)inner_node; for (uint8_t i = 0; i < node16->count; ++i) { printf("%*s", depth, ""); printf("key: %02x ", node16->keys[i]); - art_node_printf(node16->children[i], depth); + art_node_printf(art, node16->children[i], depth); } } break; case CROARING_ART_NODE48_TYPE: { - art_node48_t *node48 = (art_node48_t *)node; + art_node48_t *node48 = (art_node48_t *)inner_node; for (int i = 0; i < 256; ++i) { if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { printf("%*s", depth, ""); printf("key: %02x ", i); printf("child: %02x ", node48->keys[i]); - art_node_printf(node48->children[node48->keys[i]], depth); + art_node_printf(art, node48->children[node48->keys[i]], + depth); } } } break; case CROARING_ART_NODE256_TYPE: { - art_node256_t *node256 = (art_node256_t *)node; + art_node256_t *node256 = (art_node256_t *)inner_node; for (int i = 0; i 
< 256; ++i) { - if (node256->children[i] != NULL) { + if (node256->children[i] != CROARING_ART_NULL_REF) { printf("%*s", depth, ""); printf("key: %02x ", i); - art_node_printf(node256->children[i], depth); + art_node_printf(art, node256->children[i], depth); } } } break; @@ -1448,118 +1632,301 @@ static void art_node_printf(const art_node_t *node, uint8_t depth) { printf("}\n"); } -void art_insert(art_t *art, const art_key_chunk_t *key, art_val_t *val) { - art_leaf_t *leaf = (art_leaf_t *)val; - art_leaf_populate(leaf, key); - if (art->root == NULL) { - art->root = (art_node_t *)CROARING_SET_LEAF(leaf); +/** + * Moves the node at `ref` to the earliest free index before it (if any), + * returns the new ref. + */ +static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { + size_t idx = art_ref_index(ref); + art_typecode_t typecode = art_ref_typecode(ref); + size_t first_free = art->first_free[typecode]; + assert(idx != first_free); + if (idx < first_free) { + return ref; + } + size_t from = idx; + size_t to = first_free; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); + art_leaf_clear(&art->leaves[from]); + break; + } + case CROARING_ART_NODE4_TYPE: { + memcpy(art->node4s + to, art->node4s + from, sizeof(art_node4_t)); + art_node4_clear(&art->node4s[from]); + break; + } + case CROARING_ART_NODE16_TYPE: { + memcpy(art->node16s + to, art->node16s + from, + sizeof(art_node16_t)); + art_node16_clear(&art->node16s[from]); + break; + } + case CROARING_ART_NODE48_TYPE: { + memcpy(art->node48s + to, art->node48s + from, + sizeof(art_node48_t)); + art_node48_clear(&art->node48s[from]); + break; + } + case CROARING_ART_NODE256_TYPE: { + memcpy(art->node256s + to, art->node256s + from, + sizeof(art_node256_t)); + art_node256_clear(&art->node256s[from]); + break; + } + default: { + assert(false); + return 0; + } + } + art->first_free[typecode] = art_next_free(art, typecode, to + 1); + return art_to_ref(to, typecode); +} + +/** + * Shrinks all node arrays to `first_free`. Assumes all indices after + * `first_free` are unused. 
+ */ +static size_t art_shrink_node_arrays(art_t *art) { + size_t freed = 0; + if (art->first_free[CROARING_ART_LEAF_TYPE] < + art->capacities[CROARING_ART_LEAF_TYPE]) { + size_t new_capacity = art->first_free[CROARING_ART_LEAF_TYPE]; + art->leaves = + roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); + freed += art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity; + art->capacities[CROARING_ART_LEAF_TYPE] = new_capacity; + } + if (art->first_free[CROARING_ART_NODE4_TYPE] < + art->capacities[CROARING_ART_NODE4_TYPE]) { + size_t new_capacity = art->first_free[CROARING_ART_NODE4_TYPE]; + art->node4s = + roaring_realloc(art->node4s, new_capacity * sizeof(art_node4_t)); + freed += art->capacities[CROARING_ART_NODE4_TYPE] - new_capacity; + art->capacities[CROARING_ART_NODE4_TYPE] = new_capacity; + } + if (art->first_free[CROARING_ART_NODE16_TYPE] < + art->capacities[CROARING_ART_NODE16_TYPE]) { + size_t new_capacity = art->first_free[CROARING_ART_NODE16_TYPE]; + art->node16s = + roaring_realloc(art->node16s, new_capacity * sizeof(art_node16_t)); + freed += art->capacities[CROARING_ART_NODE16_TYPE] - new_capacity; + art->capacities[CROARING_ART_NODE16_TYPE] = new_capacity; + } + if (art->first_free[CROARING_ART_NODE48_TYPE] < + art->capacities[CROARING_ART_NODE48_TYPE]) { + size_t new_capacity = art->first_free[CROARING_ART_NODE48_TYPE]; + art->node48s = + roaring_realloc(art->node48s, new_capacity * sizeof(art_node48_t)); + freed += art->capacities[CROARING_ART_NODE48_TYPE] - new_capacity; + art->capacities[CROARING_ART_NODE48_TYPE] = new_capacity; + } + if (art->first_free[CROARING_ART_NODE256_TYPE] < + art->capacities[CROARING_ART_NODE256_TYPE]) { + size_t new_capacity = art->first_free[CROARING_ART_NODE256_TYPE]; + art->node256s = roaring_realloc(art->node256s, + new_capacity * sizeof(art_node256_t)); + freed += art->capacities[CROARING_ART_NODE256_TYPE] - new_capacity; + art->capacities[CROARING_ART_NODE256_TYPE] = new_capacity; + } + return freed; +} + +/** + * Traverses the ART, moving nodes to earlier free indices and modifying their + * references along the way. 
+ */ +static void art_shrink_at(art_t *art, art_ref_t ref) { + if (art_is_leaf(ref)) { return; } - art->root = art_insert_at(art->root, key, 0, leaf); + switch (art_ref_typecode(ref)) { + case CROARING_ART_NODE4_TYPE: { + art_node4_t *node4 = (art_node4_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node4->count; ++i) { + node4->children[i] = + art_move_node_to_shrink(art, node4->children[i]); + art_shrink_at(art, node4->children[i]); + } + } break; + case CROARING_ART_NODE16_TYPE: { + art_node16_t *node16 = (art_node16_t *)art_deref(art, ref); + for (uint8_t i = 0; i < node16->count; ++i) { + node16->children[i] = + art_move_node_to_shrink(art, node16->children[i]); + art_shrink_at(art, node16->children[i]); + } + } break; + case CROARING_ART_NODE48_TYPE: { + art_node48_t *node48 = (art_node48_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node48->keys[i] != CROARING_ART_NODE48_EMPTY_VAL) { + uint8_t idx = node48->keys[i]; + node48->children[idx] = + art_move_node_to_shrink(art, node48->children[idx]); + art_shrink_at(art, node48->children[idx]); + } + } + } break; + case CROARING_ART_NODE256_TYPE: { + art_node256_t *node256 = (art_node256_t *)art_deref(art, ref); + for (int i = 0; i < 256; ++i) { + if (node256->children[i] != CROARING_ART_NULL_REF) { + node256->children[i] = + art_move_node_to_shrink(art, node256->children[i]); + art_shrink_at(art, node256->children[i]); + } + } + } break; + default: + assert(false); + break; + } } -art_val_t *art_erase(art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { - return NULL; +void art_init_cleared(art_t *art) { + art->root = CROARING_ART_NULL_REF; + memset(art->first_free, 0, sizeof(art->first_free)); + memset(art->capacities, 0, sizeof(art->capacities)); + art->leaves = NULL; + art->node4s = NULL; + art->node16s = NULL; + art->node48s = NULL; + art->node256s = NULL; +} + +size_t art_shrink_to_fit(art_t *art) { + if (art->root != CROARING_ART_NULL_REF) { + art->root = art_move_node_to_shrink(art, art->root); + art_shrink_at(art, art->root); } - art_erase_result_t result = art_erase_at(art->root, key, 0); - if (result.value_erased == NULL) { - return NULL; + return art_shrink_node_arrays(art); +} + +art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { + art_ref_t leaf = art_leaf_create(art, key, val); + if (art->root == CROARING_ART_NULL_REF) { + art->root = leaf; + return &((art_leaf_t *)art_deref(art, leaf))->val; + } + art->root = art_insert_at(art, art->root, key, 0, leaf); + return &((art_leaf_t *)art_deref(art, leaf))->val; +} + +bool art_erase(art_t *art, const art_key_chunk_t *key, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } + if (art->root == CROARING_ART_NULL_REF) { + return false; + } + art_erase_result_t result = art_erase_at(art, art->root, key, 0); + if (!result.erased) { + return false; } art->root = result.rootmost_node; - return result.value_erased; + *erased_val = result.value_erased; + return true; } art_val_t *art_find(const art_t *art, const art_key_chunk_t *key) { - if (art->root == NULL) { + if (art->root == CROARING_ART_NULL_REF) { return NULL; } - return art_find_at(art->root, key, 0); + return art_find_at(art, art->root, key, 0); } -bool art_is_empty(const art_t *art) { return art->root == NULL; } - -void art_free(art_t *art) { - if (art->root == NULL) { - return; - } - art_free_node(art->root); +bool art_is_empty(const art_t *art) { + return art->root == CROARING_ART_NULL_REF; } 
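
For orientation, a minimal usage sketch of the array-backed API above (illustrative only, not part of the patch; it assumes a caller-supplied key buffer of ART_KEY_BYTES bytes):

    art_t art;
    art_init_cleared(&art);

    art_key_chunk_t key[ART_KEY_BYTES] = {0, 0, 0, 0, 0, 1};
    art_val_t *inserted = art_insert(&art, key, 42);  // pointer valid until the ART is modified

    art_val_t *found = art_find(&art, key);           // NULL if the key is absent

    art_val_t erased;
    bool removed = art_erase(&art, key, &erased);     // true if a value was removed

    art_shrink_to_fit(&art);                          // optionally compact the node arrays
    art_free(&art);
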
-size_t art_size_in_bytes(const art_t *art) { - size_t size = sizeof(art_t); - if (art->root != NULL) { - size += art_size_in_bytes_at(art->root); - } - return size; +void art_free(art_t *art) { + roaring_free(art->leaves); + roaring_free(art->node4s); + roaring_free(art->node16s); + roaring_free(art->node48s); + roaring_free(art->node256s); } void art_printf(const art_t *art) { - if (art->root == NULL) { + if (art->root == CROARING_ART_NULL_REF) { return; } - art_node_printf(art->root, 0); + art_node_printf(art, art->root, 0); +} + +// Returns a reference to the current node that the iterator is positioned +// at. +static inline art_ref_t art_iterator_ref(art_iterator_t *iterator) { + return iterator->frames[iterator->frame].ref; } // Returns the current node that the iterator is positioned at. static inline art_node_t *art_iterator_node(art_iterator_t *iterator) { - return iterator->frames[iterator->frame].node; + return art_deref(iterator->art, art_iterator_ref(iterator)); } -// Sets the iterator key and value to the leaf's key and value. Always returns -// true for convenience. +// Sets the iterator key and value to the leaf's key and value. Always +// returns true for convenience. static inline bool art_iterator_valid_loc(art_iterator_t *iterator, - art_leaf_t *leaf) { - iterator->frames[iterator->frame].node = CROARING_SET_LEAF(leaf); + art_ref_t leaf_ref) { + iterator->frames[iterator->frame].ref = leaf_ref; iterator->frames[iterator->frame].index_in_node = 0; + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, leaf_ref); memcpy(iterator->key, leaf->key, ART_KEY_BYTES); - iterator->value = (art_val_t *)leaf; + iterator->value = &leaf->val; return true; } -// Invalidates the iterator key and value. Always returns false for convenience. +// Invalidates the iterator key and value. Always returns false for +// convenience. static inline bool art_iterator_invalid_loc(art_iterator_t *iterator) { memset(iterator->key, 0, ART_KEY_BYTES); iterator->value = NULL; return false; } -// Moves the iterator one level down in the tree, given a node at the current -// level and the index of the child that we're going down to. +// Moves the iterator one level down in the tree, given a node at the +// current level and the index of the child that we're going down to. // // Note: does not set the index at the new level. -static void art_iterator_down(art_iterator_t *iterator, - const art_inner_node_t *node, +static void art_iterator_down(art_iterator_t *iterator, art_ref_t ref, uint8_t index_in_node) { - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = index_in_node; iterator->frame++; - art_indexed_child_t indexed_child = - art_node_child_at((art_node_t *)node, index_in_node); - assert(indexed_child.child != NULL); - iterator->frames[iterator->frame].node = indexed_child.child; + art_inner_node_t *node = (art_inner_node_t *)art_deref(iterator->art, ref); + art_indexed_child_t indexed_child = art_node_child_at( + (art_node_t *)node, art_ref_typecode(ref), index_in_node); + assert(indexed_child.child != CROARING_ART_NULL_REF); + iterator->frames[iterator->frame].ref = indexed_child.child; iterator->depth += node->prefix_size + 1; } -// Moves the iterator to the next/previous child of the current node. Returns -// the child moved to, or NULL if there is no neighboring child. 
-static art_node_t *art_iterator_neighbor_child( - art_iterator_t *iterator, const art_inner_node_t *inner_node, - bool forward) { +// Moves the iterator to the next/previous child of the current node. +// Returns the child moved to, or NULL if there is no neighboring child. +static art_ref_t art_iterator_neighbor_child(art_iterator_t *iterator, + bool forward) { art_iterator_frame_t frame = iterator->frames[iterator->frame]; + art_node_t *node = art_deref(iterator->art, frame.ref); art_indexed_child_t indexed_child; if (forward) { - indexed_child = art_node_next_child(frame.node, frame.index_in_node); + indexed_child = art_node_next_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } else { - indexed_child = art_node_prev_child(frame.node, frame.index_in_node); + indexed_child = art_node_prev_child(node, art_ref_typecode(frame.ref), + frame.index_in_node); } - if (indexed_child.child != NULL) { - art_iterator_down(iterator, inner_node, indexed_child.index); + if (indexed_child.child != CROARING_ART_NULL_REF) { + art_iterator_down(iterator, frame.ref, indexed_child.index); } return indexed_child.child; } -// Moves the iterator one level up in the tree, returns false if not possible. +// Moves the iterator one level up in the tree, returns false if not +// possible. static bool art_iterator_up(art_iterator_t *iterator) { if (iterator->frame == 0) { return false; @@ -1571,8 +1938,8 @@ static bool art_iterator_up(art_iterator_t *iterator) { return true; } -// Moves the iterator one level, followed by a move to the next / previous leaf. -// Sets the status of the iterator. +// Moves the iterator one level, followed by a move to the next / previous +// leaf. Sets the status of the iterator. static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { if (!art_iterator_up(iterator)) { // We're at the root. @@ -1583,27 +1950,29 @@ static bool art_iterator_up_and_move(art_iterator_t *iterator, bool forward) { // Initializes the iterator at the first / last leaf of the given node. // Returns true for convenience. -static bool art_node_init_iterator(const art_node_t *node, - art_iterator_t *iterator, bool first) { - while (!art_is_leaf(node)) { +static bool art_node_init_iterator(art_ref_t ref, art_iterator_t *iterator, + bool first) { + while (!art_is_leaf(ref)) { + art_node_t *node = art_deref(iterator->art, ref); art_indexed_child_t indexed_child; if (first) { - indexed_child = art_node_next_child(node, -1); + indexed_child = + art_node_next_child(node, art_ref_typecode(ref), -1); } else { - indexed_child = art_node_prev_child(node, 256); + indexed_child = + art_node_prev_child(node, art_ref_typecode(ref), 256); } - art_iterator_down(iterator, (art_inner_node_t *)node, - indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } // We're at a leaf. - iterator->frames[iterator->frame].node = (art_node_t *)node; + iterator->frames[iterator->frame].ref = ref; iterator->frames[iterator->frame].index_in_node = 0; // Should not matter. - return art_iterator_valid_loc(iterator, CROARING_CAST_LEAF(node)); + return art_iterator_valid_loc(iterator, ref); } bool art_iterator_move(art_iterator_t *iterator, bool forward) { - if (art_is_leaf(art_iterator_node(iterator))) { + if (art_is_leaf(art_iterator_ref(iterator))) { bool went_up = art_iterator_up(iterator); if (!went_up) { // This leaf is the root, we're done. 
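
To make the iterator surface concrete, a hedged sketch of a forward scan over all entries, assuming a populated `art`; `handle_entry` is a hypothetical consumer, not part of the patch:

    art_iterator_t it = art_init_iterator(&art, /*first=*/true);
    while (it.value != NULL) {
        handle_entry(it.key, *it.value);  // hypothetical: it.key holds ART_KEY_BYTES bytes
        art_iterator_next(&it);           // clears it.value once the last leaf is passed
    }
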
@@ -1611,67 +1980,69 @@ bool art_iterator_move(art_iterator_t *iterator, bool forward) { } } // Advance within inner node. - art_node_t *neighbor_child = art_iterator_neighbor_child( - iterator, (art_inner_node_t *)art_iterator_node(iterator), forward); - if (neighbor_child != NULL) { - // There is another child at this level, go down to the first or last - // leaf. + art_ref_t neighbor_child = art_iterator_neighbor_child(iterator, forward); + if (neighbor_child != CROARING_ART_NULL_REF) { + // There is another child at this level, go down to the first or + // last leaf. return art_node_init_iterator(neighbor_child, iterator, forward); } // No more children at this level, go up. return art_iterator_up_and_move(iterator, forward); } -// Assumes the iterator is positioned at a node with an equal prefix path up to -// the depth of the iterator. -static bool art_node_iterator_lower_bound(const art_node_t *node, +// Assumes the iterator is positioned at a node with an equal prefix path up +// to the depth of the iterator. +static bool art_node_iterator_lower_bound(art_ref_t ref, art_iterator_t *iterator, const art_key_chunk_t key[]) { - while (!art_is_leaf(node)) { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + while (!art_is_leaf(ref)) { + art_inner_node_t *inner_node = + (art_inner_node_t *)art_deref(iterator->art, ref); int prefix_comparison = art_compare_prefix(inner_node->prefix, 0, key, iterator->depth, inner_node->prefix_size); if (prefix_comparison < 0) { // Prefix so far has been equal, but we've found a smaller key. - // Since we take the lower bound within each node, we can return the - // next leaf. + // Since we take the lower bound within each node, we can return + // the next leaf. return art_iterator_up_and_move(iterator, true); } else if (prefix_comparison > 0) { - // No key equal to the key we're looking for, return the first leaf. - return art_node_init_iterator(node, iterator, true); + // No key equal to the key we're looking for, return the first + // leaf. + return art_node_init_iterator(ref, iterator, true); } // Prefix is equal, move to lower bound child. art_key_chunk_t key_chunk = key[iterator->depth + inner_node->prefix_size]; - art_indexed_child_t indexed_child = - art_node_lower_bound(node, key_chunk); - if (indexed_child.child == NULL) { + art_indexed_child_t indexed_child = art_node_lower_bound( + (art_node_t *)inner_node, art_ref_typecode(ref), key_chunk); + if (indexed_child.child == CROARING_ART_NULL_REF) { // Only smaller keys among children. return art_iterator_up_and_move(iterator, true); } if (indexed_child.key_chunk > key_chunk) { // Only larger children, return the first larger child. - art_iterator_down(iterator, inner_node, indexed_child.index); + art_iterator_down(iterator, ref, indexed_child.index); return art_node_init_iterator(indexed_child.child, iterator, true); } // We found a child with an equal prefix. - art_iterator_down(iterator, inner_node, indexed_child.index); - node = indexed_child.child; + art_iterator_down(iterator, ref, indexed_child.index); + ref = indexed_child.child; } - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + art_leaf_t *leaf = (art_leaf_t *)art_deref(iterator->art, ref); if (art_compare_keys(leaf->key, key) >= 0) { // Leaf has an equal or larger key. - return art_iterator_valid_loc(iterator, leaf); + return art_iterator_valid_loc(iterator, ref); } - // Leaf has an equal prefix, but the full key is smaller. Move to the next - // leaf. + // Leaf has an equal prefix, but the full key is smaller. 
Move to the + // next leaf. return art_iterator_up_and_move(iterator, true); } -art_iterator_t art_init_iterator(const art_t *art, bool first) { +art_iterator_t art_init_iterator(art_t *art, bool first) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root == NULL) { + iterator.art = art; + if (art->root == CROARING_ART_NULL_REF) { return iterator; } art_node_init_iterator(art->root, &iterator, first); @@ -1689,12 +2060,12 @@ bool art_iterator_prev(art_iterator_t *iterator) { bool art_iterator_lower_bound(art_iterator_t *iterator, const art_key_chunk_t *key) { if (iterator->value == NULL) { - // We're beyond the end / start of the ART so the iterator does not have - // a valid key. Start from the root. + // We're beyond the end / start of the ART so the iterator does not + // have a valid key. Start from the root. iterator->frame = 0; iterator->depth = 0; - art_node_t *root = art_iterator_node(iterator); - if (root == NULL) { + art_ref_t root = art_iterator_ref(iterator); + if (root == CROARING_ART_NULL_REF) { return false; } return art_node_iterator_lower_bound(root, iterator, key); @@ -1709,7 +2080,7 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, // Only smaller keys found. return art_iterator_invalid_loc(iterator); } else { - return art_node_init_iterator(art_iterator_node(iterator), + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } } @@ -1722,24 +2093,26 @@ bool art_iterator_lower_bound(art_iterator_t *iterator, iterator->depth + inner_node->prefix_size); } if (compare_result > 0) { - return art_node_init_iterator(art_iterator_node(iterator), iterator, + return art_node_init_iterator(art_iterator_ref(iterator), iterator, true); } - return art_node_iterator_lower_bound(art_iterator_node(iterator), iterator, + return art_node_iterator_lower_bound(art_iterator_ref(iterator), iterator, key); } -art_iterator_t art_lower_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_lower_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { art_node_iterator_lower_bound(art->root, &iterator, key); } return iterator; } -art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { +art_iterator_t art_upper_bound(art_t *art, const art_key_chunk_t *key) { art_iterator_t iterator = CROARING_ZERO_INITIALIZER; - if (art->root != NULL) { + iterator.art = art; + if (art->root != CROARING_ART_NULL_REF) { if (art_node_iterator_lower_bound(art->root, &iterator, key) && art_compare_keys(iterator.key, key) == 0) { art_iterator_next(&iterator); @@ -1748,90 +2121,100 @@ art_iterator_t art_upper_bound(const art_t *art, const art_key_chunk_t *key) { return iterator; } -void art_iterator_insert(art_t *art, art_iterator_t *iterator, - const art_key_chunk_t *key, art_val_t *val) { +void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, + art_val_t val) { // TODO: This can likely be faster. - art_insert(art, key, val); - assert(art->root != NULL); + art_insert(iterator->art, key, val); + assert(iterator->art->root != CROARING_ART_NULL_REF); iterator->frame = 0; iterator->depth = 0; - art_node_iterator_lower_bound(art->root, iterator, key); + art_node_iterator_lower_bound(iterator->art->root, iterator, key); } -// TODO: consider keeping `art_t *art` in the iterator. 
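
The erase path below pairs naturally with iteration; a sketch of erase-while-iterating, assuming a populated `art` and a hypothetical `should_erase` predicate (illustrative only):

    art_iterator_t it = art_init_iterator(&art, /*first=*/true);
    while (it.value != NULL) {
        if (should_erase(it.key)) {            // hypothetical predicate
            art_val_t erased;
            art_iterator_erase(&it, &erased);  // removes the leaf, advances to the next key
        } else {
            art_iterator_next(&it);
        }
    }
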
-art_val_t *art_iterator_erase(art_t *art, art_iterator_t *iterator) { +bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val) { + art_val_t erased_val_local; + if (erased_val == NULL) { + erased_val = &erased_val_local; + } if (iterator->value == NULL) { - return NULL; + return false; } art_key_chunk_t initial_key[ART_KEY_BYTES]; memcpy(initial_key, iterator->key, ART_KEY_BYTES); - art_val_t *value_erased = iterator->value; + *erased_val = *iterator->value; + // Erase the leaf. + art_node_free(iterator->art, art_iterator_node(iterator), + art_ref_typecode(art_iterator_ref(iterator))); bool went_up = art_iterator_up(iterator); if (!went_up) { // We're erasing the root. - art->root = NULL; + iterator->art->root = CROARING_ART_NULL_REF; art_iterator_invalid_loc(iterator); - return value_erased; + return true; } - // Erase the leaf. + // Erase the leaf in its parent. + art_ref_t parent_ref = art_iterator_ref(iterator); art_inner_node_t *parent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_parent = iterator->key[iterator->depth + parent_node->prefix_size]; - art_node_t *new_parent_node = - art_node_erase(parent_node, key_chunk_in_parent); + art_ref_t new_parent_ref = + art_node_erase(iterator->art, parent_node, art_ref_typecode(parent_ref), + key_chunk_in_parent); - if (new_parent_node != ((art_node_t *)parent_node)) { + if (new_parent_ref != parent_ref) { // Replace the pointer to the inner node we erased from in its // parent (it may be a leaf now). - iterator->frames[iterator->frame].node = new_parent_node; + iterator->frames[iterator->frame].ref = new_parent_ref; went_up = art_iterator_up(iterator); if (went_up) { + art_ref_t grandparent_ref = art_iterator_ref(iterator); art_inner_node_t *grandparent_node = (art_inner_node_t *)art_iterator_node(iterator); art_key_chunk_t key_chunk_in_grandparent = iterator->key[iterator->depth + grandparent_node->prefix_size]; - art_replace(grandparent_node, key_chunk_in_grandparent, - new_parent_node); + art_replace(grandparent_node, art_ref_typecode(grandparent_ref), + key_chunk_in_grandparent, new_parent_ref); } else { // We were already at the rootmost node. - art->root = new_parent_node; + iterator->art->root = new_parent_ref; } } iterator->frame = 0; iterator->depth = 0; - // Do a lower bound search for the initial key, which will find the first - // greater key if it exists. This can likely be mildly faster if we instead - // start from the current position. - art_node_iterator_lower_bound(art->root, iterator, initial_key); - return value_erased; + // Do a lower bound search for the initial key, which will find the + // first greater key if it exists. This can likely be mildly faster if + // we instead start from the current position. 
+ art_node_iterator_lower_bound(iterator->art->root, iterator, initial_key); + return true; } -static bool art_internal_validate_at(const art_node_t *node, +static bool art_internal_validate_at(const art_t *art, art_ref_t ref, art_internal_validate_t validator) { - if (node == NULL) { + if (ref == CROARING_ART_NULL_REF) { return art_validate_fail(&validator, "node is null"); } - if (art_is_leaf(node)) { - art_leaf_t *leaf = CROARING_CAST_LEAF(node); + if (art_is_leaf(ref)) { + art_leaf_t *leaf = (art_leaf_t *)art_deref(art, ref); if (art_compare_prefix(leaf->key, 0, validator.current_key, 0, validator.depth) != 0) { - return art_validate_fail( - &validator, - "leaf key does not match its position's prefix in the tree"); + return art_validate_fail(&validator, + "leaf key does not match its " + "position's prefix in the tree"); } if (validator.validate_cb != NULL && - !validator.validate_cb(leaf, validator.reason)) { + !validator.validate_cb(leaf->val, validator.reason, + validator.context)) { if (*validator.reason == NULL) { *validator.reason = "leaf validation failed"; } return false; } } else { - art_inner_node_t *inner_node = (art_inner_node_t *)node; + art_inner_node_t *inner_node = (art_inner_node_t *)art_deref(art, ref); if (validator.depth + inner_node->prefix_size + 1 > ART_KEY_BYTES) { return art_validate_fail(&validator, @@ -1841,28 +2224,28 @@ static bool art_internal_validate_at(const art_node_t *node, inner_node->prefix_size); validator.depth += inner_node->prefix_size; - switch (inner_node->typecode) { + switch (art_ref_typecode(ref)) { case CROARING_ART_NODE4_TYPE: - if (!art_node4_internal_validate((art_node4_t *)inner_node, + if (!art_node4_internal_validate(art, (art_node4_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE16_TYPE: - if (!art_node16_internal_validate((art_node16_t *)inner_node, - validator)) { + if (!art_node16_internal_validate( + art, (art_node16_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE48_TYPE: - if (!art_node48_internal_validate((art_node48_t *)inner_node, - validator)) { + if (!art_node48_internal_validate( + art, (art_node48_t *)inner_node, validator)) { return false; } break; case CROARING_ART_NODE256_TYPE: - if (!art_node256_internal_validate((art_node256_t *)inner_node, - validator)) { + if (!art_node256_internal_validate( + art, (art_node256_t *)inner_node, validator)) { return false; } break; @@ -1874,23 +2257,38 @@ static bool art_internal_validate_at(const art_node_t *node, } bool art_internal_validate(const art_t *art, const char **reason, - art_validate_cb_t validate_cb) { + art_validate_cb_t validate_cb, void *context) { const char *reason_local; if (reason == NULL) { // Always allow assigning through *reason reason = &reason_local; } *reason = NULL; - if (art->root == NULL) { + if (art->root == CROARING_ART_NULL_REF) { return true; } art_internal_validate_t validator = { .reason = reason, .validate_cb = validate_cb, + .context = context, .depth = 0, - .current_key = {0}, + .current_key = CROARING_ZERO_INITIALIZER, }; - return art_internal_validate_at(art->root, validator); + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + size_t capacity = art->capacities[type]; + for (size_t i = 0; i < capacity; ++i) { + size_t first_free = art->first_free[type]; + if (first_free > capacity) { + return art_validate_fail(&validator, "first_free > capacity"); + } + size_t next_free = art_next_free(art, type, 0); + if (first_free != next_free) { + 
return art_validate_fail(&validator, "first_free != next_free");
+            }
+        }
+    }
+    return art_internal_validate_at(art, art->root, validator);
 }
 
 #ifdef __cplusplus
diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp
index 06d647492..104465b83 100644
--- a/tests/art_unit.cpp
+++ b/tests/art_unit.cpp
@@ -18,7 +18,7 @@ namespace {
 
 void print_key(const art_key_chunk_t* key) {
     for (size_t i = 0; i < ART_KEY_BYTES; ++i) {
-        printf("%x", *(key + i));
+        printf("%02x", *(key + i));
     }
 }
 
@@ -37,7 +37,7 @@ void assert_key_eq(const art_key_chunk_t* key1, const art_key_chunk_t* key2) {
 
 void assert_art_valid(art_t* art) {
     const char* reason = nullptr;
-    if (!art_internal_validate(art, &reason, nullptr)) {
+    if (!art_internal_validate(art, &reason, nullptr, nullptr)) {
         fail_msg("ART is invalid: '%s'\n", reason);
    }
 }
@@ -80,26 +80,20 @@ class Key {
     std::array<art_key_chunk_t, ART_KEY_BYTES> key_;
 };
 
-struct Value : art_val_t {
-    Value() {}
-    Value(uint64_t val_) : val(val_) {}
-    bool operator==(const Value& other) const { return val == other.val; }
-
-    uint64_t val = 0;
-};
-
 class ShadowedART {
    public:
+    ShadowedART() { art_init_cleared(&art_); }
     ~ShadowedART() { art_free(&art_); }
 
-    void insert(Key key, Value value) {
+    void insert(Key key, art_val_t value) {
         shadow_[key] = value;
-        art_insert(&art_, key.data(), &shadow_[key]);
+        art_insert(&art_, key.data(), shadow_[key]);
     }
 
     void erase(Key key) {
-        art_erase(&art_, key.data());
-        shadow_.erase(key);
+        bool erased = art_erase(&art_, key.data(), nullptr);
+        bool shadow_erased = shadow_.erase(key) == 1;
+        assert_true(erased == shadow_erased);
     }
 
     void assertLowerBoundValid(Key key) {
@@ -118,17 +112,17 @@ class ShadowedART {
         for (const auto& entry : shadow_) {
             auto& key = entry.first;
             auto& value = entry.second;
-            Value* found_val = (Value*)art_find(&art_, key.data());
+            art_val_t* found_val = art_find(&art_, key.data());
             if (found_val == nullptr) {
                 printf("Key %s is not null in shadow but null in ART\n",
                        key.string().c_str());
                 assert_true(found_val != nullptr);
                 break;
             }
-            if (found_val->val != value.val) {
+            if (*found_val != value) {
                 printf("Key %s: ART value %" PRIu64 " != shadow value %" PRIu64
                        "\n",
-                       key.string().c_str(), found_val->val, value.val);
+                       key.string().c_str(), *found_val, value);
                 assert_true(*found_val == value);
                 break;
             }
@@ -136,7 +130,7 @@ class ShadowedART {
     }
 
    private:
-    void assertIteratorValid(std::map<Key, Value>::iterator& shadow_it,
+    void assertIteratorValid(std::map<Key, art_val_t>::iterator& shadow_it,
                              art_iterator_t* art_it) {
         if (shadow_it != shadow_.end() && art_it->value == nullptr) {
             printf("Iterator for key %s is null\n",
@@ -155,40 +149,45 @@ class ShadowedART {
             assert_true(shadow_it->first == Key(art_it->key));
         }
     }
-    std::map<Key, Value> shadow_;
-    art_t art_{NULL};
+    std::map<Key, art_val_t> shadow_;
+    art_t art_;
 };
 
 DEFINE_TEST(test_art_simple) {
     std::vector<const char*> keys = {
         "000001", "000002", "000003", "000004", "001005",
     };
-    std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
+    std::vector<art_val_t> values = {1, 2, 3, 4, 5};
 
-    art_t art{NULL};
+    art_t art;
+    art_init_cleared(&art);
     for (size_t i = 0; i < keys.size(); ++i) {
-        art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+        art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
     }
-    Value* found_val = (Value*)art_find(&art, (uint8_t*)keys[0]);
-    assert_true(*found_val == values[0]);
-    Value* erased_val = (Value*)art_erase(&art, (uint8_t*)keys[0]);
-    assert_true(*erased_val == values[0]);
+    art_val_t found_val = *art_find(&art, (uint8_t*)keys[0]);
+    assert_true(found_val == values[0]);
+    art_val_t erased_val;
+    assert_true(art_erase(&art, (uint8_t*)keys[0], &erased_val));
+    assert_true(erased_val == values[0]);
     art_free(&art);
 }
 
 DEFINE_TEST(test_art_erase_all) {
     std::vector<const char*> keys = {"000001", "000002"};
-    std::vector<Value> values = {{1}, {2}};
+    std::vector<art_val_t> values = {1, 2};
 
-    art_t art{NULL};
-    art_insert(&art, (uint8_t*)keys[0], &values[0]);
-    art_insert(&art, (uint8_t*)keys[1], &values[1]);
+    art_t art;
+    art_init_cleared(&art);
+    art_insert(&art, (uint8_t*)keys[0], values[0]);
+    art_insert(&art, (uint8_t*)keys[1], values[1]);
 
     assert_art_valid(&art);
-    Value* erased_val1 = (Value*)art_erase(&art, (uint8_t*)keys[0]);
-    Value* erased_val2 = (Value*)art_erase(&art, (uint8_t*)keys[1]);
-    assert_true(*erased_val1 == values[0]);
-    assert_true(*erased_val2 == values[1]);
+    art_val_t erased_val1;
+    art_val_t erased_val2;
+    assert_true(art_erase(&art, (uint8_t*)keys[0], &erased_val1));
+    assert_true(art_erase(&art, (uint8_t*)keys[1], &erased_val2));
+    assert_true(erased_val1 == values[0]);
+    assert_true(erased_val2 == values[1]);
 
     assert_art_valid(&art);
     art_free(&art);
@@ -198,14 +197,14 @@ DEFINE_TEST(test_art_is_empty) {
     std::vector<const char*> keys = {
         "000001", "000002", "000003", "000004", "001005",
     };
-    std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
+    std::vector<art_val_t> values = {1, 2, 3, 4, 5};
 
-    art_t art{NULL};
+    art_t art;
+    art_init_cleared(&art);
     assert_art_valid(&art);
     assert_true(art_is_empty(&art));
     const char* key = "000001";
-    Value val{1};
-    art_insert(&art, (art_key_chunk_t*)key, &val);
+    art_insert(&art, (art_key_chunk_t*)key, 1);
     assert_art_valid(&art);
     assert_false(art_is_empty(&art));
     art_free(&art);
@@ -215,19 +214,20 @@ DEFINE_TEST(test_art_iterator_next) {
     {  // ART with multiple node sizes.
         std::vector<std::array<uint8_t, 6>> keys;
-        std::vector<Value> values;
+        std::vector<art_val_t> values;
         std::vector<size_t> sizes = {4, 16, 48, 256};
         for (size_t i = 0; i < sizes.size(); i++) {
-            uint8_t size = static_cast<uint8_t>(sizes[i]);
+            size_t size = sizes[i];
             for (size_t j = 0; j < size; j++) {
                 keys.push_back({0, 0, 0, static_cast<uint8_t>(i),
                                 static_cast<uint8_t>(j)});
-                values.push_back({static_cast<uint64_t>(i) * j});
+                values.push_back(i * j);
             }
         }
-        art_t art{NULL};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i].data(), values[i]);
             assert_art_valid(&art);
         }
 
@@ -235,7 +235,7 @@ DEFINE_TEST(test_art_iterator_next) {
         size_t i = 0;
         do {
             assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data());
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
         art_free(&art);
@@ -247,10 +247,11 @@ DEFINE_TEST(test_art_iterator_next) {
             {0, 0, 0, 1, 0, 0}, {0, 0, 1, 0, 0, 0}, {0, 1, 0, 0, 0, 0},
             {1, 0, 0, 0, 0, 0},
         };
-        std::vector<Value> values = {{0, 1, 2, 3, 4, 5, 6}};
-        art_t art{NULL};
+        std::vector<art_val_t> values = {0, 1, 2, 3, 4, 5, 6};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i].data(), values[i]);
             assert_art_valid(&art);
         }
 
@@ -258,7 +259,7 @@ DEFINE_TEST(test_art_iterator_next) {
         size_t i = 0;
         do {
             assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data());
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
         art_free(&art);
@@ -269,19 +270,20 @@ DEFINE_TEST(test_art_iterator_prev) {
     {  // ART with multiple node sizes.
         std::vector<std::array<uint8_t, 6>> keys;
-        std::vector<Value> values;
+        std::vector<art_val_t> values;
         std::vector<size_t> sizes = {4, 16, 48, 256};
         for (size_t i = 0; i < sizes.size(); i++) {
             uint8_t size = static_cast<uint8_t>(sizes[i]);
             for (size_t j = 0; j < size; j++) {
                 keys.push_back({0, 0, 0, static_cast<uint8_t>(i),
                                 static_cast<uint8_t>(j)});
-                values.push_back({static_cast<uint64_t>(i) * j});
+                values.push_back(static_cast<uint64_t>(i) * j);
             }
         }
-        art_t art{NULL};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i].data(), values[i]);
             assert_art_valid(&art);
         }
 
@@ -300,10 +302,11 @@ DEFINE_TEST(test_art_iterator_prev) {
             {0, 0, 0, 1, 0, 0}, {0, 0, 1, 0, 0, 0}, {0, 1, 0, 0, 0, 0},
             {1, 0, 0, 0, 0, 0},
         };
-        std::vector<Value> values = {{0, 1, 2, 3, 4, 5, 6}};
-        art_t art{NULL};
+        std::vector<art_val_t> values = {0, 1, 2, 3, 4, 5, 6};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i].data(), values[i]);
             assert_art_valid(&art);
         }
 
@@ -311,7 +314,7 @@ DEFINE_TEST(test_art_iterator_prev) {
         size_t i = keys.size() - 1;
         do {
             assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data());
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             --i;
         } while (art_iterator_prev(&iterator));
         art_free(&art);
@@ -320,7 +323,8 @@ DEFINE_TEST(test_art_iterator_prev) {
 
 DEFINE_TEST(test_art_iterator_lower_bound) {
     {
-        art_t art{NULL};
+        art_t art;
+        art_init_cleared(&art);
         art_iterator_t iterator = art_init_iterator(&art, true);
         assert_null(iterator.value);
         assert_false(
@@ -333,10 +337,11 @@ DEFINE_TEST(test_art_iterator_lower_bound) {
         std::vector<const char*> keys = {
             "000001", "000002", "000003", "000004", "001005",
         };
-        std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
-        art_t art{NULL};
+        std::vector<art_val_t> values = {1, 2, 3, 4, 5};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
             assert_art_valid(&art);
         }
 
@@ -353,10 +358,11 @@ DEFINE_TEST(test_art_iterator_lower_bound) {
 
         // Lower bound search within a node's children.
         std::vector<const char*> keys = {"000001", "000003", "000004", "001005"};
-        std::vector<Value> values = {{1}, {3}, {4}, {5}};
-        art_t art{NULL};
+        std::vector<art_val_t> values = {1, 3, 4, 5};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
             assert_art_valid(&art);
         }
         art_iterator_t iterator = art_init_iterator(&art, true);
@@ -378,10 +384,11 @@ DEFINE_TEST(test_art_iterator_lower_bound) {
         // Lower bound search with leaf where prefix is equal but full key is
         // smaller.
         std::vector<const char*> keys = {"000100", "000200", "000300"};
-        std::vector<Value> values = {{1}, {2}, {3}};
-        art_t art{NULL};
+        std::vector<art_val_t> values = {1, 2, 3};
+        art_t art;
+        art_init_cleared(&art);
         for (size_t i = 0; i < keys.size(); ++i) {
-            art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+            art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
             assert_art_valid(&art);
         }
         art_iterator_t iterator = art_init_iterator(&art, true);
@@ -427,9 +434,10 @@ DEFINE_TEST(test_art_iterator_lower_bound) {
     {
         // Lower bound search with only a single leaf.
         const char* key1 = "000001";
-        Value value{1};
-        art_t art{NULL};
-        art_insert(&art, (art_key_chunk_t*)key1, &value);
+        art_val_t value{1};
+        art_t art;
+        art_init_cleared(&art);
+        art_insert(&art, (art_key_chunk_t*)key1, value);
 
         art_iterator_t iterator = art_init_iterator(&art, true);
 
@@ -454,10 +462,11 @@ DEFINE_TEST(test_art_lower_bound) {
     std::vector<const char*> keys = {
         "000001", "000002", "000003", "000004", "001005",
     };
-    std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
-    art_t art{NULL};
+    std::vector<art_val_t> values = {1, 2, 3, 4, 5};
+    art_t art;
+    art_init_cleared(&art);
     for (size_t i = 0; i < keys.size(); ++i) {
-        art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+        art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
         assert_art_valid(&art);
     }
 
@@ -468,7 +477,7 @@ DEFINE_TEST(test_art_lower_bound) {
         do {
             assert_true(iterator.value != NULL);
             assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]);
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
     }
     {
@@ -477,7 +486,7 @@ DEFINE_TEST(test_art_lower_bound) {
         art_iterator_t iterator = art_lower_bound(&art, (art_key_chunk_t*)key);
         assert_true(iterator.value != NULL);
         assert_key_eq(iterator.key, (art_key_chunk_t*)keys[4]);
-        assert_true(iterator.value == &values[4]);
+        assert_true(*iterator.value == values[4]);
         assert_false(art_iterator_next(&iterator));
     }
     {
@@ -492,10 +501,11 @@ DEFINE_TEST(test_art_upper_bound) {
     std::vector<const char*> keys = {
         "000001", "000002", "000003", "000004", "001005",
     };
-    std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
-    art_t art{NULL};
+    std::vector<art_val_t> values = {1, 2, 3, 4, 5};
+    art_t art;
+    art_init_cleared(&art);
    for (size_t i = 0; i < keys.size(); ++i) {
-        art_insert(&art, (art_key_chunk_t*)keys[i], &values[i]);
+        art_insert(&art, (art_key_chunk_t*)keys[i], values[i]);
         assert_art_valid(&art);
    }
 
@@ -506,7 +516,7 @@ DEFINE_TEST(test_art_upper_bound) {
         do {
             assert_true(iterator.value != NULL);
             assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]);
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
     }
     {
@@ -515,7 +525,7 @@ DEFINE_TEST(test_art_upper_bound) {
         art_iterator_t iterator = art_upper_bound(&art, (art_key_chunk_t*)key);
         assert_true(iterator.value != NULL);
         assert_key_eq(iterator.key, (art_key_chunk_t*)keys[4]);
-        assert_true(iterator.value == &values[4]);
+        assert_true(*iterator.value == values[4]);
         assert_false(art_iterator_next(&iterator));
     }
     {
@@ -528,27 +538,30 @@ DEFINE_TEST(test_art_upper_bound) {
 
 DEFINE_TEST(test_art_iterator_erase) {
     std::vector<std::array<uint8_t, 6>> keys;
-    std::vector<Value> values;
+    std::vector<art_val_t> values;
     std::vector<size_t> sizes = {1, 4, 16, 48, 256};
     for (size_t i = 0; i < sizes.size(); i++) {
         uint8_t size = static_cast<uint8_t>(sizes[i]);
         for (size_t j = 0; j < size; j++) {
             keys.push_back(
                 {0, 0, 0, static_cast<uint8_t>(i), static_cast<uint8_t>(j)});
-            values.push_back({static_cast<uint64_t>(i) * j});
+            values.push_back(static_cast<uint64_t>(i) * j);
         }
     }
-    art_t art{NULL};
+    art_t art;
+    art_init_cleared(&art);
     for (size_t i = 0; i < keys.size(); ++i) {
-        art_insert(&art, (art_key_chunk_t*)keys[i].data(), &values[i]);
+        art_insert(&art, (art_key_chunk_t*)keys[i].data(), values[i]);
         assert_art_valid(&art);
     }
     art_iterator_t iterator = art_init_iterator(&art, true);
     size_t i = 0;
     do {
         assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data());
-        assert_true(iterator.value == &values[i]);
-        assert_true(art_iterator_erase(&art, &iterator) == &values[i]);
+        assert_true(*iterator.value == values[i]);
+        art_val_t erased_val;
+        assert_true(art_iterator_erase(&iterator, &erased_val));
+        assert_true(erased_val == values[i]);
        assert_art_valid(&art);
        assert_false(art_find(&art, (art_key_chunk_t*)keys[i].data()));
        ++i;
@@ -561,16 +574,16 @@ DEFINE_TEST(test_art_iterator_insert) {
     std::vector<const char*> keys = {
         "000001", "000002", "000003", "000004", "001005",
     };
-    std::vector<Value> values = {{1}, {2}, {3}, {4}, {5}};
-    art_t art{NULL};
-    art_insert(&art, (art_key_chunk_t*)keys[0], &values[0]);
+    std::vector<art_val_t> values = {1, 2, 3, 4, 5};
+    art_t art;
+    art_init_cleared(&art);
+    art_insert(&art, (art_key_chunk_t*)keys[0], values[0]);
     art_iterator_t iterator = art_init_iterator(&art, true);
     for (size_t i = 1; i < keys.size(); ++i) {
-        art_iterator_insert(&art, &iterator, (art_key_chunk_t*)keys[i],
-                            &values[i]);
+        art_iterator_insert(&iterator, (art_key_chunk_t*)keys[i], values[i]);
         assert_art_valid(&art);
         assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i]);
-        assert_true(iterator.value == &values[i]);
+        assert_true(*iterator.value == values[i]);
     }
     art_free(&art);
 }
@@ -591,21 +604,23 @@ DEFINE_TEST(test_art_shadowed) {
 }
 
 DEFINE_TEST(test_art_shrink_grow_node48) {
-    art_t art{nullptr};
-    std::vector<Value> values(48);
+    art_t art;
+    art_init_cleared(&art);
+    std::vector<art_val_t> values(48);
     // Make a full node48.
     for (int i = 0; i < 48; i++) {
         auto key = Key(i);
-        values[i].val = i;
-        art_insert(&art, key.data(), &values[i]);
+        values[i] = i;
+        art_insert(&art, key.data(), values[i]);
         assert_art_valid(&art);
     }
 
     // Remove the first several containers
     for (int i = 0; i < 8; i++) {
         auto key = Key(i);
-        Value* removed_val = (Value*)(art_erase(&art, key.data()));
+        art_val_t erased_val;
+        assert_true(art_erase(&art, key.data(), &erased_val));
         assert_art_valid(&art);
-        assert_int_equal(removed_val->val, i);
+        assert_int_equal(erased_val, i);
     }
     {
         art_iterator_t iterator = art_init_iterator(&art, true);
@@ -613,7 +628,7 @@ DEFINE_TEST(test_art_shrink_grow_node48) {
         do {
             auto key = Key(i);
             assert_key_eq(iterator.key, key.data());
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
         assert_int_equal(i, 48);
@@ -622,8 +637,8 @@ DEFINE_TEST(test_art_shrink_grow_node48) {
     // Fill the containers back up
     for (int i = 0; i < 8; i++) {
         auto key = Key(i);
-        values[i].val = i;
-        art_insert(&art, key.data(), &values[i]);
+        values[i] = i;
+        art_insert(&art, key.data(), values[i]);
     }
     {
         art_iterator_t iterator = art_init_iterator(&art, true);
@@ -631,7 +646,7 @@ DEFINE_TEST(test_art_shrink_grow_node48) {
         do {
             auto key = Key(i);
             assert_key_eq(iterator.key, key.data());
-            assert_true(iterator.value == &values[i]);
+            assert_true(*iterator.value == values[i]);
             ++i;
         } while (art_iterator_next(&iterator));
         assert_int_equal(i, 48);

From 04e5eeac82831e42f86d244e5db21213941145c1 Mon Sep 17 00:00:00 2001
From: Soerian
Date: Wed, 1 Jan 2025 14:35:55 +0000
Subject: [PATCH 02/16] Array-backed r64

---
 include/roaring/roaring64.h |   2 +-
 src/roaring64.c             | 940 ++++++++++++++++++++----------------
 2 files changed, 523 insertions(+), 419 deletions(-)

diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h
index 8022f160d..5001bf880 100644
--- a/include/roaring/roaring64.h
+++ b/include/roaring/roaring64.h
@@ -17,7 +17,7 @@ namespace api {
 #endif
 
 typedef struct roaring64_bitmap_s roaring64_bitmap_t;
-typedef struct roaring64_leaf_s roaring64_leaf_t;
+typedef uint64_t roaring64_leaf_t;
 typedef struct roaring64_iterator_s roaring64_iterator_t;
 
 /**
diff --git a/src/roaring64.c 
b/src/roaring64.c index 208c198de..d1ca33310 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -27,22 +27,19 @@ namespace api { typedef struct roaring64_bitmap_s { art_t art; uint8_t flags; + size_t first_free; + size_t capacity; + container_t **containers; } roaring64_bitmap_t; // Leaf type of the ART used to keep the high 48 bits of each entry. -typedef struct roaring64_leaf_s { - art_val_t _pad; - uint8_t typecode; - container_t *container; -} roaring64_leaf_t; - -// Alias to make it easier to work with, since it's an internal-only type -// anyway. -typedef struct roaring64_leaf_s leaf_t; +// Low 8 bits: typecode +// High 56 bits: container index +typedef roaring64_leaf_t leaf_t; // Iterator struct to hold iteration state. typedef struct roaring64_iterator_s { - const roaring64_bitmap_t *parent; + const roaring64_bitmap_t *r; art_iterator_t art_it; roaring_container_iterator_t container_it; uint64_t high48; // Key that art_it points to. @@ -77,20 +74,85 @@ static inline uint64_t minimum(uint64_t a, uint64_t b) { return (a < b) ? a : b; } -static inline leaf_t *create_leaf(container_t *container, uint8_t typecode) { - leaf_t *leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf->container = container; - leaf->typecode = typecode; - return leaf; +static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { + return (container_index << 8) | typecode; } -static inline leaf_t *copy_leaf_container(const leaf_t *leaf) { - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->typecode = leaf->typecode; +static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } + +static inline size_t get_index(leaf_t leaf) { return leaf >> 8; } + +static inline container_t *get_container(const roaring64_bitmap_t *r, + leaf_t leaf) { + return r->containers[get_index(leaf)]; +} + +// Replaces the container of `leaf` with the given container. Returns the +// modified leaf for convenience. +static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, + container_t *container, + uint8_t typecode) { + size_t index = get_index(*leaf); + r->containers[index] = container; + *leaf = create_leaf(index, typecode); + return *leaf; +} + +static void extend_containers(roaring64_bitmap_t *r, size_t items) { + size_t desired_cap = r->capacity + items; + if (desired_cap <= r->capacity) { + return; + } + size_t new_capacity = + (r->capacity < 1024) ? 
2 * desired_cap : 5 * desired_cap / 4; + size_t increase = new_capacity - r->capacity; + r->containers = + roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); + memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); + r->capacity = new_capacity; +} + +static size_t next_free_container_idx(const roaring64_bitmap_t *r) { + for (size_t i = r->first_free + 1; i < r->capacity; ++i) { + if (r->containers[i] == NULL) { + return i; + } + } + return r->capacity; +} + +static size_t allocate_index(roaring64_bitmap_t *r) { + size_t first_free = r->first_free; + if (first_free == r->capacity) { + extend_containers(r, 1); + } + r->first_free = next_free_container_idx(r); + return first_free; +} + +static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, + uint8_t typecode) { + size_t index = allocate_index(r); + r->containers[index] = container; + return create_leaf(index, typecode); +} + +static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { + size_t index = get_index(leaf); + r->containers[index] = NULL; + if (index < r->first_free) { + r->first_free = index; + } +} + +// Copies the container referenced by `leaf` from `r1` to `r2`. +static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, leaf_t leaf) { + uint8_t typecode = get_typecode(leaf); // get_copy_of_container modifies the typecode passed in. - result_leaf->container = get_copy_of_container( - leaf->container, &result_leaf->typecode, /*copy_on_write=*/false); - return result_leaf; + container_t *container = get_copy_of_container( + get_container(r1, leaf), &typecode, /*copy_on_write=*/false); + return add_container(r2, container, typecode); } static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } @@ -103,10 +165,10 @@ static inline int compare_high48(art_key_chunk_t key1[], static inline bool roaring64_iterator_init_at_leaf_first( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = 0; - it->container_it = - container_init_iterator(leaf->container, leaf->typecode, &low16); + it->container_it = container_init_iterator(get_container(it->r, leaf), + get_typecode(leaf), &low16); it->value = it->high48 | low16; return (it->has_value = true); } @@ -114,18 +176,18 @@ static inline bool roaring64_iterator_init_at_leaf_first( static inline bool roaring64_iterator_init_at_leaf_last( roaring64_iterator_t *it) { it->high48 = combine_key(it->art_it.key, 0); - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = 0; - it->container_it = - container_init_iterator_last(leaf->container, leaf->typecode, &low16); + it->container_it = container_init_iterator_last(get_container(it->r, leaf), + get_typecode(leaf), &low16); it->value = it->high48 | low16; return (it->has_value = true); } static inline roaring64_iterator_t *roaring64_iterator_init_at( const roaring64_bitmap_t *r, roaring64_iterator_t *it, bool first) { - it->parent = r; - it->art_it = art_init_iterator(&r->art, first); + it->r = r; + it->art_it = art_init_iterator((art_t *)&r->art, first); it->has_value = it->art_it.value != NULL; if (it->has_value) { if (first) { @@ -142,8 +204,11 @@ static inline roaring64_iterator_t *roaring64_iterator_init_at( roaring64_bitmap_t *roaring64_bitmap_create(void) { roaring64_bitmap_t *r = (roaring64_bitmap_t *)roaring_malloc(sizeof(roaring64_bitmap_t)); - r->art.root = NULL; + 
art_init_cleared(&r->art); r->flags = 0; + r->capacity = 0; + r->first_free = 0; + r->containers = NULL; return r; } @@ -153,26 +218,27 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { } art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf = (leaf_t)*it.value; + container_free(get_container(r, leaf), get_typecode(leaf)); art_iterator_next(&it); } art_free(&r->art); + roaring_free(r->containers); roaring_free(r); } roaring64_bitmap_t *roaring64_bitmap_copy(const roaring64_bitmap_t *r) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint8_t result_typecode = leaf->typecode; + leaf_t leaf = (leaf_t)*it.value; + uint8_t result_typecode = get_typecode(leaf); container_t *result_container = get_copy_of_container( - leaf->container, &result_typecode, /*copy_on_write=*/false); - leaf_t *result_leaf = create_leaf(result_container, result_typecode); - art_insert(&result->art, it.key, (art_val_t *)result_leaf); + get_container(r, leaf), &result_typecode, /*copy_on_write=*/false); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it.key, (art_val_t)result_leaf); art_iterator_next(&it); } return result; @@ -199,8 +265,8 @@ static void move_from_roaring32_offset(roaring64_bitmap_t *dst, uint8_t high48[ART_KEY_BYTES]; uint64_t high48_bits = key_base | ((uint64_t)key << 16); split_key(high48_bits, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&dst->art, high48, (art_val_t *)leaf); + leaf_t leaf = add_container(dst, container, typecode); + art_insert(&dst->art, high48, (art_val_t)leaf); } // We stole all the containers, so leave behind a size of zero src->high_low_container.size = 0; @@ -242,8 +308,8 @@ roaring64_bitmap_t *roaring64_bitmap_from_range(uint64_t min, uint64_t max, uint8_t high48[ART_KEY_BYTES]; split_key(min, high48); - leaf_t *leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); + leaf_t leaf = add_container(r, container, typecode); + art_insert(&r->art, high48, (art_val_t)leaf); uint64_t gap = container_max - container_min + step - 1; uint64_t increment = gap - (gap % step); @@ -267,13 +333,14 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, uint16_t low16, leaf_t *leaf) { if (leaf != NULL) { + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(leaf->container, low16, leaf->typecode, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; + container_add(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, leaf, container2, typecode2); } return leaf; } else { @@ -282,9 +349,8 @@ static inline leaf_t *containerptr_roaring64_bitmap_add(roaring64_bitmap_t *r, container_t *container = container_add(ac, low16, ARRAY_CONTAINER_TYPE, &typecode); assert(ac == container); - leaf = create_leaf(container, typecode); - art_insert(&r->art, high48, (art_val_t *)leaf); - return leaf; + 
leaf_t new_leaf = add_container(r, container, typecode); + return (leaf_t *)art_insert(&r->art, high48, (art_val_t)new_leaf); } } @@ -302,12 +368,12 @@ bool roaring64_bitmap_add_checked(roaring64_bitmap_t *r, uint64_t val) { int old_cardinality = 0; if (leaf != NULL) { - old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + old_cardinality = container_get_cardinality(get_container(r, *leaf), + get_typecode(*leaf)); } leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); return old_cardinality != new_cardinality; } @@ -316,22 +382,22 @@ void roaring64_bitmap_add_bulk(roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - if (context->leaf != NULL && - compare_high48(context->high_bytes, high48) == 0) { + leaf_t *leaf = context->leaf; + if (leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = - container_add(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; + container_add(container1, low16, typecode1, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. - leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); + leaf = (leaf_t *)art_find(&r->art, high48); context->leaf = containerptr_roaring64_bitmap_add(r, high48, low16, leaf); memcpy(context->high_bytes, high48, ART_KEY_BYTES); @@ -351,17 +417,19 @@ void roaring64_bitmap_add_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void add_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { +static inline void add_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf != NULL) { + uint8_t typecode1 = get_typecode(*leaf); + container_t *container1 = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = container_add_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); - leaf->container = container2; - leaf->typecode = typecode2; + container_t *container2 = + container_add_range(container1, typecode1, min, max, &typecode2); + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r, leaf, container2, typecode2); } return; } @@ -369,8 +437,8 @@ static inline void add_range_closed_at(art_t *art, uint8_t *high48, // container_add_range is inclusive, but `container_range_of_ones` is // exclusive. 
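[Editor's aside] The add paths above build new leaves by packing the freshly allocated container slot and its typecode into a single 64-bit value, via the create_leaf/get_typecode/get_index helpers defined earlier in this patch. A minimal standalone sketch of that round trip, with hypothetical values (illustration only, not part of the patch):

#include <assert.h>
#include <stdint.h>

typedef uint64_t leaf_t;

/* Same bit layout as create_leaf in the patch: low 8 bits hold the
 * container typecode, the remaining bits hold the container array index. */
static leaf_t pack_leaf(uint64_t container_index, uint8_t typecode) {
    return (container_index << 8) | typecode;
}

int main(void) {
    leaf_t leaf = pack_leaf(42, 3); /* slot 42, hypothetical typecode 3 */
    assert((uint8_t)leaf == 3);     /* what get_typecode extracts */
    assert((leaf >> 8) == 42);      /* what get_index extracts */
    return 0;
}

Because the typecode sits in the low byte, get_container turns a leaf into a plain array lookup with a single shift.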
container_t *container = container_range_of_ones(min, max + 1, &typecode); - leaf = create_leaf(container, typecode); - art_insert(art, high48, (art_val_t *)leaf); + leaf_t new_leaf = add_container(r, container, typecode); + art_insert(art, high48, (art_val_t)new_leaf); } void roaring64_bitmap_add_range(roaring64_bitmap_t *r, uint64_t min, @@ -394,22 +462,22 @@ void roaring64_bitmap_add_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only populate range within one container. - add_range_closed_at(art, min_high48, min_low16, max_low16); + add_range_closed_at(r, art, min_high48, min_low16, max_low16); return; } // Populate a range across containers. Fill intermediate containers // entirely. - add_range_closed_at(art, min_high48, min_low16, 0xffff); + add_range_closed_at(r, art, min_high48, min_low16, 0xffff); uint64_t min_high_bits = min >> 16; uint64_t max_high_bits = max >> 16; for (uint64_t current = min_high_bits + 1; current < max_high_bits; ++current) { uint8_t current_high48[ART_KEY_BYTES]; split_key(current << 16, current_high48); - add_range_closed_at(art, current_high48, 0, 0xffff); + add_range_closed_at(r, art, current_high48, 0, 0xffff); } - add_range_closed_at(art, max_high48, 0, max_low16); + add_range_closed_at(r, art, max_high48, 0, max_low16); } bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { @@ -417,7 +485,8 @@ bool roaring64_bitmap_contains(const roaring64_bitmap_t *r, uint64_t val) { uint16_t low16 = split_key(val, high48); leaf_t *leaf = (leaf_t *)art_find(&r->art, high48); if (leaf != NULL) { - return container_contains(leaf->container, low16, leaf->typecode); + return container_contains(get_container(r, *leaf), low16, + get_typecode(*leaf)); } return false; } @@ -434,7 +503,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); uint64_t max_high48_bits = (max - 1) & 0xFFFFFFFFFFFF0000; // Inclusive - art_iterator_t it = art_lower_bound(&r->art, min_high48); + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); if (it.value == NULL || combine_key(it.key, 0) > min) { return false; } @@ -451,7 +520,7 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, return false; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; uint32_t container_min = 0; if (compare_high48(it.key, min_high48) == 0) { container_min = min_low16; @@ -464,11 +533,13 @@ bool roaring64_bitmap_contains_range(const roaring64_bitmap_t *r, uint64_t min, // For the first and last containers we use container_contains_range, // for the intermediate containers we can use container_is_full. 
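[Editor's aside] The ranged operations in this file (add, remove, contains, flip) all use the same first/intermediate/last decomposition: only the first and last 16-bit chunk are constrained by the low bits of min and max, and every chunk in between covers a full container. A self-contained sketch of that decomposition with hypothetical bounds (not CRoaring code):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t min = 0x10000 - 5; /* hypothetical closed range [min, max] */
    uint64_t max = 0x30000 + 2;
    for (uint64_t high = min >> 16; high <= max >> 16; high++) {
        uint16_t lo = (high == (min >> 16)) ? (uint16_t)min : 0;
        uint16_t hi = (high == (max >> 16)) ? (uint16_t)max : 0xffff;
        printf("chunk %llu covers low range [%u, %u]%s\n",
               (unsigned long long)high, (unsigned)lo, (unsigned)hi,
               (lo == 0 && hi == 0xffff) ? " (full container)" : "");
    }
    return 0;
}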
if (container_min == 0 && container_max == 0xFFFF + 1) { - if (!container_is_full(leaf->container, leaf->typecode)) { + if (!container_is_full(get_container(r, leaf), + get_typecode(leaf))) { return false; } - } else if (!container_contains_range(leaf->container, container_min, - container_max, leaf->typecode)) { + } else if (!container_contains_range(get_container(r, leaf), + container_min, container_max, + get_typecode(leaf))) { return false; } prev_high48_bits = current_high48_bits; @@ -494,24 +565,24 @@ bool roaring64_bitmap_contains_bulk(const roaring64_bitmap_t *r, context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } - return container_contains(context->leaf->container, low16, - context->leaf->typecode); + return container_contains(get_container(r, *context->leaf), low16, + get_typecode(*context->leaf)); } bool roaring64_bitmap_select(const roaring64_bitmap_t *r, uint64_t rank, uint64_t *element) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t start_rank = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - uint64_t cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + leaf_t leaf = (leaf_t)*it.value; + uint64_t cardinality = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); if (start_rank + cardinality > rank) { uint32_t uint32_start = 0; uint32_t uint32_rank = rank - start_rank; uint32_t uint32_element = 0; - if (container_select(leaf->container, leaf->typecode, &uint32_start, - uint32_rank, &uint32_element)) { + if (container_select(get_container(r, leaf), get_typecode(leaf), + &uint32_start, uint32_rank, &uint32_element)) { *element = combine_key(it.key, (uint16_t)uint32_element); return true; } @@ -527,16 +598,17 @@ uint64_t roaring64_bitmap_rank(const roaring64_bitmap_t *r, uint64_t val) { uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t rank = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - rank += container_get_cardinality(leaf->container, leaf->typecode); + rank += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); } else if (compare_result == 0) { - return rank + - container_rank(leaf->container, leaf->typecode, low16); + return rank + container_rank(get_container(r, leaf), + get_typecode(leaf), low16); } else { return rank; } @@ -550,16 +622,17 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, uint8_t high48[ART_KEY_BYTES]; uint16_t low16 = split_key(val, high48); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t index = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; int compare_result = compare_high48(it.key, high48); if (compare_result < 0) { - index += container_get_cardinality(leaf->container, leaf->typecode); + index += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); } else if (compare_result == 0) { - int index16 = - container_get_index(leaf->container, leaf->typecode, low16); + int index16 = container_get_index(get_container(r, leaf), + 
get_typecode(leaf), low16); if (index16 < 0) { return false; } @@ -573,31 +646,31 @@ bool roaring64_bitmap_get_index(const roaring64_bitmap_t *r, uint64_t val, return false; } -static inline leaf_t *containerptr_roaring64_bitmap_remove( - roaring64_bitmap_t *r, uint8_t *high48, uint16_t low16, leaf_t *leaf) { +// Returns true if a container was removed. +static inline bool containerptr_roaring64_bitmap_remove(roaring64_bitmap_t *r, + uint8_t *high48, + uint16_t low16, + leaf_t *leaf) { if (leaf == NULL) { - return NULL; + return false; } - container_t *container = leaf->container; - uint8_t typecode = leaf->typecode; + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; container_t *container2 = container_remove(container, low16, typecode, &typecode2); if (container2 != container) { container_free(container, typecode); - leaf->container = container2; - leaf->typecode = typecode2; + replace_container(r, leaf, container2, typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - leaf = (leaf_t *)art_erase(&r->art, high48); - if (leaf != NULL) { - free_leaf(leaf); - } - return NULL; + bool erased = art_erase(&r->art, high48, (art_val_t *)leaf); + assert(erased); + return true; } - return leaf; + return false; } void roaring64_bitmap_remove(roaring64_bitmap_t *r, uint64_t val) { @@ -619,13 +692,12 @@ bool roaring64_bitmap_remove_checked(roaring64_bitmap_t *r, uint64_t val) { return false; } int old_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); - leaf = containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); - if (leaf == NULL) { + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); + if (containerptr_roaring64_bitmap_remove(r, high48, low16, leaf)) { return true; } int new_cardinality = - container_get_cardinality(leaf->container, leaf->typecode); + container_get_cardinality(get_container(r, *leaf), get_typecode(*leaf)); return new_cardinality != old_cardinality; } @@ -638,26 +710,28 @@ void roaring64_bitmap_remove_bulk(roaring64_bitmap_t *r, if (context->leaf != NULL && compare_high48(context->high_bytes, high48) == 0) { // We're at a container with the correct high bits. + uint8_t typecode = get_typecode(*context->leaf); + container_t *container = get_container(r, *context->leaf); uint8_t typecode2; container_t *container2 = - container_remove(context->leaf->container, low16, - context->leaf->typecode, &typecode2); - if (container2 != context->leaf->container) { - container_free(context->leaf->container, context->leaf->typecode); - context->leaf->container = container2; - context->leaf->typecode = typecode2; + container_remove(container, low16, typecode, &typecode2); + if (container2 != container) { + container_free(container, typecode); + replace_container(r, context->leaf, container2, typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { - leaf_t *leaf = (leaf_t *)art_erase(art, high48); container_free(container2, typecode2); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_erase(art, high48, (art_val_t *)&leaf); + assert(erased); + remove_container(r, leaf); } } else { // We're not positioned anywhere yet or the high bits of the key // differ. 
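[Editor's aside] Both bulk paths (add and remove) cache the last leaf and its high 48 bits in the caller's roaring64_bulk_context_t so that consecutive values falling in the same container skip the ART lookup; this branch is what re-establishes that cache. A simplified, self-contained sketch of the cache check (names here are stand-ins, not the patch's types):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define KEY_BYTES 6 /* high 48 bits of the value, as in ART_KEY_BYTES */

/* Stand-in for the bulk context: remembers where the previous value went. */
typedef struct {
    uint8_t high_bytes[KEY_BYTES];
    void *leaf; /* cached leaf, NULL when not positioned anywhere yet */
} bulk_context_t;

/* True when the cached leaf can be reused for a value with these high bits. */
static bool bulk_cache_hit(const bulk_context_t *ctx,
                           const uint8_t high48[KEY_BYTES]) {
    return ctx->leaf != NULL &&
           memcmp(ctx->high_bytes, high48, KEY_BYTES) == 0;
}

int main(void) {
    bulk_context_t ctx = {{0}, NULL};
    uint8_t high48[KEY_BYTES] = {0};
    return bulk_cache_hit(&ctx, high48) ? 1 : 0; /* miss: context is empty */
}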
leaf_t *leaf = (leaf_t *)art_find(art, high48); - context->leaf = - containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + containerptr_roaring64_bitmap_remove(r, high48, low16, leaf); + context->leaf = leaf; memcpy(context->high_bytes, high48, ART_KEY_BYTES); } } @@ -675,23 +749,26 @@ void roaring64_bitmap_remove_many(roaring64_bitmap_t *r, size_t n_args, } } -static inline void remove_range_closed_at(art_t *art, uint8_t *high48, - uint16_t min, uint16_t max) { +static inline void remove_range_closed_at(roaring64_bitmap_t *r, art_t *art, + uint8_t *high48, uint16_t min, + uint16_t max) { leaf_t *leaf = (leaf_t *)art_find(art, high48); if (leaf == NULL) { return; } + uint8_t typecode = get_typecode(*leaf); + container_t *container = get_container(r, *leaf); uint8_t typecode2; - container_t *container2 = container_remove_range( - leaf->container, leaf->typecode, min, max, &typecode2); - if (container2 != leaf->container) { - container_free(leaf->container, leaf->typecode); + container_t *container2 = + container_remove_range(container, typecode, min, max, &typecode2); + if (container2 != container) { + container_free(container, typecode); if (container2 != NULL) { - leaf->container = container2; - leaf->typecode = typecode2; + replace_container(r, leaf, container2, typecode2); } else { - art_erase(art, high48); - free_leaf(leaf); + bool erased = art_erase(art, high48, NULL); + assert(erased); + remove_container(r, *leaf); } } } @@ -717,21 +794,23 @@ void roaring64_bitmap_remove_range_closed(roaring64_bitmap_t *r, uint64_t min, uint16_t max_low16 = split_key(max, max_high48); if (compare_high48(min_high48, max_high48) == 0) { // Only remove a range within one container. - remove_range_closed_at(art, min_high48, min_low16, max_low16); + remove_range_closed_at(r, art, min_high48, min_low16, max_low16); return; } // Remove a range across containers. Remove intermediate containers // entirely. 
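[Editor's aside] Containers dropped by these erase paths give their slot back to the bitmap's container array rather than shrinking it: remove_container NULLs the entry and lowers first_free, and allocate_index later reuses the lowest free slot. A toy, self-contained sketch of that free-slot scheme (fixed capacity, growth elided; not the patch's code):

#include <stddef.h>
#include <stdio.h>

typedef struct {
    void *slots[8];
    size_t first_free; /* lowest index known to be free */
    size_t capacity;
} slot_pool_t;

static size_t pool_alloc(slot_pool_t *p, void *value) {
    size_t idx = p->first_free;
    if (idx == p->capacity) {
        return (size_t)-1; /* full; the patch grows the array here instead */
    }
    p->slots[idx] = value;
    /* Scan upward for the next free slot, as next_free_container_idx does. */
    size_t next = idx + 1;
    while (next < p->capacity && p->slots[next] != NULL) {
        next++;
    }
    p->first_free = next;
    return idx;
}

static void pool_free(slot_pool_t *p, size_t idx) {
    p->slots[idx] = NULL;
    if (idx < p->first_free) {
        p->first_free = idx;
    }
}

int main(void) {
    slot_pool_t pool = {{NULL}, 0, 8};
    int a = 1, b = 2;
    size_t ia = pool_alloc(&pool, &a);
    size_t ib = pool_alloc(&pool, &b);
    pool_free(&pool, ia); /* slot ia becomes the next one handed out */
    printf("allocated %zu then %zu, next free is %zu\n", ia, ib,
           pool.first_free);
    return 0;
}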
- remove_range_closed_at(art, min_high48, min_low16, 0xffff); + remove_range_closed_at(r, art, min_high48, min_low16, 0xffff); art_iterator_t it = art_upper_bound(art, min_high48); while (it.value != NULL && art_compare_keys(it.key, max_high48) < 0) { - leaf_t *leaf = (leaf_t *)art_iterator_erase(art, &it); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_iterator_erase(&it, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r, leaf), get_typecode(leaf)); + remove_container(r, leaf); } - remove_range_closed_at(art, max_high48, 0, max_low16); + remove_range_closed_at(r, art, max_high48, 0, max_low16); } void roaring64_bitmap_clear(roaring64_bitmap_t *r) { @@ -739,12 +818,12 @@ void roaring64_bitmap_clear(roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_get_cardinality(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t cardinality = 0; while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); + leaf_t leaf = (leaf_t)*it.value; + cardinality += container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } return cardinality; @@ -773,7 +852,7 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, uint8_t max_high48[ART_KEY_BYTES]; uint16_t max_low16 = split_key(max, max_high48); - art_iterator_t it = art_lower_bound(&r->art, min_high48); + art_iterator_t it = art_lower_bound((art_t *)&r->art, min_high48); while (it.value != NULL) { int max_compare_result = compare_high48(it.key, max_high48); if (max_compare_result > 0) { @@ -781,23 +860,22 @@ uint64_t roaring64_bitmap_range_closed_cardinality(const roaring64_bitmap_t *r, break; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); if (max_compare_result == 0) { // We're at the max high key, add only the range up to the low // 16 bits of max. - cardinality += - container_rank(leaf->container, leaf->typecode, max_low16); + cardinality += container_rank(container, typecode, max_low16); } else { // We're not yet at the max high key, add the full container // range. - cardinality += - container_get_cardinality(leaf->container, leaf->typecode); + cardinality += container_get_cardinality(container, typecode); } if (compare_high48(it.key, min_high48) == 0 && min_low16 > 0) { // We're at the min high key, remove the range up to the low 16 // bits of min. 
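[Editor's aside] The range-cardinality loop above is plain rank arithmetic: count everything up to the low 16 bits of max, then subtract everything strictly below the low 16 bits of min. A tiny worked example on a toy sorted container (hypothetical values, not the CRoaring container API):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* rank(x) = number of stored values <= x, the contract of container_rank.
 * Assumes vals is sorted ascending. */
static int rank_of(const uint16_t *vals, size_t n, uint16_t x) {
    int r = 0;
    for (size_t i = 0; i < n && vals[i] <= x; i++) {
        r++;
    }
    return r;
}

int main(void) {
    const uint16_t vals[] = {2, 5, 7, 9};     /* toy container contents   */
    int in_range = rank_of(vals, 4, 8)        /* values <= 8: {2, 5, 7}   */
                   - rank_of(vals, 4, 4 - 1); /* values <= 3: {2}         */
    assert(in_range == 2);                    /* {5, 7} lie within [4, 8] */
    return 0;
}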
- cardinality -= - container_rank(leaf->container, leaf->typecode, min_low16 - 1); + cardinality -= container_rank(container, typecode, min_low16 - 1); } art_iterator_next(&it); } @@ -809,23 +887,23 @@ bool roaring64_bitmap_is_empty(const roaring64_bitmap_t *r) { } uint64_t roaring64_bitmap_minimum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); if (it.value == NULL) { return UINT64_MAX; } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_minimum(leaf->container, leaf->typecode)); + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_minimum(get_container(r, leaf), get_typecode(leaf))); } uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/false); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/false); if (it.value == NULL) { return 0; } - leaf_t *leaf = (leaf_t *)it.value; - return combine_key(it.key, - container_maximum(leaf->container, leaf->typecode)); + leaf_t leaf = (leaf_t)*it.value; + return combine_key( + it.key, container_maximum(get_container(r, leaf), get_typecode(leaf))); } bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { @@ -836,9 +914,9 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { uint8_t new_typecode; // We don't need to free the existing container if a new one was // created, convert_run_optimize does that internally. - leaf->container = convert_run_optimize(leaf->container, leaf->typecode, - &new_typecode); - leaf->typecode = new_typecode; + container_t *new_container = convert_run_optimize( + get_container(r, *leaf), get_typecode(*leaf), &new_typecode); + replace_container(r, leaf, new_container, new_typecode); has_run_container |= new_typecode == RUN_CONTAINER_TYPE; art_iterator_next(&it); } @@ -855,15 +933,16 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, stat->min_value = roaring64_bitmap_minimum(r); stat->max_value = roaring64_bitmap_maximum(r); - art_iterator_t it = art_init_iterator(&r->art, true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, true); while (it.value != NULL) { - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; stat->n_containers++; - uint8_t truetype = get_container_type(leaf->container, leaf->typecode); - uint32_t card = - container_get_cardinality(leaf->container, leaf->typecode); + uint8_t truetype = + get_container_type(get_container(r, leaf), get_typecode(leaf)); + uint32_t card = container_get_cardinality(get_container(r, leaf), + get_typecode(leaf)); uint32_t sbytes = - container_size_in_bytes(leaf->container, leaf->typecode); + container_size_in_bytes(get_container(r, leaf), get_typecode(leaf)); stat->cardinality += card; switch (truetype) { case BITSET_CONTAINER_TYPE: @@ -889,31 +968,34 @@ void roaring64_bitmap_statistics(const roaring64_bitmap_t *r, } } -static bool roaring64_leaf_internal_validate(const art_val_t *val, - const char **reason) { - leaf_t *leaf = (leaf_t *)val; - return container_internal_validate(leaf->container, leaf->typecode, reason); +static bool roaring64_leaf_internal_validate(const art_val_t val, + const char **reason, + void *context) { + leaf_t leaf = (leaf_t)val; + roaring64_bitmap_t *r = (roaring64_bitmap_t *)context; + return container_internal_validate(get_container(r, leaf), + get_typecode(leaf), reason); } bool roaring64_bitmap_internal_validate(const 
roaring64_bitmap_t *r, const char **reason) { return art_internal_validate(&r->art, reason, - roaring64_leaf_internal_validate); + roaring64_leaf_internal_validate, (void *)r); } bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { if (compare_high48(it1.key, it2.key) != 0) { return false; } - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_equals(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_equals(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { return false; } art_iterator_next(&it1); @@ -924,8 +1006,8 @@ bool roaring64_bitmap_equals(const roaring64_bitmap_t *r1, bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { bool it2_present = it2.value != NULL; @@ -934,10 +1016,11 @@ bool roaring64_bitmap_is_subset(const roaring64_bitmap_t *r1, if (it2_present) { compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - if (!container_is_subset(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode)) { + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + if (!container_is_subset( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2))) { return false; } art_iterator_next(&it1); @@ -964,8 +1047,8 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -975,19 +1058,20 @@ roaring64_bitmap_t *roaring64_bitmap_and(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. 
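[Editor's aside] All of the binary operations in this file (and, or, xor, andnot and their in-place variants) walk the two ARTs with the same sorted-merge pattern that these case comments describe. Sketched here on plain sorted integer arrays, illustration only:

#include <stddef.h>
#include <stdio.h>

int main(void) {
    const int a[] = {1, 3, 5, 9};  /* stands in for r1's high-48 keys */
    const int b[] = {3, 4, 5, 10}; /* stands in for r2's high-48 keys */
    size_t i = 0, j = 0;
    while (i < 4 && j < 4) {
        if (a[i] == b[j]) {       /* case 2: same key, combine the containers */
            printf("both sides have key %d\n", a[i]);
            i++;
            j++;
        } else if (a[i] < b[j]) { /* case 1/3a: left key is smaller, advance it */
            i++;
        } else {                  /* case 3c: right key is smaller, advance it */
            j++;
        }
    }
    return 0;
}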
- leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_and(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -1006,8 +1090,8 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { uint64_t result = 0; - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -1017,11 +1101,11 @@ uint64_t roaring64_bitmap_and_cardinality(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result += - container_and_cardinality(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + result += container_and_cardinality( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -1042,7 +1126,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -1058,7 +1142,7 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2a: iterators at the same high key position. leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t leaf2 = (leaf_t)*it2.value; // We do the computation "in place" only when c1 is not a // shared container. Rationale: using a shared container @@ -1066,28 +1150,31 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, // copy and then doing the computation in place which is // likely less efficient than avoiding in place entirely and // always generating a new container. 
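[Editor's aside] The branch that follows implements exactly this rule. A toy, self-contained restatement of the decision (a plain bitmask stands in for a container; the shared flag plays the role of SHARED_CONTAINER_TYPE in the patch):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy container: a bitmask plus a copy-on-write flag. */
typedef struct {
    uint64_t bits;
    bool shared; /* a shared container must never be mutated in place */
} toy_container_t;

/* Combine in place only when the left operand is not shared; otherwise
 * build a fresh result and leave the shared operand untouched. */
static toy_container_t toy_and(toy_container_t *c1, const toy_container_t *c2) {
    if (c1->shared) {
        toy_container_t fresh = {c1->bits & c2->bits, false};
        return fresh;
    }
    c1->bits &= c2->bits;
    return *c1;
}

int main(void) {
    toy_container_t a = {0xB, true}, b = {0x3, false};
    toy_container_t r = toy_and(&a, &b);
    printf("result 0x%llx, shared operand still 0x%llx\n",
           (unsigned long long)r.bits, (unsigned long long)a.bits);
    return 0;
}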
+ uint8_t typecode = get_typecode(*leaf1); + container_t *container = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_and( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + if (typecode == SHARED_CONTAINER_TYPE) { + container2 = container_and(container, typecode, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } else { container2 = container_iand( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container, typecode, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; + if (container2 != container) { + container_free(container, typecode); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + art_iterator_erase(&it1, NULL); + remove_container(r1, *leaf1); } else { + if (container2 != container) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -1098,10 +1185,11 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, if (!it2_present || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *leaf = (leaf_t *)art_iterator_erase(&r1->art, &it1); - assert(leaf != NULL); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + leaf_t leaf; + bool erased = art_iterator_erase(&it1, (art_val_t *)&leaf); + assert(erased); + container_free(get_container(r1, leaf), get_typecode(leaf)); + remove_container(r1, leaf); } else if (compare_result > 0) { // Case 2c: it1 is after it2. art_iterator_lower_bound(&it2, it1.key); @@ -1112,8 +1200,8 @@ void roaring64_bitmap_and_inplace(roaring64_bitmap_t *r1, bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { bool intersect = false; - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL && it2.value != NULL) { // Cases: @@ -1123,10 +1211,11 @@ bool roaring64_bitmap_intersect(const roaring64_bitmap_t *r1, int compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - intersect |= container_intersect(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + intersect |= container_intersect( + get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2)); art_iterator_next(&it1); art_iterator_next(&it2); } else if (compare_result < 0) { @@ -1166,8 +1255,8 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1185,26 +1274,31 @@ roaring64_bitmap_t *roaring64_bitmap_or(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 3b: iterators at the same high key position. - leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_or( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_or(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + leaf_t result_leaf = + add_container(result, result_container, result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); art_iterator_next(&it2); } } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -1225,7 +1319,7 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, return; } art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1244,22 +1338,23 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_or(leaf1->container, leaf1->typecode, - leaf2->container, leaf2->typecode, - &typecode2); + if (get_typecode(*leaf1) == SHARED_CONTAINER_TYPE) { + container2 = container_or(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } else { - container2 = container_ior( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container2 = container_ior(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - if (container2 != leaf1->container) { - container_free(leaf1->container, leaf1->typecode); - leaf1->container = container2; - leaf1->typecode = typecode2; + if (container2 != container1) { + container_free(container1, typecode1); + replace_container(r1, leaf1, container2, typecode2); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -1270,9 +1365,9 @@ void roaring64_bitmap_or_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -1282,8 +1377,8 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1301,19 +1396,20 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
- leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); - result_leaf->container = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf1 = (leaf_t)*it1.value; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = + container_xor(get_container(r1, leaf1), get_typecode(leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -1321,13 +1417,15 @@ roaring64_bitmap_t *roaring64_bitmap_xor(const roaring64_bitmap_t *r1, } if ((it1_present && !it2_present) || compare_result < 0) { // Cases 1 and 3a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); - art_insert(&result->art, it2.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r2, result, (leaf_t)*it2.value); + art_insert(&result->art, it2.key, (art_val_t)result_leaf); art_iterator_next(&it2); } } @@ -1346,7 +1444,7 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { assert(r1 != r2); art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL || it2.value != NULL) { bool it1_present = it1.value != NULL; @@ -1365,15 +1463,15 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 3b: iterators at the same high key position. 
leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { - container2 = container_xor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + if (typecode1 == SHARED_CONTAINER_TYPE) { + container2 = container_xor(container1, typecode1, + get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); if (container2 != container1) { // We only free when doing container_xor, not // container_ixor, as ixor frees the original @@ -1382,17 +1480,19 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_ixor( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } - leaf1->container = container2; - leaf1->typecode = typecode2; if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + remove_container(r1, *leaf1); } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -1405,13 +1505,13 @@ void roaring64_bitmap_xor_inplace(roaring64_bitmap_t *r1, art_iterator_next(&it1); } else if ((!it1_present && it2_present) || compare_result > 0) { // Cases 2 and 3c: it2 is the only iterator or is before it1. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it2.value); + leaf_t result_leaf = + copy_leaf_container(r2, r1, (leaf_t)*it2.value); if (it1_present) { - art_iterator_insert(&r1->art, &it1, it2.key, - (art_val_t *)result_leaf); + art_iterator_insert(&it1, it2.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else { - art_insert(&r1->art, it2.key, (art_val_t *)result_leaf); + art_insert(&r1->art, it2.key, (art_val_t)result_leaf); } art_iterator_next(&it2); } @@ -1422,8 +1522,8 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { roaring64_bitmap_t *result = roaring64_bitmap_create(); - art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it1 = art_init_iterator((art_t *)&r1->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -1438,20 +1538,21 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, compare_result = compare_high48(it1.key, it2.key); if (compare_result == 0) { // Case 2b: iterators at the same high key position. 
- leaf_t *result_leaf = (leaf_t *)roaring_malloc(sizeof(leaf_t)); leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - result_leaf->container = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &result_leaf->typecode); - - if (container_nonzero_cardinality(result_leaf->container, - result_leaf->typecode)) { - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t result_typecode; + container_t *result_container = container_andnot( + get_container(r1, *leaf1), get_typecode(*leaf1), + get_container(r2, leaf2), get_typecode(leaf2), + &result_typecode); + + if (container_nonzero_cardinality(result_container, + result_typecode)) { + leaf_t result_leaf = add_container(result, result_container, + result_typecode); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); } else { - container_free(result_leaf->container, - result_leaf->typecode); - free_leaf(result_leaf); + container_free(result_container, result_typecode); } art_iterator_next(&it1); art_iterator_next(&it2); @@ -1459,8 +1560,9 @@ roaring64_bitmap_t *roaring64_bitmap_andnot(const roaring64_bitmap_t *r1, } if (!it2_present || compare_result < 0) { // Cases 1 and 2a: it1 is the only iterator or is before it2. - leaf_t *result_leaf = copy_leaf_container((leaf_t *)it1.value); - art_insert(&result->art, it1.key, (art_val_t *)result_leaf); + leaf_t result_leaf = + copy_leaf_container(r1, result, (leaf_t)*it1.value); + art_insert(&result->art, it1.key, (art_val_t)result_leaf); art_iterator_next(&it1); } else if (compare_result > 0) { // Case 2c: it1 is after it2. @@ -1480,7 +1582,7 @@ uint64_t roaring64_bitmap_andnot_cardinality(const roaring64_bitmap_t *r1, void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, const roaring64_bitmap_t *r2) { art_iterator_t it1 = art_init_iterator(&r1->art, /*first=*/true); - art_iterator_t it2 = art_init_iterator(&r2->art, /*first=*/true); + art_iterator_t it2 = art_init_iterator((art_t *)&r2->art, /*first=*/true); while (it1.value != NULL) { // Cases: @@ -1496,15 +1598,15 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, if (compare_result == 0) { // Case 2b: iterators at the same high key position. 
leaf_t *leaf1 = (leaf_t *)it1.value; - leaf_t *leaf2 = (leaf_t *)it2.value; - container_t *container1 = leaf1->container; - uint8_t typecode1 = leaf1->typecode; + leaf_t leaf2 = (leaf_t)*it2.value; + uint8_t typecode1 = get_typecode(*leaf1); + container_t *container1 = get_container(r1, *leaf1); uint8_t typecode2; container_t *container2; - if (leaf1->typecode == SHARED_CONTAINER_TYPE) { + if (typecode1 == SHARED_CONTAINER_TYPE) { container2 = container_andnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); if (container2 != container1) { // We only free when doing container_andnot, not // container_iandnot, as iandnot frees the original @@ -1513,19 +1615,19 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } } else { container2 = container_iandnot( - leaf1->container, leaf1->typecode, leaf2->container, - leaf2->typecode, &typecode2); - } - if (container2 != container1) { - leaf1->container = container2; - leaf1->typecode = typecode2; + container1, typecode1, get_container(r2, leaf2), + get_typecode(leaf2), &typecode2); } if (!container_nonzero_cardinality(container2, typecode2)) { container_free(container2, typecode2); - art_iterator_erase(&r1->art, &it1); - free_leaf(leaf1); + bool erased = art_iterator_erase(&it1, NULL); + assert(erased); + remove_container(r1, *leaf1); } else { + if (container2 != container1) { + replace_container(r1, leaf1, container2, typecode2); + } // Only advance the iterator if we didn't delete the // leaf, as erasing advances by itself. art_iterator_next(&it1); @@ -1544,38 +1646,39 @@ void roaring64_bitmap_andnot_inplace(roaring64_bitmap_t *r1, } /** - * Flips the leaf at high48 in the range [min, max), returning a new leaf with a - * new container. If the high48 key is not found in the existing bitmap, a new - * container is created. Returns null if the negation results in an empty range. + * Flips the leaf at high48 in the range [min, max), adding the result to + * `r2`. If the high48 key is not found in `r1`, a new container is created. */ -static leaf_t *roaring64_flip_leaf(const roaring64_bitmap_t *r, - uint8_t high48[], uint32_t min, - uint32_t max) { - leaf_t *leaf1 = (leaf_t *)art_find(&r->art, high48); - container_t *container2; +static void roaring64_flip_leaf(const roaring64_bitmap_t *r1, + roaring64_bitmap_t *r2, uint8_t high48[], + uint32_t min, uint32_t max) { + leaf_t *leaf1 = (leaf_t *)art_find(&r1->art, high48); uint8_t typecode2; + container_t *container2; if (leaf1 == NULL) { // No container at this key, create a full container. container2 = container_range_of_ones(min, max, &typecode2); } else if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = - container_not(leaf1->container, leaf1->typecode, &typecode2); + container2 = container_not(get_container(r1, *leaf1), + get_typecode(*leaf1), &typecode2); } else { // Partially flip a container. 
- container2 = container_not_range(leaf1->container, leaf1->typecode, min, - max, &typecode2); + container2 = + container_not_range(get_container(r1, *leaf1), get_typecode(*leaf1), + min, max, &typecode2); } if (container_nonzero_cardinality(container2, typecode2)) { - return create_leaf(container2, typecode2); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, high48, (art_val_t)leaf2); + } else { + container_free(container2, typecode2); } - container_free(container2, typecode2); - return NULL; } /** - * Flips the leaf at high48 in the range [min, max). If the high48 key is not - * found in the bitmap, a new container is created. Deletes the leaf and + * Flips the leaf at high48 in the range [min, max). If the high48 key is + * not found in the bitmap, a new container is created. Deletes the leaf and * associated container if the negation results in an empty range. */ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], @@ -1586,28 +1689,28 @@ static void roaring64_flip_leaf_inplace(roaring64_bitmap_t *r, uint8_t high48[], if (leaf == NULL) { // No container at this key, insert a full container. container2 = container_range_of_ones(min, max, &typecode2); - art_insert(&r->art, high48, - (art_val_t *)create_leaf(container2, typecode2)); + leaf_t new_leaf = add_container(r, container2, typecode2); + art_insert(&r->art, high48, (art_val_t)new_leaf); return; } if (min == 0 && max > 0xFFFF) { // Flip whole container. - container2 = - container_inot(leaf->container, leaf->typecode, &typecode2); + container2 = container_inot(get_container(r, *leaf), + get_typecode(*leaf), &typecode2); } else { // Partially flip a container. - container2 = container_inot_range(leaf->container, leaf->typecode, min, - max, &typecode2); + container2 = container_inot_range( + get_container(r, *leaf), get_typecode(*leaf), min, max, &typecode2); } - leaf->container = container2; - leaf->typecode = typecode2; - - if (!container_nonzero_cardinality(leaf->container, leaf->typecode)) { - art_erase(&r->art, high48); - container_free(leaf->container, leaf->typecode); - free_leaf(leaf); + if (container_nonzero_cardinality(container2, typecode2)) { + replace_container(r, leaf, container2, typecode2); + } else { + bool erased = art_erase(&r->art, high48, NULL); + assert(erased); + container_free(container2, typecode2); + remove_container(r, *leaf); } } @@ -1632,20 +1735,21 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; roaring64_bitmap_t *r2 = roaring64_bitmap_create(); - art_iterator_t it = art_init_iterator(&r1->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r1->art, /*first=*/true); // Copy the containers before min unchanged. while (it.value != NULL && compare_high48(it.key, min_high48_key) < 0) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); art_iterator_next(&it); } - // Flip the range (including non-existent containers!) between min and max. 
+ // Flip the range (including non-existent containers!) between min and + // max. for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -1660,22 +1764,19 @@ roaring64_bitmap_t *roaring64_bitmap_flip_closed(const roaring64_bitmap_t *r1, max_container = max_low16 + 1; // Exclusive. } - leaf_t *leaf = roaring64_flip_leaf(r1, current_high48_key, - min_container, max_container); - if (leaf != NULL) { - art_insert(&r2->art, current_high48_key, (art_val_t *)leaf); - } + roaring64_flip_leaf(r1, r2, current_high48_key, min_container, + max_container); } // Copy the containers after max unchanged. - it = art_upper_bound(&r1->art, max_high48_key); + it = art_upper_bound((art_t *)&r1->art, max_high48_key); while (it.value != NULL) { - leaf_t *leaf1 = (leaf_t *)it.value; - uint8_t typecode2 = leaf1->typecode; + leaf_t leaf1 = (leaf_t)*it.value; + uint8_t typecode2 = get_typecode(leaf1); container_t *container2 = get_copy_of_container( - leaf1->container, &typecode2, /*copy_on_write=*/false); - art_insert(&r2->art, it.key, - (art_val_t *)create_leaf(container2, typecode2)); + get_container(r1, leaf1), &typecode2, /*copy_on_write=*/false); + leaf_t leaf2 = add_container(r2, container2, typecode2); + art_insert(&r2->art, it.key, (art_val_t)leaf2); art_iterator_next(&it); } @@ -1700,7 +1801,8 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, uint64_t min_high48_bits = (min & 0xFFFFFFFFFFFF0000ULL) >> 16; uint64_t max_high48_bits = (max & 0xFFFFFFFFFFFF0000ULL) >> 16; - // Flip the range (including non-existent containers!) between min and max. + // Flip the range (including non-existent containers!) between min and + // max. for (uint64_t high48_bits = min_high48_bits; high48_bits <= max_high48_bits; high48_bits++) { uint8_t current_high48_key[ART_KEY_BYTES]; @@ -1722,7 +1824,7 @@ void roaring64_bitmap_flip_closed_inplace(roaring64_bitmap_t *r, uint64_t min, // Returns the number of distinct high 32-bit entries in the bitmap. static inline uint64_t count_high32(const roaring64_bitmap_t *r) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint64_t high32_count = 0; uint32_t prev_high32 = 0; while (it.value != NULL) { @@ -1751,7 +1853,7 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint64_t high32_count; size += sizeof(high32_count); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -1760,7 +1862,8 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { uint32_t current_high32 = (uint32_t)(combine_key(it.key, 0) >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. + // Write as uint32 the most significant 32 bits of the + // bucket. 
size += sizeof(prev_high32); // Write the 32-bit Roaring bitmaps representing the least @@ -1782,10 +1885,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r) { prev_high32 = current_high32; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high32 >> 16), leaf->container, - leaf->typecode); + (uint16_t)(current_high32 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } @@ -1816,7 +1919,7 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, memcpy(buf, &high32_count, sizeof(high32_count)); buf += sizeof(high32_count); - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); uint32_t prev_high32 = 0; roaring_bitmap_t *bitmap32 = NULL; @@ -1826,7 +1929,8 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, uint32_t current_high32 = (uint32_t)(current_high48 >> 32); if (bitmap32 == NULL || prev_high32 != current_high32) { if (bitmap32 != NULL) { - // Write as uint32 the most significant 32 bits of the bucket. + // Write as uint32 the most significant 32 bits of the + // bucket. memcpy(buf, &prev_high32, sizeof(prev_high32)); buf += sizeof(prev_high32); @@ -1849,10 +1953,10 @@ size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r, prev_high32 = current_high32; } - leaf_t *leaf = (leaf_t *)it.value; + leaf_t leaf = (leaf_t)*it.value; ra_append(&bitmap32->high_low_container, - (uint16_t)(current_high48 >> 16), leaf->container, - leaf->typecode); + (uint16_t)(current_high48 >> 16), get_container(r, leaf), + get_typecode(leaf)); art_iterator_next(&it); } @@ -1903,8 +2007,8 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, buf += sizeof(high32); read_bytes += sizeof(high32); - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -1959,8 +2063,8 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( } previous_high32 = high32; - // Read the 32-bit Roaring bitmaps representing the least significant - // bits of a set of elements. + // Read the 32-bit Roaring bitmaps representing the least + // significant bits of a set of elements. 
size_t bitmap32_size = roaring_bitmap_portable_deserialize_size( buf, maxbytes - read_bytes); if (bitmap32_size == 0) { @@ -2004,14 +2108,14 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { - art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { uint64_t high48 = combine_key(it.key, 0); uint64_t high32 = high48 & 0xFFFFFFFF00000000ULL; uint32_t low32 = high48; - leaf_t *leaf = (leaf_t *)it.value; - if (!container_iterate64(leaf->container, leaf->typecode, low32, - iterator, high32, ptr)) { + leaf_t leaf = (leaf_t)*it.value; + if (!container_iterate64(get_container(r, leaf), get_typecode(leaf), + low32, iterator, high32, ptr)) { return false; } art_iterator_next(&it); @@ -2071,12 +2175,12 @@ bool roaring64_iterator_advance(roaring64_iterator_t *it) { if (it->saturated_forward) { return (it->has_value = false); } - roaring64_iterator_init_at(it->parent, it, /*first=*/true); + roaring64_iterator_init_at(it->r, it, /*first=*/true); return it->has_value; } - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_next(leaf->container, leaf->typecode, + if (container_iterator_next(get_container(it->r, leaf), get_typecode(leaf), &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -2094,12 +2198,12 @@ bool roaring64_iterator_previous(roaring64_iterator_t *it) { // Saturated backward. return (it->has_value = false); } - roaring64_iterator_init_at(it->parent, it, /*first=*/false); + roaring64_iterator_init_at(it->r, it, /*first=*/false); return it->has_value; } - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_prev(leaf->container, leaf->typecode, + if (container_iterator_prev(get_container(it->r, leaf), get_typecode(leaf), &it->container_it, &low16)) { it->value = it->high48 | low16; return (it->has_value = true); @@ -2117,8 +2221,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, uint16_t val_low16 = split_key(val, val_high48); if (!it->has_value || it->high48 != (val & 0xFFFFFFFFFFFF0000)) { // The ART iterator is before or after the high48 bits of `val` (or - // beyond the ART altogether), so we need to move to a leaf with a key - // equal or greater. + // beyond the ART altogether), so we need to move to a leaf with a + // key equal or greater. if (!art_iterator_lower_bound(&it->art_it, val_high48)) { // Only smaller keys found. it->saturated_forward = true; @@ -2129,13 +2233,13 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } if (it->high48 == (val & 0xFFFFFFFFFFFF0000)) { - // We're at equal high bits, check if a suitable value can be found in - // this container. - leaf_t *leaf = (leaf_t *)it->art_it.value; + // We're at equal high bits, check if a suitable value can be found + // in this container. 
+ leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; - if (container_iterator_lower_bound(leaf->container, leaf->typecode, - &it->container_it, &low16, - val_low16)) { + if (container_iterator_lower_bound( + get_container(it->r, leaf), get_typecode(leaf), + &it->container_it, &low16, val_low16)) { it->value = it->high48 | low16; return (it->has_value = true); } @@ -2146,8 +2250,8 @@ bool roaring64_iterator_move_equalorlarger(roaring64_iterator_t *it, } } - // We're at a leaf with high bits greater than `val`, so the first entry in - // this container is our result. + // We're at a leaf with high bits greater than `val`, so the first entry + // in this container is our result. return roaring64_iterator_init_at_leaf_first(it); } @@ -2156,15 +2260,15 @@ uint64_t roaring64_iterator_read(roaring64_iterator_t *it, uint64_t *buf, uint64_t consumed = 0; while (it->has_value && consumed < count) { uint32_t container_consumed; - leaf_t *leaf = (leaf_t *)it->art_it.value; + leaf_t leaf = (leaf_t)*it->art_it.value; uint16_t low16 = (uint16_t)it->value; uint32_t container_count = UINT32_MAX; if (count - consumed < (uint64_t)UINT32_MAX) { container_count = count - consumed; } bool has_value = container_iterator_read_into_uint64( - leaf->container, leaf->typecode, &it->container_it, it->high48, buf, - container_count, &container_consumed, &low16); + get_container(it->r, leaf), get_typecode(leaf), &it->container_it, + it->high48, buf, container_count, &container_consumed, &low16); consumed += container_consumed; buf += container_consumed; if (has_value) { From ac3d16fc929bd4ce429dfa8d6394a87595fa4ead Mon Sep 17 00:00:00 2001 From: Soerian Date: Wed, 1 Jan 2025 14:39:10 +0000 Subject: [PATCH 03/16] ART serialization --- include/roaring/art/art.h | 28 +++++++ src/art/art.c | 158 ++++++++++++++++++++++++++++++++++++++ tests/art_unit.cpp | 76 ++++++++++++++++++ 3 files changed, 262 insertions(+) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index e3c0257a6..7a9e4eb25 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -208,6 +208,34 @@ void art_iterator_insert(art_iterator_t *iterator, const art_key_chunk_t *key, */ bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); +/** + * Shrinks the internal arrays in the ART to remove any unused elements. Returns + * the number of bytes freed. + */ +size_t art_shrink_to_fit(art_t *art); + +/** + * Returns the serialized size in bytes. + * Requires `art_shrink_to_fit` to be called first. + */ +size_t art_size_in_bytes(const art_t *art); + +/** + * Serializes the ART and returns the number of bytes written. Returns 0 on + * error. Requires `art_shrink_to_fit` to be called first. + */ +size_t art_serialize(const art_t *art, char *buf); + +/** + * Deserializes the ART from a serialized buffer, reading up to `maxbytes` + * bytes. Returns 0 on error. Requires `buf` to be 8 byte aligned. + * + * An ART deserialized in this way should only be used in a readonly context.The + * underlying buffer must not be freed before the ART. `art_free` should not be + * called on the ART deserialized in this way. 
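+ *
+ * A minimal round-trip sketch (illustrative only; assumes `buf` is an
+ * 8-byte-aligned allocation of at least `art_size_in_bytes(&art)` bytes):
+ *
+ *   art_shrink_to_fit(&art);
+ *   size_t bytes = art_size_in_bytes(&art);
+ *   art_serialize(&art, buf);
+ *
+ *   art_t view;
+ *   if (art_frozen_view(buf, bytes, &view) != 0) {
+ *       // Use `view` read-only; do not call `art_free` on it, and keep
+ *       // `buf` alive for as long as `view` is in use.
+ *   }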
+ */ +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art); + #ifdef __cplusplus } // extern "C" } // namespace roaring diff --git a/src/art/art.c b/src/art/art.c index 6a96531a4..24d8aa8a3 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -2291,6 +2292,163 @@ bool art_internal_validate(const art_t *art, const char **reason, return art_internal_validate_at(art, art->root, validator); } +_Static_assert(alignof(art_leaf_t) == alignof(art_node4_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node16_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node48_t), + "Serialization assumes node type alignment is equal"); +_Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), + "Serialization assumes node type alignment is equal"); + +size_t art_size_in_bytes(const art_t *art) { + // Root. + size_t size = sizeof(art->root); + // Node counts. + size += sizeof(art->capacities); + // Alignment for leaves. The rest of the nodes are aligned the same way. + size += + ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; + size += art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); + size += art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); + size += art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); + size += art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); + size += art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); + return size; +} + +size_t art_serialize(const art_t *art, char *buf) { + if (buf == NULL) { + return 0; + } + const char *initial_buf = buf; + + // Root. + memcpy(buf, &art->root, sizeof(art->root)); + buf += sizeof(art->root); + + // Node counts. + memcpy(buf, art->capacities, sizeof(art->capacities)); + buf += sizeof(art->capacities); + + if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); + // Alignment for leaves. The rest of the nodes are aligned the same way. 
+ buf = + CROARING_ART_ALIGN_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); + memcpy(buf, art->leaves, size); + buf += size; + } + if (art->capacities[CROARING_ART_NODE4_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); + memcpy(buf, art->node4s, size); + buf += size; + } + if (art->capacities[CROARING_ART_NODE16_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); + memcpy(buf, art->node16s, size); + buf += size; + } + if (art->capacities[CROARING_ART_NODE48_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); + memcpy(buf, art->node48s, size); + buf += size; + } + if (art->capacities[CROARING_ART_NODE256_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); + memcpy(buf, art->node256s, size); + buf += size; + } + + return buf - initial_buf; +} + +size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { + if (buf == NULL || art == NULL) { + return 0; + } + const char *initial_buf = buf; + art_init_cleared(art); + + if (maxbytes < sizeof(art->root)) { + return 0; + } + memcpy(&art->root, buf, sizeof(art->root)); + buf += sizeof(art->root); + maxbytes -= sizeof(art->root); + + if (maxbytes < sizeof(art->capacities)) { + return 0; + } + _Static_assert(sizeof(art->first_free) == sizeof(art->capacities), + "first_free is read from capacities"); + memcpy(art->first_free, buf, sizeof(art->capacities)); + memcpy(art->capacities, buf, sizeof(art->capacities)); + buf += sizeof(art->capacities); + maxbytes -= sizeof(art->capacities); + + if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); + const char *before_align = buf; + // Alignment for leaves. The rest of the nodes are aligned the same way. 
+ buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); + if (maxbytes < (buf - before_align) + size) { + return 0; + } + art->leaves = (art_leaf_t *)buf; + buf += size; + maxbytes -= (buf - before_align); + } + if (art->capacities[CROARING_ART_NODE4_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); + if (maxbytes < size) { + return 0; + } + art->node4s = (art_node4_t *)buf; + buf += size; + maxbytes -= size; + } + if (art->capacities[CROARING_ART_NODE16_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); + if (maxbytes < size) { + return 0; + } + art->node16s = (art_node16_t *)buf; + buf += size; + maxbytes -= size; + } + if (art->capacities[CROARING_ART_NODE48_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); + if (maxbytes < size) { + return 0; + } + art->node48s = (art_node48_t *)buf; + buf += size; + maxbytes -= size; + } + if (art->capacities[CROARING_ART_NODE256_TYPE] > 0) { + size_t size = + art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); + if (maxbytes < size) { + art_free(art); + return 0; + } + art->node256s = (art_node256_t *)buf; + buf += size; + maxbytes -= size; + } + return buf - initial_buf; +} + #ifdef __cplusplus } // extern "C" } // namespace roaring diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp index 104465b83..e3f3ffc3f 100644 --- a/tests/art_unit.cpp +++ b/tests/art_unit.cpp @@ -9,6 +9,7 @@ #include #include +#include #include "test.h" @@ -654,6 +655,80 @@ DEFINE_TEST(test_art_shrink_grow_node48) { art_free(&art); } +DEFINE_TEST(test_art_frozen_view) { + { + // ART with multiple node sizes. + std::vector> keys; + std::vector values; + std::vector sizes = {4, 16, 48, 256}; + for (size_t i = 0; i < sizes.size(); i++) { + size_t size = sizes[i]; + for (size_t j = 0; j < size; j++) { + keys.push_back({0, 0, 0, static_cast(i), + static_cast(j)}); + values.push_back(i * j); + } + } + art_t art1; + art_init_cleared(&art1); + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art1, (art_key_chunk_t*)keys[i].data(), values[i]); + assert_art_valid(&art1); + } + + size_t serialized_size = art_size_in_bytes(&art1); + char* buf = (char*)roaring_aligned_malloc(8, serialized_size); + assert_int_equal(art_serialize(&art1, buf), serialized_size); + art_free(&art1); + + art_t art2; + assert_int_equal(art_frozen_view(buf, serialized_size, &art2), + serialized_size); + + art_iterator_t iterator = art_init_iterator(&art2, true); + size_t i = 0; + do { + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data()); + assert_true(*iterator.value == values[i]); + ++i; + } while (art_iterator_next(&iterator)); + roaring_aligned_free(buf); + } + { + // Max-depth ART. 
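+        // The keys below differ from one another at a different byte
+        // position each time, so a branch is forced at every one of the six
+        // key bytes and the resulting tree has an inner node at every level.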
+ std::vector> keys{ + {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 1, 0}, + {0, 0, 0, 1, 0, 0}, {0, 0, 1, 0, 0, 0}, {0, 1, 0, 0, 0, 0}, + {1, 0, 0, 0, 0, 0}, + }; + std::vector values = {0, 1, 2, 3, 4, 5, 6}; + art_t art1; + art_init_cleared(&art1); + for (size_t i = 0; i < keys.size(); ++i) { + art_insert(&art1, (art_key_chunk_t*)keys[i].data(), values[i]); + assert_art_valid(&art1); + } + + size_t serialized_size = art_size_in_bytes(&art1); + char* buf = (char*)roaring_aligned_malloc(8, serialized_size); + assert_int_equal(art_serialize(&art1, buf), serialized_size); + art_free(&art1); + + art_t art2; + assert_int_equal(art_frozen_view(buf, serialized_size, &art2), + serialized_size); + + art_iterator_t iterator = art_init_iterator(&art2, true); + size_t i = 0; + do { + assert_key_eq(iterator.key, (art_key_chunk_t*)keys[i].data()); + assert_true(*iterator.value == values[i]); + ++i; + } while (art_iterator_next(&iterator)); + roaring_aligned_free(buf); + } +} + } // namespace int main() { @@ -670,6 +745,7 @@ int main() { cmocka_unit_test(test_art_iterator_insert), cmocka_unit_test(test_art_shadowed), cmocka_unit_test(test_art_shrink_grow_node48), + cmocka_unit_test(test_art_frozen_view), }; return cmocka_run_group_tests(tests, NULL, NULL); } From 6c2eb797c753204ec014383c654cf254c497c6c2 Mon Sep 17 00:00:00 2001 From: Soerian Date: Wed, 1 Jan 2025 14:39:47 +0000 Subject: [PATCH 04/16] r64 frozen serialization --- include/roaring/roaring64.h | 53 +++++ src/roaring64.c | 386 +++++++++++++++++++++++++++++++++++- tests/roaring64_unit.cpp | 42 ++++ 3 files changed, 479 insertions(+), 2 deletions(-) diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index 5001bf880..3506918e6 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -312,6 +312,12 @@ uint64_t roaring64_bitmap_maximum(const roaring64_bitmap_t *r); */ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r); +/** + * Shrinks internal arrays to eliminate any unused capacity. Returns the number + * of bytes freed. + */ +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r); + /** * (For advanced users.) * Collect statistics about the bitmap @@ -564,6 +570,53 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf, roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes); +/** + * Returns the number of bytes required to serialize this bitmap in a "frozen" + * format. This is not compatible with any other serialization formats. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + */ +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r); + +/** + * Serializes the bitmap in a "frozen" format. The given buffer must be at least + * `roaring64_bitmap_frozen_size_in_bytes()` in size. Returns the number of + * bytes used for serialization. + * + * `roaring64_bitmap_shrink_to_fit()` must be called before this method. + * + * The frozen format is optimized for speed of (de)serialization, as well as + * allowing the user to create a bitmap based on a memory mapped file, which is + * possible because the format mimics the memory layout of the bitmap. + * + * Because the format mimics the memory layout of the bitmap, the format is not + * fixed across releases of Roaring Bitmaps, and may change in future releases. + * + * This function is endian-sensitive. 
If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf); + +/** + * Creates a readonly bitmap that is a view of the given buffer. The buffer + * should be created with `roaring64_bitmap_frozen_serialize()`, and must be + * aligned by 64 bytes. + * + * Returns NULL if deserialization fails. + * + * The returned bitmap must only be used in a readonly manner. The bitmap must + * be freed using `roaring64_bitmap_free()` as normal. The backing buffer must + * only be freed after the bitmap. + * + * This function is endian-sensitive. If you have a big-endian system (e.g., a + * mainframe IBM s390x), the data format is going to be big-endian and not + * compatible with little-endian systems. + */ +roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes); + /** * Iterate over the bitmap elements. The function `iterator` is called once for * all the values with `ptr` (can be NULL) as the second parameter of each call. diff --git a/src/roaring64.c b/src/roaring64.c index d1ca33310..121dbdecc 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -8,11 +9,25 @@ #include // For serialization / deserialization +#include +#include +#include #include #include // containers.h last to avoid conflict with ROARING_CONTAINER_T. #include +#define CROARING_ALIGN_BUF(buf, alignment) \ + (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) + +#define CROARING_ALIGN_RELATIVE(buf_cur, buf_start, alignment) \ + (char *)((buf_start) + \ + (((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1)))) + +#define CROARING_BITSET_ALIGNMENT 64 + #ifdef __cplusplus using namespace ::roaring::internal; @@ -54,6 +69,10 @@ typedef struct roaring64_iterator_s { bool saturated_forward; } roaring64_iterator_t; +static inline bool is_frozen64(const roaring64_bitmap_t *r) { + return r->flags & ROARING_FLAG_FROZEN; +} + // Splits the given uint64 key into high 48 bit and low 16 bit components. // Expects high48_out to be of length ART_KEY_BYTES. static inline uint16_t split_key(uint64_t key, uint8_t high48_out[]) { @@ -219,10 +238,18 @@ void roaring64_bitmap_free(roaring64_bitmap_t *r) { art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); while (it.value != NULL) { leaf_t leaf = (leaf_t)*it.value; - container_free(get_container(r, leaf), get_typecode(leaf)); + if (is_frozen64(r)) { + // Only free the container itself, not the buffer-backed contents + // within. 
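+            // (A frozen view heap-allocates each container struct, but its
+            // words/array/runs pointers reference the caller-owned
+            // serialization buffer, so only the struct itself is freed here.)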
+ roaring_free(get_container(r, leaf)); + } else { + container_free(get_container(r, leaf), get_typecode(leaf)); + } art_iterator_next(&it); } - art_free(&r->art); + if (!is_frozen64(r)) { + art_free(&r->art); + } roaring_free(r->containers); roaring_free(r); } @@ -923,6 +950,37 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { return has_run_container; } +static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { + size_t idx = get_index(*leaf); + if (idx < r->first_free) { + return; + } + r->containers[r->first_free] = get_container(r, *leaf); + r->containers[idx] = NULL; + *leaf = create_leaf(r->first_free, get_typecode(*leaf)); + r->first_free = next_free_container_idx(r); +} + +size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { + size_t freed = art_shrink_to_fit(&r->art); + art_iterator_t it = art_init_iterator(&r->art, true); + while (it.value != NULL) { + leaf_t *leaf = (leaf_t *)it.value; + freed += container_shrink_to_fit(get_container(r, *leaf), + get_typecode(*leaf)); + move_to_shrink(r, leaf); + art_iterator_next(&it); + } + size_t new_capacity = r->first_free; + if (new_capacity < r->capacity) { + r->containers = roaring_realloc(r->containers, + new_capacity * sizeof(container_t *)); + freed += r->capacity - new_capacity; + r->capacity = new_capacity; + } + return freed; +} + /** * (For advanced users.) * Collect statistics about the bitmap @@ -2106,6 +2164,330 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( return r; } +// Returns an "element count" for the given container. This has a different +// meaning for each container type, but the purpose is the minimal information +// required to serialize the container metadata. +static inline uint32_t container_get_element_count(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return ((bitset_container_t *)c)->cardinality; + } + case ARRAY_CONTAINER_TYPE: { + return ((array_container_t *)c)->cardinality; + } + case RUN_CONTAINER_TYPE: { + return ((run_container_t *)c)->n_runs; + } + default: { + assert(false); + roaring_unreachable; + return 0; + } + } +} + +static inline size_t container_get_frozen_size(const container_t *c, + uint8_t typecode) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + return BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); + } + case ARRAY_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(uint16_t); + } + case RUN_CONTAINER_TYPE: { + return container_get_element_count(c, typecode) * sizeof(rle16_t); + } + default: { + assert(false); + roaring_unreachable; + return 0; + } + } +} + +size_t align_size(size_t size, size_t alignment) { + return (size + alignment - 1) & ~(alignment - 1); +} + +size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { + // Flags. + size_t size = sizeof(r->flags); + // Container count. + size += sizeof(r->capacity); + // Container element counts. + size += r->capacity * sizeof(uint16_t); + // Total container sizes. + size += 3 * sizeof(size_t); + // ART (8 byte aligned). + size = align_size(size, 8); + size += art_size_in_bytes(&r->art); + // Containers (aligned). 
+ size = align_size(size, CROARING_BITSET_ALIGNMENT); + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + size += container_get_frozen_size(get_container(r, leaf), typecode); + art_iterator_next(&it); + } + // Padding to make overall size a multiple of required alignment. + size = align_size(size, CROARING_BITSET_ALIGNMENT); + return size; +} + +static inline void container_frozen_serialize(const container_t *container, + uint8_t typecode, + uint64_t **bitsets, + uint16_t **arrays, + rle16_t **runs) { + size_t size = container_get_frozen_size(container, typecode); + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *bitset = (bitset_container_t *)container; + memcpy(*bitsets, bitset->words, size); + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + break; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *array = (array_container_t *)container; + memcpy(*arrays, array->array, size); + *arrays += container_get_element_count(container, typecode); + break; + } + case RUN_CONTAINER_TYPE: { + run_container_t *run = (run_container_t *)container; + memcpy(*runs, run->runs, size); + *runs += container_get_element_count(container, typecode); + break; + } + default: { + assert(false); + roaring_unreachable; + } + } +} + +size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, + char *buf) { + if (buf == NULL) { + return 0; + } + const char *initial_buf = buf; + + // Flags. + memcpy(buf, &r->flags, sizeof(r->flags)); + buf += sizeof(r->flags); + + // Container count. + memcpy(buf, &r->capacity, sizeof(r->capacity)); + buf += sizeof(r->capacity); + + // Container element counts. + size_t total_sizes[4] = CROARING_ZERO_INITIALIZER; // Indexed by typecode. + art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + + uint32_t elem_count = container_get_element_count(container, typecode); + uint16_t compressed_elem_count = (uint16_t)(elem_count - 1); + memcpy(buf, &compressed_elem_count, sizeof(compressed_elem_count)); + buf += sizeof(compressed_elem_count); + + total_sizes[typecode] += container_get_frozen_size(container, typecode); + art_iterator_next(&it); + } + + // Total container sizes. + memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(size_t)); + buf += sizeof(size_t); + memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(size_t)); + buf += sizeof(size_t); + memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(size_t)); + buf += sizeof(size_t); + + // ART. + buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, 8); + buf += art_serialize(&r->art, buf); + + // Containers (aligned). + // Runs before arrays as run elements are larger than array elements and + // smaller than bitset elements. 
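+    // Ordering the regions by decreasing element size (8-byte bitset words,
+    // 4-byte rle16_t runs, 2-byte uint16_t array entries) means that once
+    // the bitset region is 64-byte aligned, the run and array regions that
+    // follow need no extra padding.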
+ buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + uint64_t *bitsets = (uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, alignof(rle16_t)); + rle16_t *runs = (rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, alignof(uint16_t)); + uint16_t *arrays = (uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + + it = art_init_iterator((art_t *)&r->art, /*first=*/true); + while (it.value != NULL) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + container_t *container = get_container(r, leaf); + container_frozen_serialize(container, typecode, &bitsets, &arrays, + &runs); + art_iterator_next(&it); + } + + // Padding to make overall size a multiple of required alignment. + buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + + return buf - initial_buf; +} + +static container_t *container_frozen_view(uint8_t typecode, uint32_t elem_count, + const uint64_t **bitsets, + const uint16_t **arrays, + const rle16_t **runs) { + switch (typecode) { + case BITSET_CONTAINER_TYPE: { + bitset_container_t *c = (bitset_container_t *)roaring_malloc( + sizeof(bitset_container_t)); + c->cardinality = elem_count; + c->words = (uint64_t *)*bitsets; + *bitsets += BITSET_CONTAINER_SIZE_IN_WORDS; + return (container_t *)c; + } + case ARRAY_CONTAINER_TYPE: { + array_container_t *c = + (array_container_t *)roaring_malloc(sizeof(array_container_t)); + c->cardinality = elem_count; + c->capacity = elem_count; + c->array = (uint16_t *)*arrays; + *arrays += elem_count; + return (container_t *)c; + } + case RUN_CONTAINER_TYPE: { + run_container_t *c = + (run_container_t *)roaring_malloc(sizeof(run_container_t)); + c->n_runs = elem_count; + c->capacity = elem_count; + c->runs = (rle16_t *)*runs; + *runs += elem_count; + return (container_t *)c; + } + default: { + assert(false); + roaring_unreachable; + return NULL; + } + } +} + +roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, + size_t maxbytes) { + if (buf == NULL) { + return NULL; + } + + roaring64_bitmap_t *r = roaring64_bitmap_create(); + + // Flags. + if (maxbytes < sizeof(r->flags)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->flags, buf, sizeof(r->flags)); + buf += sizeof(r->flags); + maxbytes -= sizeof(r->flags); + r->flags |= ROARING_FLAG_FROZEN; + + // Container count. + if (maxbytes < sizeof(r->capacity)) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&r->capacity, buf, sizeof(r->capacity)); + buf += sizeof(r->capacity); + maxbytes -= sizeof(r->capacity); + + r->containers = + (container_t *)roaring_malloc(r->capacity * sizeof(container_t *)); + + // Container element counts. + if (maxbytes < r->capacity * sizeof(uint16_t)) { + roaring64_bitmap_free(r); + return NULL; + } + const char *elem_counts = buf; + buf += r->capacity * sizeof(uint16_t); + maxbytes -= r->capacity * sizeof(uint16_t); + + // Total container sizes. + size_t total_sizes[4]; + if (maxbytes < sizeof(size_t) * 3) { + roaring64_bitmap_free(r); + return NULL; + } + memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(size_t)); + buf += sizeof(size_t); + maxbytes -= sizeof(size_t); + memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(size_t)); + buf += sizeof(size_t); + maxbytes -= sizeof(size_t); + memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(size_t)); + buf += sizeof(size_t); + maxbytes -= sizeof(size_t); + + // ART (8 byte aligned). 
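+    // The caller's buffer is required to be 64-byte aligned, so aligning the
+    // absolute pointer to 8 bytes lands on the same offset the serializer
+    // reached by aligning relative to the start of the buffer.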
+ buf = CROARING_ALIGN_BUF(buf, 8); + size_t art_size = art_frozen_view(buf, maxbytes, &r->art); + if (art_size == 0) { + roaring64_bitmap_free(r); + return NULL; + } + buf += art_size; + maxbytes -= art_size; + + // Containers (aligned). + const char *before_containers = buf; + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + const uint64_t *bitsets = (const uint64_t *)buf; + buf += total_sizes[BITSET_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(rle16_t)); + const rle16_t *runs = (const rle16_t *)buf; + buf += total_sizes[RUN_CONTAINER_TYPE]; + buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); + const uint16_t *arrays = (const uint16_t *)buf; + buf += total_sizes[ARRAY_CONTAINER_TYPE]; + if (maxbytes < (size_t)(buf - before_containers)) { + roaring64_bitmap_free(r); + return NULL; + } + maxbytes -= buf - before_containers; + + // Deserialize in ART iteration order. + art_iterator_t it = art_init_iterator(&r->art, /*first=*/true); + for (size_t i = 0; it.value != NULL; ++i) { + leaf_t leaf = (leaf_t)*it.value; + uint8_t typecode = get_typecode(leaf); + + uint16_t compressed_elem_count; + memcpy(&compressed_elem_count, elem_counts + (i * sizeof(uint16_t)), + sizeof(compressed_elem_count)); + uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; + + // The container index is unrelated to the iteration order. + size_t index = get_index(leaf); + r->containers[index] = container_frozen_view(typecode, elem_count, + &bitsets, &arrays, &runs); + + art_iterator_next(&it); + } + + // Padding to make overall size a multiple of required alignment. + buf = CROARING_ALIGN_BUF(buf, CROARING_BITSET_ALIGNMENT); + + return r; +} + bool roaring64_bitmap_iterate(const roaring64_bitmap_t *r, roaring_iterator64 iterator, void *ptr) { art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); diff --git a/tests/roaring64_unit.cpp b/tests/roaring64_unit.cpp index 935c1bbb0..373d86214 100644 --- a/tests/roaring64_unit.cpp +++ b/tests/roaring64_unit.cpp @@ -1536,6 +1536,47 @@ DEFINE_TEST(test_portable_serialize) { roaring64_bitmap_free(r); } +void check_frozen_serialization(roaring64_bitmap_t* r1) { + roaring64_bitmap_shrink_to_fit(r1); + assert_r64_valid(r1); + + // Serialize to an unaligned buffer, deserialize from an 32-byte aligned + // buffer. 
+ size_t serialized_size = roaring64_bitmap_frozen_size_in_bytes(r1); + char* buf = (char*)roaring_aligned_malloc(64, serialized_size + 1); + size_t serialized = roaring64_bitmap_frozen_serialize(r1, buf + 1); + assert_int_equal(serialized, serialized_size); + memmove(buf, buf + 1, serialized_size); + + roaring64_bitmap_t* r2 = roaring64_bitmap_frozen_view(buf, serialized_size); + assert_true(r2 != NULL); + assert_r64_valid(r2); + assert_true(roaring64_bitmap_equals(r2, r1)); + + roaring64_bitmap_free(r2); + roaring_aligned_free(buf); +} + +DEFINE_TEST(test_frozen_serialize) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + + check_frozen_serialization(r); + + roaring64_bitmap_add(r, 0); + roaring64_bitmap_add(r, 1); + roaring64_bitmap_add(r, 1ULL << 16); + roaring64_bitmap_add(r, 1ULL << 32); + roaring64_bitmap_add(r, 1ULL << 48); + roaring64_bitmap_add(r, 1ULL << 60); + roaring64_bitmap_add(r, UINT64_MAX); + check_frozen_serialization(r); + + roaring64_bitmap_add_range(r, 1ULL << 16, 1ULL << 32); + check_frozen_serialization(r); + + roaring64_bitmap_free(r); +} + bool roaring_iterator64_sumall(uint64_t value, void* param) { *(uint64_t*)param += value; return true; @@ -1978,6 +2019,7 @@ int main() { cmocka_unit_test(test_flip), cmocka_unit_test(test_flip_inplace), cmocka_unit_test(test_portable_serialize), + cmocka_unit_test(test_frozen_serialize), cmocka_unit_test(test_iterate), cmocka_unit_test(test_to_uint64_array), cmocka_unit_test(test_iterator_create), From c669fb022d73cb42dbb3c9d4282f1fe74aa6685e Mon Sep 17 00:00:00 2001 From: Soerian Date: Wed, 1 Jan 2025 14:40:15 +0000 Subject: [PATCH 05/16] Synthetic benchmarks for r64 --- microbenchmarks/CMakeLists.txt | 4 + microbenchmarks/synthetic_bench.cpp | 472 ++++++++++++++++++++++++++++ 2 files changed, 476 insertions(+) create mode 100644 microbenchmarks/synthetic_bench.cpp diff --git a/microbenchmarks/CMakeLists.txt b/microbenchmarks/CMakeLists.txt index 04ba24c7a..2c51c7041 100644 --- a/microbenchmarks/CMakeLists.txt +++ b/microbenchmarks/CMakeLists.txt @@ -25,3 +25,7 @@ add_executable(bench bench.cpp) target_link_libraries(bench PRIVATE roaring) target_link_libraries(bench PRIVATE benchmark::benchmark) target_compile_definitions(bench PRIVATE BENCHMARK_DATA_DIR="${BENCHMARK_DATA_DIR}") + +add_executable(synthetic_bench synthetic_bench.cpp) +target_link_libraries(synthetic_bench PRIVATE roaring) +target_link_libraries(synthetic_bench PRIVATE benchmark::benchmark) diff --git a/microbenchmarks/synthetic_bench.cpp b/microbenchmarks/synthetic_bench.cpp new file mode 100644 index 000000000..22d409577 --- /dev/null +++ b/microbenchmarks/synthetic_bench.cpp @@ -0,0 +1,472 @@ +#include +#include +#include + +#include "performancecounters/event_counter.h" +#include "roaring/roaring64.h" +#include "roaring64map.hh" + +namespace roaring { + +const auto kCountAndDensityRange = { + benchmark::CreateRange(1000, 1000000, /*multi=*/10), + benchmark::CreateRange(1, uint64_t{1} << 48, + /*multi=*/256)}; + +// Bitmasks with 20 bits set, spread out over: 20, 32, 48, 64 bits. +// +// These bitmasks make it so that the set size is bounded, and the hit rate is +// high, while also changing density at different bit orders. With 2^20 random +// elements inserted, the hit rate is ~63% due to the overlap in elements +// inserted. 
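+//
+// (After 2^20 uniform draws from the 2^20 values a mask allows, the chance
+// that a fresh random value is already present is 1 - (1 - 2^-20)^(2^20),
+// which is about 1 - 1/e, i.e. roughly 63%.)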
+constexpr std::array kBitmasks = { + // 20 bit spread + 0x00000000000FFFFF, + 0x0000000FFFFF0000, + 0x000FFFFF00000000, + 0xFFFFF00000000000, + // 32 bit spread + 0x000000005DBFC83E, + 0x00005DBFC83E0000, + 0x5DBFC83E00000000, + // 48 bit spread + 0x0000493B189604B6, + 0x493B189604B60000, + // 64 bit spread + 0x420C684950A2D088, +}; + +std::random_device rd; +std::mt19937 gen(rd()); + +uint64_t randUint64() { + return std::uniform_int_distribution( + std::numeric_limits::min(), + std::numeric_limits::max())(gen); +} + +static void r64ContainsHit(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = i * step; + i = (i + 1) % count; + benchmark::DoNotOptimize(roaring64_bitmap_contains(r, val)); + } + roaring64_bitmap_free(r); +} +BENCHMARK(r64ContainsHit)->ArgsProduct({kCountAndDensityRange}); + +static void cppContainsHit(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = i * step; + i = (i + 1) % count; + benchmark::DoNotOptimize(r.contains(val)); + } +} +BENCHMARK(cppContainsHit)->ArgsProduct({kCountAndDensityRange}); + +static void setContainsHit(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + std::set set; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + set.insert(val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = i * step; + i = (i + 1) % count; + benchmark::DoNotOptimize(set.find(val) != set.end()); + } +} +BENCHMARK(setContainsHit)->ArgsProduct({kCountAndDensityRange}); + +static void r64ContainsMiss(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = (i + 1) * step - 1; + i = (i + 1) % count; + benchmark::DoNotOptimize(roaring64_bitmap_contains(r, val)); + } + roaring64_bitmap_free(r); +} +BENCHMARK(r64ContainsMiss)->ArgsProduct({kCountAndDensityRange}); + +static void cppContainsMiss(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = (i + 1) * step - 1; + i = (i + 1) % count; + benchmark::DoNotOptimize(r.contains(val)); + } +} +BENCHMARK(cppContainsMiss)->ArgsProduct({kCountAndDensityRange}); + +static void setContainsMiss(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + std::set set; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + set.insert(val); + } + size_t i = 0; + for (auto _ : state) { + uint64_t val = (i + 1) * step - 1; + i = (i + 1) % count; + benchmark::DoNotOptimize(set.find(val) != set.end()); + } +} +BENCHMARK(setContainsMiss)->ArgsProduct({kCountAndDensityRange}); + +static void r64ContainsRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + roaring64_bitmap_t* r = 
roaring64_bitmap_create(); + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + roaring64_bitmap_add(r, val); + } + for (auto _ : state) { + uint64_t val = randUint64() & bitmask; + benchmark::DoNotOptimize(roaring64_bitmap_contains(r, val)); + } + roaring64_bitmap_free(r); +} +BENCHMARK(r64ContainsRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + +static void cppContainsRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + Roaring64Map r; + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + r.add(val); + } + for (auto _ : state) { + uint64_t val = randUint64() & bitmask; + benchmark::DoNotOptimize(r.contains(val)); + } +} +BENCHMARK(cppContainsRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + +static void setContainsRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + std::set set; + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + set.insert(val); + } + for (auto _ : state) { + uint64_t val = randUint64() & bitmask; + benchmark::DoNotOptimize(set.find(val) != set.end()); + } +} +BENCHMARK(setContainsRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + +static void r64Insert(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + for (auto _ : state) { + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + roaring64_bitmap_free(r); + } + state.SetItemsProcessed(count); +} +BENCHMARK(r64Insert)->ArgsProduct({kCountAndDensityRange}); + +static void cppInsert(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + for (auto _ : state) { + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + } + state.SetItemsProcessed(count); +} +BENCHMARK(cppInsert)->ArgsProduct({kCountAndDensityRange}); + +static void setInsert(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + for (auto _ : state) { + std::set set; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + set.insert(val); + } + } + state.SetItemsProcessed(count); +} +BENCHMARK(setInsert)->ArgsProduct({kCountAndDensityRange}); + +static void r64Remove(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + for (auto _ : state) { + state.PauseTiming(); + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + state.ResumeTiming(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_remove(r, val); + } + state.PauseTiming(); + roaring64_bitmap_free(r); + } + state.SetItemsProcessed(count); +} +BENCHMARK(r64Remove)->ArgsProduct({kCountAndDensityRange}); + +static void cppRemove(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + for (auto _ : state) { + state.PauseTiming(); + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + state.ResumeTiming(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.remove(val); + } + state.PauseTiming(); + } + state.SetItemsProcessed(count); +} +BENCHMARK(cppRemove)->ArgsProduct({kCountAndDensityRange}); + +static void setRemove(benchmark::State& state) { + size_t count = state.range(0); + 
uint64_t step = state.range(1); + for (auto _ : state) { + state.PauseTiming(); + std::set set; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + set.insert(val); + } + state.ResumeTiming(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + set.erase(val); + } + state.PauseTiming(); + } + state.SetItemsProcessed(count); +} +BENCHMARK(setRemove)->ArgsProduct({kCountAndDensityRange}); + +static void r64PortableSerialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + size_t size = roaring64_bitmap_portable_size_in_bytes(r); + std::vector buf(size); + for (auto _ : state) { + benchmark::DoNotOptimize( + roaring64_bitmap_portable_serialize(r, buf.data())); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); + roaring64_bitmap_free(r); +} +BENCHMARK(r64PortableSerialize)->ArgsProduct({kCountAndDensityRange}); + +static void r64FrozenSerialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r, val); + } + roaring64_bitmap_shrink_to_fit(r); + size_t size = roaring64_bitmap_frozen_size_in_bytes(r); + std::vector buf(size); + for (auto _ : state) { + benchmark::DoNotOptimize( + roaring64_bitmap_frozen_serialize(r, buf.data())); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); + roaring64_bitmap_free(r); +} +BENCHMARK(r64FrozenSerialize)->ArgsProduct({kCountAndDensityRange}); + +static void cppPortableSerialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + size_t size = r.getSizeInBytes(/*portable=*/true); + std::vector buf(size); + for (auto _ : state) { + benchmark::DoNotOptimize(r.write(buf.data(), /*portable=*/true)); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(cppPortableSerialize)->ArgsProduct({kCountAndDensityRange}); + +static void cppFrozenSerialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r.add(val); + } + size_t size = r.getFrozenSizeInBytes(); + // TODO: there seems to be a bug in writeFrozen that causes writes beyond + // getFrozenSizeInBytes() + std::vector buf(size * 2); + for (auto _ : state) { + r.writeFrozen(buf.data()); + benchmark::DoNotOptimize(buf); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(cppFrozenSerialize)->ArgsProduct({kCountAndDensityRange}); + +static void r64PortableDeserialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r1, val); + } + size_t size = roaring64_bitmap_portable_size_in_bytes(r1); + std::vector buf(size); + roaring64_bitmap_portable_serialize(r1, buf.data()); + roaring64_bitmap_free(r1); + for (auto _ : state) { + auto r2 = roaring64_bitmap_portable_deserialize_safe(buf.data(), size); + benchmark::DoNotOptimize(r2); + 
roaring64_bitmap_free(r2); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(r64PortableDeserialize)->ArgsProduct({kCountAndDensityRange}); + +static void r64FrozenDeserialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + roaring64_bitmap_t* r1 = roaring64_bitmap_create(); + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + roaring64_bitmap_add(r1, val); + } + roaring64_bitmap_shrink_to_fit(r1); + size_t size = roaring64_bitmap_frozen_size_in_bytes(r1); + char* buf = (char*)aligned_alloc(64, size); + roaring64_bitmap_frozen_serialize(r1, buf); + roaring64_bitmap_free(r1); + for (auto _ : state) { + auto r2 = roaring64_bitmap_frozen_view(buf, size); + benchmark::DoNotOptimize(r2); + roaring64_bitmap_free(r2); + } + free(buf); + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(r64FrozenDeserialize)->ArgsProduct({kCountAndDensityRange}); + +static void cppPortableDeserialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r1; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r1.add(val); + } + size_t size = r1.getSizeInBytes(/*portable=*/true); + std::vector buf(size); + r1.write(buf.data(), /*portable=*/true); + for (auto _ : state) { + auto r2 = Roaring64Map::read(buf.data(), /*portable=*/true); + benchmark::DoNotOptimize(r2); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(cppPortableDeserialize)->ArgsProduct({kCountAndDensityRange}); + +static void cppFrozenDeserialize(benchmark::State& state) { + size_t count = state.range(0); + uint64_t step = state.range(1); + Roaring64Map r1; + for (size_t i = 0; i < count; ++i) { + uint64_t val = i * step; + r1.add(val); + } + size_t size = r1.getFrozenSizeInBytes(); + // TODO: there seems to be a bug in writeFrozen that causes writes beyond + // getFrozenSizeInBytes() + std::vector buf(size * 2); + r1.writeFrozen(buf.data()); + for (auto _ : state) { + auto r2 = Roaring64Map::frozenView(buf.data()); + benchmark::DoNotOptimize(r2); + } + state.SetItemsProcessed(count); + state.SetBytesProcessed(size); +} +BENCHMARK(cppFrozenDeserialize)->ArgsProduct({kCountAndDensityRange}); + +} // namespace roaring + +BENCHMARK_MAIN(); From fcbe947842b5a9f198fe632056fdd9c98e7c61af Mon Sep 17 00:00:00 2001 From: Soerian Date: Sun, 12 Jan 2025 13:47:33 +0000 Subject: [PATCH 06/16] Address review comments --- include/roaring/roaring64.h | 2 +- src/art/art.c | 97 +++++++++++++++++++++++++++---------- src/roaring64.c | 74 ++++++++++++++++++++-------- tests/art_unit.cpp | 2 + 4 files changed, 128 insertions(+), 47 deletions(-) diff --git a/include/roaring/roaring64.h b/include/roaring/roaring64.h index 3506918e6..e185b48aa 100644 --- a/include/roaring/roaring64.h +++ b/include/roaring/roaring64.h @@ -601,7 +601,7 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, /** * Creates a readonly bitmap that is a view of the given buffer. The buffer - * should be created with `roaring64_bitmap_frozen_serialize()`, and must be + * must be created with `roaring64_bitmap_frozen_serialize()`, and must be * aligned by 64 bytes. * * Returns NULL if deserialization fails. 
diff --git a/src/art/art.c b/src/art/art.c index 24d8aa8a3..6ed82ad5c 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -23,10 +23,12 @@ (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ (ptrdiff_t)(~((alignment)-1))) -#define CROARING_ART_ALIGN_RELATIVE(buf_cur, buf_start, alignment) \ - (char *)((buf_start) + \ - (((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1)))) +// Gives the byte difference needed to align the current buffer to the +// alignment, relative to the start of the buffer. +#define CROARING_ART_ALIGN_SIZE_RELATIVE(buf_cur, buf_start, alignment) \ + ((((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ + (ptrdiff_t)(~((alignment)-1))) - \ + (ptrdiff_t)((buf_cur) - (buf_start))) #ifdef __cplusplus extern "C" { @@ -135,7 +137,7 @@ static inline art_typecode_t art_ref_typecode(art_ref_t ref) { */ static art_node_t *art_deref(const art_t *art, art_ref_t ref) { assert(ref != CROARING_ART_NULL_REF); - uint64_t index = ref >> 16; + uint64_t index = art_ref_index(ref); switch (art_ref_typecode(ref)) { case CROARING_ART_LEAF_TYPE: return (art_node_t *)&art->leaves[index]; @@ -181,18 +183,26 @@ static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, } /** - * Extends the array of nodes of the given typecode by `items`. Invalidates - * pointers into the array obtained by `art_deref`. + * Extends the array of nodes of the given typecode. Invalidates pointers into + * the array obtained by `art_deref`. + * + * Must only be called when the node array of the given type is "full" + * (first_free == capacity). */ -static void art_extend(art_t *art, art_typecode_t typecode, size_t items) { +static void art_extend(art_t *art, art_typecode_t typecode) { size_t size = art->first_free[typecode]; - size_t desired_size = size + items; size_t capacity = art->capacities[typecode]; - if (desired_size <= capacity) { + if (size < capacity) { return; } - size_t new_capacity = - (size < 1024) ? 
2 * desired_size : 5 * desired_size / 4; + size_t new_capacity; + if (capacity == 0) { + new_capacity = 2; + } else if (capacity < 1024) { + new_capacity = 2 * capacity; + } else { + new_capacity = 5 * capacity / 4; + } art->capacities[typecode] = new_capacity; size_t increase = new_capacity - capacity; switch (typecode) { @@ -289,7 +299,7 @@ static size_t art_next_free(const art_t *art, art_typecode_t typecode, static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { size_t first_free = art->first_free[typecode]; if (first_free == art->capacities[typecode]) { - art_extend(art, typecode, 1); + art_extend(art, typecode); } art->first_free[typecode] = art_next_free(art, typecode, first_free + 1); return first_free; @@ -1696,7 +1706,8 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t new_capacity = art->first_free[CROARING_ART_LEAF_TYPE]; art->leaves = roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); - freed += art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity; + freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * + sizeof(art_leaf_t); art->capacities[CROARING_ART_LEAF_TYPE] = new_capacity; } if (art->first_free[CROARING_ART_NODE4_TYPE] < @@ -1704,7 +1715,8 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t new_capacity = art->first_free[CROARING_ART_NODE4_TYPE]; art->node4s = roaring_realloc(art->node4s, new_capacity * sizeof(art_node4_t)); - freed += art->capacities[CROARING_ART_NODE4_TYPE] - new_capacity; + freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * + sizeof(art_node4_t); art->capacities[CROARING_ART_NODE4_TYPE] = new_capacity; } if (art->first_free[CROARING_ART_NODE16_TYPE] < @@ -1712,7 +1724,8 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t new_capacity = art->first_free[CROARING_ART_NODE16_TYPE]; art->node16s = roaring_realloc(art->node16s, new_capacity * sizeof(art_node16_t)); - freed += art->capacities[CROARING_ART_NODE16_TYPE] - new_capacity; + freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * + sizeof(art_node16_t); art->capacities[CROARING_ART_NODE16_TYPE] = new_capacity; } if (art->first_free[CROARING_ART_NODE48_TYPE] < @@ -1720,7 +1733,8 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t new_capacity = art->first_free[CROARING_ART_NODE48_TYPE]; art->node48s = roaring_realloc(art->node48s, new_capacity * sizeof(art_node48_t)); - freed += art->capacities[CROARING_ART_NODE48_TYPE] - new_capacity; + freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * + sizeof(art_node48_t); art->capacities[CROARING_ART_NODE48_TYPE] = new_capacity; } if (art->first_free[CROARING_ART_NODE256_TYPE] < @@ -1728,7 +1742,8 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t new_capacity = art->first_free[CROARING_ART_NODE256_TYPE]; art->node256s = roaring_realloc(art->node256s, new_capacity * sizeof(art_node256_t)); - freed += art->capacities[CROARING_ART_NODE256_TYPE] - new_capacity; + freed += (art->capacities[CROARING_ART_NODE256_TYPE] - new_capacity) * + sizeof(art_node256_t); art->capacities[CROARING_ART_NODE256_TYPE] = new_capacity; } return freed; @@ -1786,6 +1801,19 @@ static void art_shrink_at(art_t *art, art_ref_t ref) { } } +static bool art_is_shrunken(const art_t *art) { + return art->first_free[CROARING_ART_LEAF_TYPE] == + art->capacities[CROARING_ART_LEAF_TYPE] && + art->first_free[CROARING_ART_NODE4_TYPE] == + art->capacities[CROARING_ART_NODE4_TYPE] && + art->first_free[CROARING_ART_NODE16_TYPE] == + 
art->capacities[CROARING_ART_NODE16_TYPE] && + art->first_free[CROARING_ART_NODE48_TYPE] == + art->capacities[CROARING_ART_NODE48_TYPE] && + art->first_free[CROARING_ART_NODE256_TYPE] == + art->capacities[CROARING_ART_NODE256_TYPE]; +} + void art_init_cleared(art_t *art) { art->root = CROARING_ART_NULL_REF; memset(art->first_free, 0, sizeof(art->first_free)); @@ -1798,6 +1826,9 @@ void art_init_cleared(art_t *art) { } size_t art_shrink_to_fit(art_t *art) { + if (art_is_shrunken(art)) { + return 0; + } if (art->root != CROARING_ART_NULL_REF) { art->root = art_move_node_to_shrink(art, art->root); art_shrink_at(art, art->root); @@ -2302,6 +2333,9 @@ _Static_assert(alignof(art_leaf_t) == alignof(art_node256_t), "Serialization assumes node type alignment is equal"); size_t art_size_in_bytes(const art_t *art) { + if (!art_is_shrunken(art)) { + return 0; + } // Root. size_t size = sizeof(art->root); // Node counts. @@ -2321,6 +2355,9 @@ size_t art_serialize(const art_t *art, char *buf) { if (buf == NULL) { return 0; } + if (!art_is_shrunken(art)) { + return 0; + } const char *initial_buf = buf; // Root. @@ -2331,12 +2368,15 @@ size_t art_serialize(const art_t *art, char *buf) { memcpy(buf, art->capacities, sizeof(art->capacities)); buf += sizeof(art->capacities); + // Alignment for leaves. The rest of the nodes are aligned the same way. + size_t align_bytes = + CROARING_ART_ALIGN_SIZE_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); + memset(buf, 0, align_bytes); + buf += align_bytes; + if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { size_t size = art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); - // Alignment for leaves. The rest of the nodes are aligned the same way. - buf = - CROARING_ART_ALIGN_RELATIVE(buf, initial_buf, alignof(art_leaf_t)); memcpy(buf, art->leaves, size); buf += size; } @@ -2392,18 +2432,23 @@ size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { buf += sizeof(art->capacities); maxbytes -= sizeof(art->capacities); + // Alignment for leaves. The rest of the nodes are aligned the same way. + const char *before_align = buf; + buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); + if (maxbytes < (size_t)(buf - before_align)) { + return 0; + } + maxbytes -= buf - before_align; + if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { size_t size = art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); - const char *before_align = buf; - // Alignment for leaves. The rest of the nodes are aligned the same way. 
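Serialization here pads relative to the start of the output buffer rather than to an absolute address, so the same layout holds wherever the caller later maps the bytes, provided the buffer itself is adequately aligned. A small sketch of the padding arithmetic, assuming a power-of-two alignment; `pad_to_align` is a hypothetical helper, not a library function.

    #include <assert.h>
    #include <stddef.h>

    /* Illustrative only: number of padding bytes needed so that the offset
     * (buf_cur - buf_start) becomes a multiple of a power-of-two alignment. */
    static size_t pad_to_align(size_t offset, size_t alignment) {
        return ((offset + alignment - 1) & ~(alignment - 1)) - offset;
    }

    int main(void) {
        assert(pad_to_align(0, 8) == 0);
        assert(pad_to_align(5, 8) == 3);
        assert(pad_to_align(16, 8) == 0);
        assert(pad_to_align(17, 64) == 47);
        return 0;
    }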
- buf = CROARING_ART_ALIGN_BUF(buf, alignof(art_leaf_t)); - if (maxbytes < (buf - before_align) + size) { + if (maxbytes < size) { return 0; } art->leaves = (art_leaf_t *)buf; buf += size; - maxbytes -= (buf - before_align); + maxbytes -= size; } if (art->capacities[CROARING_ART_NODE4_TYPE] > 0) { size_t size = diff --git a/src/roaring64.c b/src/roaring64.c index 121dbdecc..789bbf564 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -21,11 +21,6 @@ (char *)(((uintptr_t)(buf) + ((alignment)-1)) & \ (ptrdiff_t)(~((alignment)-1))) -#define CROARING_ALIGN_RELATIVE(buf_cur, buf_start, alignment) \ - (char *)((buf_start) + \ - (((ptrdiff_t)((buf_cur) - (buf_start)) + ((alignment)-1)) & \ - (ptrdiff_t)(~((alignment)-1)))) - #define CROARING_BITSET_ALIGNMENT 64 #ifdef __cplusplus @@ -117,13 +112,23 @@ static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, return *leaf; } -static void extend_containers(roaring64_bitmap_t *r, size_t items) { - size_t desired_cap = r->capacity + items; - if (desired_cap <= r->capacity) { +/** + * Extends the array of container pointers. Must only be called when the array + * is "full" (first_free == capacity). + */ +static void extend_containers(roaring64_bitmap_t *r) { + size_t size = r->first_free; + if (size < r->capacity) { return; } - size_t new_capacity = - (r->capacity < 1024) ? 2 * desired_cap : 5 * desired_cap / 4; + size_t new_capacity; + if (r->capacity == 0) { + new_capacity = 2; + } else if (r->capacity < 1024) { + new_capacity = 2 * r->capacity; + } else { + new_capacity = 5 * r->capacity / 4; + } size_t increase = new_capacity - r->capacity; r->containers = roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); @@ -143,7 +148,7 @@ static size_t next_free_container_idx(const roaring64_bitmap_t *r) { static size_t allocate_index(roaring64_bitmap_t *r) { size_t first_free = r->first_free; if (first_free == r->capacity) { - extend_containers(r, 1); + extend_containers(r); } r->first_free = next_free_container_idx(r); return first_free; @@ -961,8 +966,15 @@ static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { r->first_free = next_free_container_idx(r); } +static inline bool is_shrunken(const roaring64_bitmap_t *r) { + return r->first_free == r->capacity; +} + size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { size_t freed = art_shrink_to_fit(&r->art); + if (is_shrunken(r)) { + return freed; + } art_iterator_t it = art_init_iterator(&r->art, true); while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -975,7 +987,7 @@ size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { if (new_capacity < r->capacity) { r->containers = roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); - freed += r->capacity - new_capacity; + freed += (r->capacity - new_capacity) * sizeof(container_t *); r->capacity = new_capacity; } return freed; @@ -2212,6 +2224,9 @@ size_t align_size(size_t size, size_t alignment) { } size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { + if (!is_shrunken(r)) { + return 0; + } // Flags. size_t size = sizeof(r->flags); // Container count. @@ -2223,15 +2238,23 @@ size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { // ART (8 byte aligned). size = align_size(size, 8); size += art_size_in_bytes(&r->art); - // Containers (aligned). - size = align_size(size, CROARING_BITSET_ALIGNMENT); + + size_t total_sizes[4] = CROARING_ZERO_INITIALIZER; // Indexed by typecode. 
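On the read side, the frozen view consumes the buffer defensively: it checks the remaining byte count before every advance, then moves the pointer and shrinks `maxbytes` in lockstep. A minimal sketch of that pattern under the same assumptions; `read_u64` is a hypothetical helper, not part of the library.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Hypothetical example of the bounds-checked read pattern: refuse to read
     * past the remaining byte count, then advance the cursor and the
     * remaining count together. */
    static bool read_u64(const char **buf, size_t *maxbytes, uint64_t *out) {
        if (*maxbytes < sizeof(uint64_t)) {
            return false; /* truncated input */
        }
        memcpy(out, *buf, sizeof(uint64_t));
        *buf += sizeof(uint64_t);
        *maxbytes -= sizeof(uint64_t);
        return true;
    }

    int main(void) {
        char data[16] = {0};
        data[0] = 42;
        const char *cursor = data;
        size_t remaining = sizeof(data);
        uint64_t v;
        bool ok = read_u64(&cursor, &remaining, &v);
        return (ok && v != 0 && remaining == 8) ? 0 : 1;
    }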
art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { leaf_t leaf = (leaf_t)*it.value; uint8_t typecode = get_typecode(leaf); - size += container_get_frozen_size(get_container(r, leaf), typecode); + total_sizes[typecode] += + container_get_frozen_size(get_container(r, leaf), typecode); art_iterator_next(&it); } + // Containers (aligned). + size = align_size(size, CROARING_BITSET_ALIGNMENT); + size += total_sizes[BITSET_CONTAINER_TYPE]; + size = align_size(size, alignof(rle16_t)); + size += total_sizes[ARRAY_CONTAINER_TYPE]; + size = align_size(size, alignof(uint16_t)); + size += total_sizes[RUN_CONTAINER_TYPE]; // Padding to make overall size a multiple of required alignment. size = align_size(size, CROARING_BITSET_ALIGNMENT); return size; @@ -2269,11 +2292,22 @@ static inline void container_frozen_serialize(const container_t *container, } } +static inline char *pad_align(char *buf, const char *initial_buf, + size_t alignment) { + size_t buf_size = buf - initial_buf; + size_t pad = align_size(buf_size, alignment) - buf_size; + memset(buf, 0, pad); + return buf + pad; +} + size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, char *buf) { if (buf == NULL) { return 0; } + if (!is_shrunken(r)) { + return 0; + } const char *initial_buf = buf; // Flags. @@ -2310,19 +2344,19 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, buf += sizeof(size_t); // ART. - buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, 8); + buf = pad_align(buf, initial_buf, 8); buf += art_serialize(&r->art, buf); // Containers (aligned). // Runs before arrays as run elements are larger than array elements and // smaller than bitset elements. - buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); uint64_t *bitsets = (uint64_t *)buf; buf += total_sizes[BITSET_CONTAINER_TYPE]; - buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, alignof(rle16_t)); + buf = pad_align(buf, initial_buf, alignof(rle16_t)); rle16_t *runs = (rle16_t *)buf; buf += total_sizes[RUN_CONTAINER_TYPE]; - buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, alignof(uint16_t)); + buf = pad_align(buf, initial_buf, alignof(uint16_t)); uint16_t *arrays = (uint16_t *)buf; buf += total_sizes[ARRAY_CONTAINER_TYPE]; @@ -2337,7 +2371,7 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, } // Padding to make overall size a multiple of required alignment. 
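Accumulating the frozen container sizes per typecode first means each alignment boundary is paid once per group rather than once per container. A toy version of the layout computation, using stand-in alignments (64 for bitsets, 4 for runs, 2 for arrays) and a hypothetical `align_up` helper; the byte counts are made up for the example.

    #include <assert.h>
    #include <stddef.h>

    static size_t align_up(size_t size, size_t alignment) {
        return (size + alignment - 1) / alignment * alignment;
    }

    int main(void) {
        size_t bitset_bytes = 8192, run_bytes = 120, array_bytes = 34;
        size_t size = 17; /* header bytes before the container sections */
        size = align_up(size, 64) + bitset_bytes;
        size = align_up(size, 4) + run_bytes;
        size = align_up(size, 2) + array_bytes;
        size = align_up(size, 64); /* overall size is a multiple of 64 */
        assert(size % 64 == 0);
        return 0;
    }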
- buf = CROARING_ALIGN_RELATIVE(buf, initial_buf, CROARING_BITSET_ALIGNMENT); + buf = pad_align(buf, initial_buf, CROARING_BITSET_ALIGNMENT); return buf - initial_buf; } diff --git a/tests/art_unit.cpp b/tests/art_unit.cpp index e3f3ffc3f..fd77e2046 100644 --- a/tests/art_unit.cpp +++ b/tests/art_unit.cpp @@ -676,6 +676,7 @@ DEFINE_TEST(test_art_frozen_view) { assert_art_valid(&art1); } + art_shrink_to_fit(&art1); size_t serialized_size = art_size_in_bytes(&art1); char* buf = (char*)roaring_aligned_malloc(8, serialized_size); assert_int_equal(art_serialize(&art1, buf), serialized_size); @@ -709,6 +710,7 @@ DEFINE_TEST(test_art_frozen_view) { assert_art_valid(&art1); } + art_shrink_to_fit(&art1); size_t serialized_size = art_size_in_bytes(&art1); char* buf = (char*)roaring_aligned_malloc(8, serialized_size); assert_int_equal(art_serialize(&art1, buf), serialized_size); From 9ae922197c8300c97a41e7c9e558207b816c9d8e Mon Sep 17 00:00:00 2001 From: Soerian Date: Sun, 12 Jan 2025 18:58:47 +0000 Subject: [PATCH 07/16] Add random insert / remove benchmark --- microbenchmarks/synthetic_bench.cpp | 49 +++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/microbenchmarks/synthetic_bench.cpp b/microbenchmarks/synthetic_bench.cpp index 22d409577..5ca9ec133 100644 --- a/microbenchmarks/synthetic_bench.cpp +++ b/microbenchmarks/synthetic_bench.cpp @@ -299,6 +299,55 @@ static void setRemove(benchmark::State& state) { } BENCHMARK(setRemove)->ArgsProduct({kCountAndDensityRange}); +static void r64InsertRemoveRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + roaring64_bitmap_t* r = roaring64_bitmap_create(); + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + roaring64_bitmap_add(r, val); + } + for (auto _ : state) { + uint64_t val1 = randUint64() & bitmask; + uint64_t val2 = randUint64() & bitmask; + roaring64_bitmap_add(r, val1); + roaring64_bitmap_remove(r, val2); + } + roaring64_bitmap_free(r); +} +BENCHMARK(r64InsertRemoveRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + +static void cppInsertRemoveRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + Roaring64Map r; + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + r.add(val); + } + for (auto _ : state) { + uint64_t val1 = randUint64() & bitmask; + uint64_t val2 = randUint64() & bitmask; + r.add(val1); + r.remove(val2); + } +} +BENCHMARK(cppInsertRemoveRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + +static void setInsertRemoveRandom(benchmark::State& state) { + uint64_t bitmask = kBitmasks[state.range(0)]; + std::set set; + for (size_t i = 0; i < (1 << 20); ++i) { + uint64_t val = randUint64() & bitmask; + set.insert(val); + } + for (auto _ : state) { + uint64_t val1 = randUint64() & bitmask; + uint64_t val2 = randUint64() & bitmask; + set.insert(val1); + set.erase(val2); + } +} +BENCHMARK(setInsertRemoveRandom)->DenseRange(0, kBitmasks.size() - 1, 1); + static void r64PortableSerialize(benchmark::State& state) { size_t count = state.range(0); uint64_t step = state.range(1); From 23a8441cf3765aec5bc6b0dc4f4c61df39fdb7d1 Mon Sep 17 00:00:00 2001 From: Soerian Date: Sun, 12 Jan 2025 18:58:58 +0000 Subject: [PATCH 08/16] Link free nodes together This adds the index of the next free node into a newly freed node, or `capacity` if there are no more free indices. This significantly speeds up finding the next free index, which is important for add+remove workloads. 
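In miniature, the linked free list described above looks like the following sketch: each unoccupied slot stores the index of the next free slot (with the capacity acting as the terminator), so allocation and deallocation are O(1) instead of scanning for an empty slot. The names (`slot_t`, `pool_t`, `pool_alloc`, `pool_free`) are illustrative, not the library's.

    #include <assert.h>
    #include <stdint.h>

    typedef struct slot_s {
        union {
            uint64_t value;     /* payload when occupied */
            uint64_t next_free; /* index of next free slot when unoccupied */
        };
    } slot_t;

    #define CAPACITY 4

    typedef struct pool_s {
        slot_t slots[CAPACITY];
        uint64_t first_free; /* may equal CAPACITY when the pool is full */
    } pool_t;

    static void pool_init(pool_t *p) {
        for (uint64_t i = 0; i < CAPACITY; ++i) {
            p->slots[i].next_free = i + 1;
        }
        p->first_free = 0;
    }

    static uint64_t pool_alloc(pool_t *p) {
        uint64_t idx = p->first_free;
        assert(idx < CAPACITY); /* a real pool would grow here */
        p->first_free = p->slots[idx].next_free;
        return idx;
    }

    static void pool_free(pool_t *p, uint64_t idx) {
        p->slots[idx].next_free = p->first_free;
        p->first_free = idx;
    }

    int main(void) {
        pool_t p;
        pool_init(&p);
        uint64_t a = pool_alloc(&p);
        uint64_t b = pool_alloc(&p);
        pool_free(&p, a);
        assert(pool_alloc(&p) == a); /* freed slot is reused first */
        (void)b;
        return 0;
    }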
Benchmarks Old: ------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------ r64InsertRemoveRandom/0 127 ns 127 ns 5461079 r64InsertRemoveRandom/1 31633 ns 31604 ns 24028 r64InsertRemoveRandom/2 30782 ns 30769 ns 21859 r64InsertRemoveRandom/3 31985 ns 31969 ns 21558 r64InsertRemoveRandom/4 356 ns 356 ns 1962694 r64InsertRemoveRandom/5 28972 ns 28962 ns 21366 r64InsertRemoveRandom/6 30632 ns 30623 ns 22682 r64InsertRemoveRandom/7 448 ns 448 ns 1601550 r64InsertRemoveRandom/8 32506 ns 32495 ns 21591 r64InsertRemoveRandom/9 689 ns 689 ns 1002237 cppInsertRemoveRandom/0 131 ns 131 ns 5319673 cppInsertRemoveRandom/1 16106 ns 16104 ns 43632 cppInsertRemoveRandom/2 3881 ns 3881 ns 180087 cppInsertRemoveRandom/3 3582 ns 3582 ns 171298 cppInsertRemoveRandom/4 403 ns 402 ns 1666697 cppInsertRemoveRandom/5 993 ns 993 ns 706038 cppInsertRemoveRandom/6 4039 ns 4038 ns 172421 cppInsertRemoveRandom/7 469 ns 469 ns 1440197 cppInsertRemoveRandom/8 1454 ns 1454 ns 633551 cppInsertRemoveRandom/9 654 ns 654 ns 1091588 setInsertRemoveRandom/0 1944 ns 1943 ns 368926 setInsertRemoveRandom/1 1955 ns 1953 ns 404931 setInsertRemoveRandom/2 1911 ns 1910 ns 358466 setInsertRemoveRandom/3 1953 ns 1951 ns 362351 setInsertRemoveRandom/4 2104 ns 2102 ns 321387 setInsertRemoveRandom/5 1944 ns 1943 ns 354836 setInsertRemoveRandom/6 1835 ns 1835 ns 359099 setInsertRemoveRandom/7 1970 ns 1968 ns 372625 setInsertRemoveRandom/8 1894 ns 1892 ns 355456 setInsertRemoveRandom/9 1659 ns 1659 ns 355902 New: ------------------------------------------------------------------ Benchmark Time CPU Iterations ------------------------------------------------------------------ r64InsertRemoveRandom/0 128 ns 128 ns 5614266 r64InsertRemoveRandom/1 935 ns 935 ns 739679 r64InsertRemoveRandom/2 916 ns 916 ns 739944 r64InsertRemoveRandom/3 936 ns 936 ns 690708 r64InsertRemoveRandom/4 368 ns 368 ns 1957642 r64InsertRemoveRandom/5 1141 ns 1140 ns 592505 r64InsertRemoveRandom/6 1139 ns 1138 ns 657840 r64InsertRemoveRandom/7 481 ns 481 ns 1434967 r64InsertRemoveRandom/8 1447 ns 1446 ns 484463 r64InsertRemoveRandom/9 721 ns 721 ns 1017456 cppInsertRemoveRandom/0 134 ns 134 ns 5524804 cppInsertRemoveRandom/1 15616 ns 15608 ns 47666 cppInsertRemoveRandom/2 3855 ns 3854 ns 180265 cppInsertRemoveRandom/3 3809 ns 3808 ns 183595 cppInsertRemoveRandom/4 412 ns 412 ns 1695708 cppInsertRemoveRandom/5 1012 ns 1011 ns 713501 cppInsertRemoveRandom/6 3410 ns 3409 ns 199214 cppInsertRemoveRandom/7 474 ns 474 ns 1496740 cppInsertRemoveRandom/8 1421 ns 1420 ns 465868 cppInsertRemoveRandom/9 564 ns 564 ns 1148076 setInsertRemoveRandom/0 1956 ns 1956 ns 351283 setInsertRemoveRandom/1 1959 ns 1958 ns 355766 setInsertRemoveRandom/2 1886 ns 1885 ns 357406 setInsertRemoveRandom/3 1905 ns 1904 ns 355235 setInsertRemoveRandom/4 1945 ns 1944 ns 364599 setInsertRemoveRandom/5 1902 ns 1902 ns 350312 setInsertRemoveRandom/6 1907 ns 1906 ns 346962 setInsertRemoveRandom/7 1937 ns 1936 ns 356168 setInsertRemoveRandom/8 1881 ns 1880 ns 341472 setInsertRemoveRandom/9 1962 ns 1961 ns 350643 --- src/art/art.c | 356 ++++++++++++++++++++++++++++---------------------- 1 file changed, 197 insertions(+), 159 deletions(-) diff --git a/src/art/art.c b/src/art/art.c index 6ed82ad5c..0d00a3795 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -44,8 +44,13 @@ typedef void art_node_t; typedef struct art_leaf_s { bool occupied; - art_key_chunk_t key[ART_KEY_BYTES]; - art_val_t val; + union { + 
struct { + art_key_chunk_t key[ART_KEY_BYTES]; + art_val_t val; + }; + size_t next_free; // Used if !occupied. + }; } art_leaf_t; // Inner node, with prefix. @@ -62,16 +67,26 @@ typedef struct art_inner_node_s { typedef struct art_node4_s { art_inner_node_t base; uint8_t count; - uint8_t keys[4]; - art_ref_t children[4]; + union { + struct { + uint8_t keys[4]; + art_ref_t children[4]; + }; + size_t next_free; // Used if count == 0. + }; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { art_inner_node_t base; uint8_t count; - uint8_t keys[16]; - art_ref_t children[16]; + union { + struct { + uint8_t keys[16]; + art_ref_t children[16]; + }; + size_t next_free; // Used if count == 0. + }; } art_node16_t; // Node48: key[i] corresponds with children[key[i]] if key[i] != @@ -80,11 +95,17 @@ typedef struct art_node16_s { typedef struct art_node48_s { art_inner_node_t base; uint8_t count; - // Bitset where the ith bit is set if children[i] is available - // Because there are at most 48 children, only the bottom 48 bits are used. - uint64_t available_children; - uint8_t keys[256]; - art_ref_t children[48]; + union { + struct { + // Bitset where the ith bit is set if children[i] is available + // Because there are at most 48 children, only the bottom 48 bits + // are used. + uint64_t available_children; + uint8_t keys[256]; + art_ref_t children[48]; + }; + size_t next_free; // Used if count == 0. + }; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if @@ -92,7 +113,12 @@ typedef struct art_node48_s { typedef struct art_node256_s { art_inner_node_t base; uint16_t count; - art_ref_t children[256]; + union { + struct { + art_ref_t children[256]; + }; + size_t next_free; // Used if count == 0. + }; } art_node256_t; // Helper struct to refer to a child within a node at a specific index. @@ -182,129 +208,6 @@ static inline art_ref_t art_get_ref(const art_t *art, const art_node_t *node, return art_to_ref(art_get_index(art, node, typecode), typecode); } -/** - * Extends the array of nodes of the given typecode. Invalidates pointers into - * the array obtained by `art_deref`. - * - * Must only be called when the node array of the given type is "full" - * (first_free == capacity). 
- */ -static void art_extend(art_t *art, art_typecode_t typecode) { - size_t size = art->first_free[typecode]; - size_t capacity = art->capacities[typecode]; - if (size < capacity) { - return; - } - size_t new_capacity; - if (capacity == 0) { - new_capacity = 2; - } else if (capacity < 1024) { - new_capacity = 2 * capacity; - } else { - new_capacity = 5 * capacity / 4; - } - art->capacities[typecode] = new_capacity; - size_t increase = new_capacity - capacity; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: - art->leaves = - roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); - memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); - break; - case CROARING_ART_NODE4_TYPE: - art->node4s = roaring_realloc(art->node4s, - new_capacity * sizeof(art_node4_t)); - memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); - break; - case CROARING_ART_NODE16_TYPE: - art->node16s = roaring_realloc(art->node16s, - new_capacity * sizeof(art_node16_t)); - memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); - break; - case CROARING_ART_NODE48_TYPE: - art->node48s = roaring_realloc(art->node48s, - new_capacity * sizeof(art_node48_t)); - memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); - break; - case CROARING_ART_NODE256_TYPE: - art->node256s = roaring_realloc( - art->node256s, new_capacity * sizeof(art_node256_t)); - memset(art->node256s + capacity, 0, - increase * sizeof(art_node256_t)); - break; - default: - assert(false); - } -} - -/** - * Returns the next free index for the given typecode, may be equal to the - * capacity of the array. - */ -static size_t art_next_free(const art_t *art, art_typecode_t typecode, - size_t start_index) { - size_t capacity = art->capacities[typecode]; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (!art->leaves[i].occupied) { - return i; - } - } - break; - } - case CROARING_ART_NODE4_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node4s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE16_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node16s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE48_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node48s[i].count == 0) { - return i; - } - } - break; - } - case CROARING_ART_NODE256_TYPE: { - for (size_t i = start_index; i < capacity; ++i) { - if (art->node256s[i].count == 0) { - return i; - } - } - break; - } - default: - assert(false); - return 0; - } - return capacity; -} - -/** - * Marks an index for the given typecode as used, expanding the relevant node - * array if necessary. 
- */ -static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { - size_t first_free = art->first_free[typecode]; - if (first_free == art->capacities[typecode]) { - art_extend(art, typecode); - } - art->first_free[typecode] = art_next_free(art, typecode, first_free + 1); - return first_free; -} - static inline bool art_is_leaf(art_ref_t ref) { return art_ref_typecode(ref) == CROARING_ART_LEAF_TYPE; } @@ -319,6 +222,8 @@ static inline void art_init_inner_node(art_inner_node_t *node, static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode); +static size_t art_allocate_index(art_t *art, art_typecode_t typecode); + // ===================== Start of node-specific functions ====================== static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], @@ -331,7 +236,10 @@ static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], return art_to_ref(index, CROARING_ART_LEAF_TYPE); } -static inline void art_leaf_clear(art_leaf_t *leaf) { leaf->occupied = false; } +static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { + leaf->occupied = false; + leaf->next_free = next_free; +} static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size); @@ -363,7 +271,10 @@ static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], return node; } -static inline void art_node4_clear(art_node4_t *node) { node->count = 0; } +static inline void art_node4_clear(art_node4_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node4_find_child(const art_node4_t *node, art_key_chunk_t key) { @@ -566,7 +477,10 @@ static art_node16_t *art_node16_create(art_t *art, return node; } -static inline void art_node16_clear(art_node16_t *node) { node->count = 0; } +static inline void art_node16_clear(art_node16_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node16_find_child(const art_node16_t *node, art_key_chunk_t key) { @@ -751,7 +665,10 @@ static art_node48_t *art_node48_create(art_t *art, return node; } -static inline void art_node48_clear(art_node48_t *node) { node->count = 0; } +static inline void art_node48_clear(art_node48_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node48_find_child(const art_node48_t *node, art_key_chunk_t key) { @@ -955,7 +872,10 @@ static art_node256_t *art_node256_create(art_t *art, return node; } -static inline void art_node256_clear(art_node256_t *node) { node->count = 0; } +static inline void art_node256_clear(art_node256_t *node, art_ref_t next_free) { + node->count = 0; + node->next_free = next_free; +} static inline art_ref_t art_node256_find_child(const art_node256_t *node, art_key_chunk_t key) { @@ -1180,25 +1100,24 @@ static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, // Marks the node as unoccopied and frees its index. 
static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode) { - uint64_t index = art_get_index(art, node, typecode); - if (index < art->first_free[typecode]) { - art->first_free[typecode] = index; - } + size_t index = art_get_index(art, node, typecode); + size_t next_free = art->first_free[typecode]; + art->first_free[typecode] = index; switch (typecode) { case CROARING_ART_LEAF_TYPE: - art_leaf_clear((art_leaf_t *)node); + art_leaf_clear((art_leaf_t *)node, next_free); break; case CROARING_ART_NODE4_TYPE: - art_node4_clear((art_node4_t *)node); + art_node4_clear((art_node4_t *)node, next_free); break; case CROARING_ART_NODE16_TYPE: - art_node16_clear((art_node16_t *)node); + art_node16_clear((art_node16_t *)node, next_free); break; case CROARING_ART_NODE48_TYPE: - art_node48_clear((art_node48_t *)node); + art_node48_clear((art_node48_t *)node, next_free); break; case CROARING_ART_NODE256_TYPE: - art_node256_clear((art_node256_t *)node); + art_node256_clear((art_node256_t *)node, next_free); break; default: assert(false); @@ -1348,6 +1267,124 @@ static uint8_t art_common_prefix(const art_key_chunk_t key1[], return offset; } +/** + * Extends the array of nodes of the given typecode. Invalidates pointers into + * the array obtained by `art_deref`. + * + * Must only be called when the node array of the given type is "full" + * (first_free == capacity). + */ +static void art_extend(art_t *art, art_typecode_t typecode) { + size_t size = art->first_free[typecode]; + size_t capacity = art->capacities[typecode]; + if (size < capacity) { + return; + } + size_t new_capacity; + if (capacity == 0) { + new_capacity = 2; + } else if (capacity < 1024) { + new_capacity = 2 * capacity; + } else { + new_capacity = 5 * capacity / 4; + } + art->capacities[typecode] = new_capacity; + size_t increase = new_capacity - capacity; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + art->leaves = + roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); + memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_leaf_clear(art->leaves + i, i + 1); + } + break; + } + case CROARING_ART_NODE4_TYPE: { + art->node4s = roaring_realloc(art->node4s, + new_capacity * sizeof(art_node4_t)); + memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node4_clear(art->node4s + i, i + 1); + } + break; + } + case CROARING_ART_NODE16_TYPE: { + art->node16s = roaring_realloc(art->node16s, + new_capacity * sizeof(art_node16_t)); + memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node16_clear(art->node16s + i, i + 1); + } + break; + } + case CROARING_ART_NODE48_TYPE: { + art->node48s = roaring_realloc(art->node48s, + new_capacity * sizeof(art_node48_t)); + memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node48_clear(art->node48s + i, i + 1); + } + break; + } + case CROARING_ART_NODE256_TYPE: { + art->node256s = roaring_realloc( + art->node256s, new_capacity * sizeof(art_node256_t)); + memset(art->node256s + capacity, 0, + increase * sizeof(art_node256_t)); + for (size_t i = capacity; i < new_capacity; ++i) { + art_node256_clear(art->node256s + i, i + 1); + } + break; + } + default: + assert(false); + } +} + +/** + * Returns the next free index for the given typecode, may be equal to the + * capacity of the 
array. + */ +static size_t art_next_free(const art_t *art, art_typecode_t typecode) { + size_t index = art->first_free[typecode]; + switch (typecode) { + case CROARING_ART_LEAF_TYPE: { + return art->leaves[index].next_free; + } + case CROARING_ART_NODE4_TYPE: { + return art->node4s[index].next_free; + } + case CROARING_ART_NODE16_TYPE: { + return art->node16s[index].next_free; + } + case CROARING_ART_NODE48_TYPE: { + return art->node48s[index].next_free; + } + case CROARING_ART_NODE256_TYPE: { + return art->node256s[index].next_free; + } + default: + assert(false); + return 0; + } +} + +/** + * Marks an index for the given typecode as used, expanding the relevant node + * array if necessary. + */ +static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { + size_t first_free = art->first_free[typecode]; + if (first_free == art->capacities[typecode]) { + art_extend(art, typecode); + art->first_free[typecode]++; + return first_free; + } + art->first_free[typecode] = art_next_free(art, typecode); + return first_free; +} + // Returns a pointer to the rootmost node where the value was inserted, may // not be equal to `node`. static art_ref_t art_insert_at(art_t *art, art_ref_t ref, @@ -1659,31 +1696,36 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { size_t to = first_free; switch (typecode) { case CROARING_ART_LEAF_TYPE: { + size_t next_free = art->leaves[to].next_free; memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); - art_leaf_clear(&art->leaves[from]); + art_leaf_clear(&art->leaves[from], next_free); break; } case CROARING_ART_NODE4_TYPE: { + size_t next_free = art->node4s[to].next_free; memcpy(art->node4s + to, art->node4s + from, sizeof(art_node4_t)); - art_node4_clear(&art->node4s[from]); + art_node4_clear(&art->node4s[from], next_free); break; } case CROARING_ART_NODE16_TYPE: { + size_t next_free = art->node16s[to].next_free; memcpy(art->node16s + to, art->node16s + from, sizeof(art_node16_t)); - art_node16_clear(&art->node16s[from]); + art_node16_clear(&art->node16s[from], next_free); break; } case CROARING_ART_NODE48_TYPE: { + size_t next_free = art->node48s[to].next_free; memcpy(art->node48s + to, art->node48s + from, sizeof(art_node48_t)); - art_node48_clear(&art->node48s[from]); + art_node48_clear(&art->node48s[from], next_free); break; } case CROARING_ART_NODE256_TYPE: { + size_t next_free = art->node256s[to].next_free; memcpy(art->node256s + to, art->node256s + from, sizeof(art_node256_t)); - art_node256_clear(&art->node256s[from]); + art_node256_clear(&art->node256s[from], next_free); break; } default: { @@ -1691,7 +1733,7 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { return 0; } } - art->first_free[typecode] = art_next_free(art, typecode, to + 1); + art->first_free[typecode] = from; return art_to_ref(to, typecode); } @@ -2314,10 +2356,6 @@ bool art_internal_validate(const art_t *art, const char **reason, if (first_free > capacity) { return art_validate_fail(&validator, "first_free > capacity"); } - size_t next_free = art_next_free(art, type, 0); - if (first_free != next_free) { - return art_validate_fail(&validator, "first_free != next_free"); - } } } return art_internal_validate_at(art, art->root, validator); From 94bd6910b4a57bcabad0f8211735fb92ff10492c Mon Sep 17 00:00:00 2001 From: Soerian Date: Sun, 19 Jan 2025 17:19:25 +0000 Subject: [PATCH 09/16] Sort free lists in art_shrink_to_fit This avoids a bug in the following scenario: art->leaves = [2,0,x] art->first_free[leaf_type] = 1 Where `2` 
and `0` are pointers to the next free index, and `x` is an occupied leaf. In this case, if `art_shrink_to_fit` was called, then we would have the following result: art->leaves = [2,x,0] art->first_free[leaf_type] = 0 This is not fully shrunken, and therefore wrong. Sorting the free indices fixes this scenario. Before `art_shrink_to_fit`: art->leaves = [1,2,x] art->first_free[leaf_type] = 0 After `art_shrink_to_fit`: art->leaves = [x,2,3] art->first_free[leaf_type] = 1 --- src/art/art.c | 166 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 108 insertions(+), 58 deletions(-) diff --git a/src/art/art.c b/src/art/art.c index 0d00a3795..18933f9a0 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -181,6 +181,11 @@ static art_node_t *art_deref(const art_t *art, art_ref_t ref) { } } +static inline art_node_t *art_get_node(const art_t *art, size_t index, + art_typecode_t typecode) { + return art_deref(art, art_to_ref(index, typecode)); +} + static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, art_typecode_t typecode) { switch (typecode) { @@ -1097,33 +1102,58 @@ static art_ref_t art_node_insert_leaf(art_t *art, art_inner_node_t *node, } } -// Marks the node as unoccopied and frees its index. -static void art_node_free(art_t *art, art_node_t *node, - art_typecode_t typecode) { - size_t index = art_get_index(art, node, typecode); - size_t next_free = art->first_free[typecode]; - art->first_free[typecode] = index; +static size_t art_node_get_next_free(const art_t *art, art_ref_t ref) { + art_node_t *node = art_deref(art, ref); + art_typecode_t typecode = art_ref_typecode(ref); + switch (typecode) { + case CROARING_ART_LEAF_TYPE: + return ((art_leaf_t *)node)->next_free; + case CROARING_ART_NODE4_TYPE: + return ((art_node4_t *)node)->next_free; + case CROARING_ART_NODE16_TYPE: + return ((art_node16_t *)node)->next_free; + case CROARING_ART_NODE48_TYPE: + return ((art_node48_t *)node)->next_free; + case CROARING_ART_NODE256_TYPE: + return ((art_node256_t *)node)->next_free; + default: + assert(false); + return 0; + } +} + +static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, + size_t next_free) { switch (typecode) { case CROARING_ART_LEAF_TYPE: - art_leaf_clear((art_leaf_t *)node, next_free); + ((art_leaf_t *)node)->next_free = next_free; break; case CROARING_ART_NODE4_TYPE: - art_node4_clear((art_node4_t *)node, next_free); + ((art_node4_t *)node)->next_free = next_free; break; case CROARING_ART_NODE16_TYPE: - art_node16_clear((art_node16_t *)node, next_free); + ((art_node16_t *)node)->next_free = next_free; break; case CROARING_ART_NODE48_TYPE: - art_node48_clear((art_node48_t *)node, next_free); + ((art_node48_t *)node)->next_free = next_free; break; case CROARING_ART_NODE256_TYPE: - art_node256_clear((art_node256_t *)node, next_free); + ((art_node256_t *)node)->next_free = next_free; break; default: assert(false); } } +// Marks the node as unoccopied and frees its index. +static void art_node_free(art_t *art, art_node_t *node, + art_typecode_t typecode) { + size_t index = art_get_index(art, node, typecode); + size_t next_free = art->first_free[typecode]; + art_node_set_next_free(node, typecode, next_free); + art->first_free[typecode] = index; +} + // Returns the next child in key order, or NULL if called on a leaf. // Provided index may be in the range [-1, 255]. 
static art_indexed_child_t art_node_next_child(const art_node_t *node, @@ -1295,36 +1325,24 @@ static void art_extend(art_t *art, art_typecode_t typecode) { art->leaves = roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); - for (size_t i = capacity; i < new_capacity; ++i) { - art_leaf_clear(art->leaves + i, i + 1); - } break; } case CROARING_ART_NODE4_TYPE: { art->node4s = roaring_realloc(art->node4s, new_capacity * sizeof(art_node4_t)); memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); - for (size_t i = capacity; i < new_capacity; ++i) { - art_node4_clear(art->node4s + i, i + 1); - } break; } case CROARING_ART_NODE16_TYPE: { art->node16s = roaring_realloc(art->node16s, new_capacity * sizeof(art_node16_t)); memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); - for (size_t i = capacity; i < new_capacity; ++i) { - art_node16_clear(art->node16s + i, i + 1); - } break; } case CROARING_ART_NODE48_TYPE: { art->node48s = roaring_realloc(art->node48s, new_capacity * sizeof(art_node48_t)); memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); - for (size_t i = capacity; i < new_capacity; ++i) { - art_node48_clear(art->node48s + i, i + 1); - } break; } case CROARING_ART_NODE256_TYPE: { @@ -1332,14 +1350,14 @@ static void art_extend(art_t *art, art_typecode_t typecode) { art->node256s, new_capacity * sizeof(art_node256_t)); memset(art->node256s + capacity, 0, increase * sizeof(art_node256_t)); - for (size_t i = capacity; i < new_capacity; ++i) { - art_node256_clear(art->node256s + i, i + 1); - } break; } default: assert(false); } + for (size_t i = capacity; i < new_capacity; ++i) { + art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); + } } /** @@ -1348,26 +1366,7 @@ static void art_extend(art_t *art, art_typecode_t typecode) { */ static size_t art_next_free(const art_t *art, art_typecode_t typecode) { size_t index = art->first_free[typecode]; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: { - return art->leaves[index].next_free; - } - case CROARING_ART_NODE4_TYPE: { - return art->node4s[index].next_free; - } - case CROARING_ART_NODE16_TYPE: { - return art->node16s[index].next_free; - } - case CROARING_ART_NODE48_TYPE: { - return art->node48s[index].next_free; - } - case CROARING_ART_NODE256_TYPE: { - return art->node256s[index].next_free; - } - default: - assert(false); - return 0; - } + return art_node_get_next_free(art, art_to_ref(index, typecode)); } /** @@ -1682,7 +1681,8 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { /** * Moves the node at `ref` to the earliest free index before it (if any), - * returns the new ref. + * returns the new ref. Assumes `art->first_free[typecode]` points to the + * smallest free index. 
*/ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { size_t idx = art_ref_index(ref); @@ -1694,38 +1694,29 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { } size_t from = idx; size_t to = first_free; + size_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); switch (typecode) { case CROARING_ART_LEAF_TYPE: { - size_t next_free = art->leaves[to].next_free; memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); - art_leaf_clear(&art->leaves[from], next_free); break; } case CROARING_ART_NODE4_TYPE: { - size_t next_free = art->node4s[to].next_free; memcpy(art->node4s + to, art->node4s + from, sizeof(art_node4_t)); - art_node4_clear(&art->node4s[from], next_free); break; } case CROARING_ART_NODE16_TYPE: { - size_t next_free = art->node16s[to].next_free; memcpy(art->node16s + to, art->node16s + from, sizeof(art_node16_t)); - art_node16_clear(&art->node16s[from], next_free); break; } case CROARING_ART_NODE48_TYPE: { - size_t next_free = art->node48s[to].next_free; memcpy(art->node48s + to, art->node48s + from, sizeof(art_node48_t)); - art_node48_clear(&art->node48s[from], next_free); break; } case CROARING_ART_NODE256_TYPE: { - size_t next_free = art->node256s[to].next_free; memcpy(art->node256s + to, art->node256s + from, sizeof(art_node256_t)); - art_node256_clear(&art->node256s[from], next_free); break; } default: { @@ -1733,10 +1724,68 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { return 0; } } - art->first_free[typecode] = from; + + // With an integer representing the next free index, and an `x` representing + // an occupied index, assume the following scenario at the start of this + // function: + // nodes = [1,2,5,x,x] + // first_free = 0 + // + // We just moved a node from index 3 to 0: + // nodes = [x,2,5,?,x] + // + // We need to modify the free list so that the free indices are ascending. + // This can be done by traversing the list until we find a node with a + // `next_free` greater than the index we copied the node from, and inserting + // the new index in between. This leads to the following: + // nodes = [x,2,3,5,x] + // first_free = 1 + size_t initial_next_free = next_free; + size_t current = next_free; + while (next_free < from) { + current = next_free; + next_free = + art_node_get_next_free(art, art_to_ref(next_free, typecode)); + } + art_node_set_next_free(art_deref(art, ref), typecode, next_free); + if (current < from) { + art_node_set_next_free(art_get_node(art, current, typecode), typecode, + from); + } + art->first_free[typecode] = + from < initial_next_free ? from : initial_next_free; return art_to_ref(to, typecode); } +/** + * Sorts the free lists pointed to by art->first_free in ascending index order. 
+ */ +static void art_sort_free_lists(art_t *art) { + for (art_typecode_t type = CROARING_ART_LEAF_TYPE; + type <= CROARING_ART_NODE256_TYPE; ++type) { + bool *free_indices = + (bool *)roaring_malloc(art->capacities[type] * sizeof(bool)); + memset(free_indices, false, art->capacities[type] * sizeof(bool)); + + for (size_t i = art->first_free[type]; i < art->capacities[type]; + i = art_node_get_next_free(art, art_to_ref(i, type))) { + free_indices[i] = true; + } + + size_t first_free = art->capacities[type]; + for (size_t i = art->capacities[type]; i > 0; --i) { + size_t index = i - 1; + if (free_indices[index]) { + art_node_set_next_free(art_get_node(art, index, type), type, + first_free); + first_free = index; + } + } + art->first_free[type] = first_free; + roaring_free(free_indices); + } +} + /** * Shrinks all node arrays to `first_free`. Assumes all indices after * `first_free` are unused. @@ -1872,6 +1921,7 @@ size_t art_shrink_to_fit(art_t *art) { return 0; } if (art->root != CROARING_ART_NULL_REF) { + art_sort_free_lists(art); art->root = art_move_node_to_shrink(art, art->root); art_shrink_at(art, art->root); } From 145616c3f054fe78dea99cb7014014b94883a65a Mon Sep 17 00:00:00 2001 From: Soerian Date: Sun, 26 Jan 2025 19:55:58 +0000 Subject: [PATCH 10/16] Minor cleanups to ART and r64 internals --- src/art/art.c | 32 +++++++++++++------------------- src/roaring64.c | 2 -- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/src/art/art.c b/src/art/art.c index 18933f9a0..fe250adf8 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -37,19 +37,15 @@ namespace internal { #endif typedef uint8_t art_typecode_t; - -// All node types should count as unoccupied if zeroed with memset. - typedef void art_node_t; typedef struct art_leaf_s { - bool occupied; union { struct { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t val; }; - size_t next_free; // Used if !occupied. + size_t next_free; }; } art_leaf_t; @@ -65,27 +61,27 @@ typedef struct art_inner_node_s { // Node4: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node4_s { - art_inner_node_t base; - uint8_t count; union { struct { + art_inner_node_t base; + uint8_t count; uint8_t keys[4]; art_ref_t children[4]; }; - size_t next_free; // Used if count == 0. + size_t next_free; }; } art_node4_t; // Node16: key[i] corresponds with children[i]. Keys are sorted. typedef struct art_node16_s { - art_inner_node_t base; - uint8_t count; union { struct { + art_inner_node_t base; + uint8_t count; uint8_t keys[16]; art_ref_t children[16]; }; - size_t next_free; // Used if count == 0. + size_t next_free; }; } art_node16_t; @@ -93,10 +89,10 @@ typedef struct art_node16_s { // CROARING_ART_NODE48_EMPTY_VAL. Keys are naturally sorted due to direct // indexing. typedef struct art_node48_s { - art_inner_node_t base; - uint8_t count; union { struct { + art_inner_node_t base; + uint8_t count; // Bitset where the ith bit is set if children[i] is available // Because there are at most 48 children, only the bottom 48 bits // are used. @@ -104,20 +100,20 @@ typedef struct art_node48_s { uint8_t keys[256]; art_ref_t children[48]; }; - size_t next_free; // Used if count == 0. + size_t next_free; }; } art_node48_t; // Node256: children[i] is directly indexed by key chunk. A child is present if // children[i] != NULL. typedef struct art_node256_s { - art_inner_node_t base; - uint16_t count; union { struct { + art_inner_node_t base; + uint16_t count; art_ref_t children[256]; }; - size_t next_free; // Used if count == 0. 
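In miniature, the sorting step looks like the sketch below: walk the existing list to mark which slots are free, then relink those slots from the highest index down so the rebuilt list visits free indices in ascending order. The names (`mini_pool_t`, `sort_free_list`) are illustrative, not the library's.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define CAP 4

    typedef struct {
        uint64_t next_free[CAP]; /* meaningful only for free slots */
        uint64_t first_free;     /* CAP terminates the list */
    } mini_pool_t;

    static void sort_free_list(mini_pool_t *p) {
        bool is_free[CAP] = {false};
        for (uint64_t i = p->first_free; i < CAP; i = p->next_free[i]) {
            is_free[i] = true;
        }
        uint64_t first = CAP;
        for (uint64_t i = CAP; i > 0; --i) {
            if (is_free[i - 1]) {
                p->next_free[i - 1] = first;
                first = i - 1;
            }
        }
        p->first_free = first;
    }

    int main(void) {
        /* Free slots 2 and 0, linked in descending order: 2 -> 0 -> 4. */
        mini_pool_t p = {.next_free = {4, 0, 0, 0}, .first_free = 2};
        sort_free_list(&p);
        assert(p.first_free == 0);
        assert(p.next_free[0] == 2);
        assert(p.next_free[2] == 4);
        return 0;
    }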
+ size_t next_free; }; } art_node256_t; @@ -235,14 +231,12 @@ static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], art_val_t val) { uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); art_leaf_t *leaf = art->leaves + index; - leaf->occupied = true; memcpy(leaf->key, key, ART_KEY_BYTES); leaf->val = val; return art_to_ref(index, CROARING_ART_LEAF_TYPE); } static inline void art_leaf_clear(art_leaf_t *leaf, art_ref_t next_free) { - leaf->occupied = false; leaf->next_free = next_free; } diff --git a/src/roaring64.c b/src/roaring64.c index 789bbf564..84ff500f5 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -179,8 +179,6 @@ static inline leaf_t copy_leaf_container(const roaring64_bitmap_t *r1, return add_container(r2, container, typecode); } -static inline void free_leaf(leaf_t *leaf) { roaring_free(leaf); } - static inline int compare_high48(art_key_chunk_t key1[], art_key_chunk_t key2[]) { return art_compare_keys(key1, key2); From 62d2c1cc6a5a34e3b107444d7397905d960dee85 Mon Sep 17 00:00:00 2001 From: Soerian Date: Sat, 8 Feb 2025 19:36:28 +0000 Subject: [PATCH 11/16] Replace size_t with uint64_t where applicable Also replace malloc+memset with calloc. --- include/roaring/art/art.h | 4 +- src/art/art.c | 81 +++++++++++++++++++-------------------- 2 files changed, 42 insertions(+), 43 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index 7a9e4eb25..362c32fc8 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -58,8 +58,8 @@ typedef struct art_s { // Indexed by node typecode, thus 1 larger than it needs to be for // convenience. `first_free` indicates the index where the first free node // lives, which may be equal to the capacity. - size_t first_free[6]; - size_t capacities[6]; + uint64_t first_free[6]; + uint64_t capacities[6]; art_leaf_t *leaves; art_node4_t *node4s; diff --git a/src/art/art.c b/src/art/art.c index fe250adf8..686b4e8b0 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -45,7 +45,7 @@ typedef struct art_leaf_s { art_key_chunk_t key[ART_KEY_BYTES]; art_val_t val; }; - size_t next_free; + uint64_t next_free; }; } art_leaf_t; @@ -68,7 +68,7 @@ typedef struct art_node4_s { uint8_t keys[4]; art_ref_t children[4]; }; - size_t next_free; + uint64_t next_free; }; } art_node4_t; @@ -81,7 +81,7 @@ typedef struct art_node16_s { uint8_t keys[16]; art_ref_t children[16]; }; - size_t next_free; + uint64_t next_free; }; } art_node16_t; @@ -100,7 +100,7 @@ typedef struct art_node48_s { uint8_t keys[256]; art_ref_t children[48]; }; - size_t next_free; + uint64_t next_free; }; } art_node48_t; @@ -113,7 +113,7 @@ typedef struct art_node256_s { uint16_t count; art_ref_t children[256]; }; - size_t next_free; + uint64_t next_free; }; } art_node256_t; @@ -177,7 +177,7 @@ static art_node_t *art_deref(const art_t *art, art_ref_t ref) { } } -static inline art_node_t *art_get_node(const art_t *art, size_t index, +static inline art_node_t *art_get_node(const art_t *art, uint64_t index, art_typecode_t typecode) { return art_deref(art, art_to_ref(index, typecode)); } @@ -223,7 +223,7 @@ static inline void art_init_inner_node(art_inner_node_t *node, static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode); -static size_t art_allocate_index(art_t *art, art_typecode_t typecode); +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); // ===================== Start of node-specific functions ====================== @@ -1096,7 +1096,7 @@ static art_ref_t 
art_node_insert_leaf(art_t *art, art_inner_node_t *node, } } -static size_t art_node_get_next_free(const art_t *art, art_ref_t ref) { +static uint64_t art_node_get_next_free(const art_t *art, art_ref_t ref) { art_node_t *node = art_deref(art, ref); art_typecode_t typecode = art_ref_typecode(ref); switch (typecode) { @@ -1117,7 +1117,7 @@ static size_t art_node_get_next_free(const art_t *art, art_ref_t ref) { } static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, - size_t next_free) { + uint64_t next_free) { switch (typecode) { case CROARING_ART_LEAF_TYPE: ((art_leaf_t *)node)->next_free = next_free; @@ -1142,8 +1142,8 @@ static void art_node_set_next_free(art_node_t *node, art_typecode_t typecode, // Marks the node as unoccopied and frees its index. static void art_node_free(art_t *art, art_node_t *node, art_typecode_t typecode) { - size_t index = art_get_index(art, node, typecode); - size_t next_free = art->first_free[typecode]; + uint64_t index = art_get_index(art, node, typecode); + uint64_t next_free = art->first_free[typecode]; art_node_set_next_free(node, typecode, next_free); art->first_free[typecode] = index; } @@ -1299,12 +1299,12 @@ static uint8_t art_common_prefix(const art_key_chunk_t key1[], * (first_free == capacity). */ static void art_extend(art_t *art, art_typecode_t typecode) { - size_t size = art->first_free[typecode]; - size_t capacity = art->capacities[typecode]; + uint64_t size = art->first_free[typecode]; + uint64_t capacity = art->capacities[typecode]; if (size < capacity) { return; } - size_t new_capacity; + uint64_t new_capacity; if (capacity == 0) { new_capacity = 2; } else if (capacity < 1024) { @@ -1313,7 +1313,7 @@ static void art_extend(art_t *art, art_typecode_t typecode) { new_capacity = 5 * capacity / 4; } art->capacities[typecode] = new_capacity; - size_t increase = new_capacity - capacity; + uint64_t increase = new_capacity - capacity; switch (typecode) { case CROARING_ART_LEAF_TYPE: { art->leaves = @@ -1349,7 +1349,7 @@ static void art_extend(art_t *art, art_typecode_t typecode) { default: assert(false); } - for (size_t i = capacity; i < new_capacity; ++i) { + for (uint64_t i = capacity; i < new_capacity; ++i) { art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); } } @@ -1358,8 +1358,8 @@ static void art_extend(art_t *art, art_typecode_t typecode) { * Returns the next free index for the given typecode, may be equal to the * capacity of the array. */ -static size_t art_next_free(const art_t *art, art_typecode_t typecode) { - size_t index = art->first_free[typecode]; +static uint64_t art_next_free(const art_t *art, art_typecode_t typecode) { + uint64_t index = art->first_free[typecode]; return art_node_get_next_free(art, art_to_ref(index, typecode)); } @@ -1367,8 +1367,8 @@ static size_t art_next_free(const art_t *art, art_typecode_t typecode) { * Marks an index for the given typecode as used, expanding the relevant node * array if necessary. */ -static size_t art_allocate_index(art_t *art, art_typecode_t typecode) { - size_t first_free = art->first_free[typecode]; +static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode) { + uint64_t first_free = art->first_free[typecode]; if (first_free == art->capacities[typecode]) { art_extend(art, typecode); art->first_free[typecode]++; @@ -1679,16 +1679,16 @@ void art_node_printf(const art_t *art, art_ref_t ref, uint8_t depth) { * smallest free index. 
*/ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { - size_t idx = art_ref_index(ref); + uint64_t idx = art_ref_index(ref); art_typecode_t typecode = art_ref_typecode(ref); - size_t first_free = art->first_free[typecode]; + uint64_t first_free = art->first_free[typecode]; assert(idx != first_free); if (idx < first_free) { return ref; } - size_t from = idx; - size_t to = first_free; - size_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); + uint64_t from = idx; + uint64_t to = first_free; + uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); switch (typecode) { case CROARING_ART_LEAF_TYPE: { memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); @@ -1734,8 +1734,8 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { // the new index in between. This leads to the following: // nodes = [x,2,3,5,x] // first_free = 1 - size_t initial_next_free = next_free; - size_t current = next_free; + uint64_t initial_next_free = next_free; + uint64_t current = next_free; while (next_free < from) { current = next_free; next_free = @@ -1758,17 +1758,16 @@ static void art_sort_free_lists(art_t *art) { for (art_typecode_t type = CROARING_ART_LEAF_TYPE; type <= CROARING_ART_NODE256_TYPE; ++type) { bool *free_indices = - (bool *)roaring_malloc(art->capacities[type] * sizeof(bool)); - memset(free_indices, false, art->capacities[type] * sizeof(bool)); + (bool *)roaring_calloc(art->capacities[type], sizeof(bool)); - for (size_t i = art->first_free[type]; i < art->capacities[type]; + for (uint64_t i = art->first_free[type]; i < art->capacities[type]; i = art_node_get_next_free(art, art_to_ref(i, type))) { free_indices[i] = true; } - size_t first_free = art->capacities[type]; - for (size_t i = art->capacities[type]; i > 0; --i) { - size_t index = i - 1; + uint64_t first_free = art->capacities[type]; + for (uint64_t i = art->capacities[type]; i > 0; --i) { + uint64_t index = i - 1; if (free_indices[index]) { art_node_set_next_free(art_get_node(art, index, type), type, first_free); @@ -1788,7 +1787,7 @@ static size_t art_shrink_node_arrays(art_t *art) { size_t freed = 0; if (art->first_free[CROARING_ART_LEAF_TYPE] < art->capacities[CROARING_ART_LEAF_TYPE]) { - size_t new_capacity = art->first_free[CROARING_ART_LEAF_TYPE]; + uint64_t new_capacity = art->first_free[CROARING_ART_LEAF_TYPE]; art->leaves = roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * @@ -1797,7 +1796,7 @@ static size_t art_shrink_node_arrays(art_t *art) { } if (art->first_free[CROARING_ART_NODE4_TYPE] < art->capacities[CROARING_ART_NODE4_TYPE]) { - size_t new_capacity = art->first_free[CROARING_ART_NODE4_TYPE]; + uint64_t new_capacity = art->first_free[CROARING_ART_NODE4_TYPE]; art->node4s = roaring_realloc(art->node4s, new_capacity * sizeof(art_node4_t)); freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * @@ -1806,7 +1805,7 @@ static size_t art_shrink_node_arrays(art_t *art) { } if (art->first_free[CROARING_ART_NODE16_TYPE] < art->capacities[CROARING_ART_NODE16_TYPE]) { - size_t new_capacity = art->first_free[CROARING_ART_NODE16_TYPE]; + uint64_t new_capacity = art->first_free[CROARING_ART_NODE16_TYPE]; art->node16s = roaring_realloc(art->node16s, new_capacity * sizeof(art_node16_t)); freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * @@ -1815,7 +1814,7 @@ static size_t art_shrink_node_arrays(art_t *art) { } if 
(art->first_free[CROARING_ART_NODE48_TYPE] < art->capacities[CROARING_ART_NODE48_TYPE]) { - size_t new_capacity = art->first_free[CROARING_ART_NODE48_TYPE]; + uint64_t new_capacity = art->first_free[CROARING_ART_NODE48_TYPE]; art->node48s = roaring_realloc(art->node48s, new_capacity * sizeof(art_node48_t)); freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * @@ -1824,7 +1823,7 @@ static size_t art_shrink_node_arrays(art_t *art) { } if (art->first_free[CROARING_ART_NODE256_TYPE] < art->capacities[CROARING_ART_NODE256_TYPE]) { - size_t new_capacity = art->first_free[CROARING_ART_NODE256_TYPE]; + uint64_t new_capacity = art->first_free[CROARING_ART_NODE256_TYPE]; art->node256s = roaring_realloc(art->node256s, new_capacity * sizeof(art_node256_t)); freed += (art->capacities[CROARING_ART_NODE256_TYPE] - new_capacity) * @@ -2394,9 +2393,9 @@ bool art_internal_validate(const art_t *art, const char **reason, }; for (art_typecode_t type = CROARING_ART_LEAF_TYPE; type <= CROARING_ART_NODE256_TYPE; ++type) { - size_t capacity = art->capacities[type]; - for (size_t i = 0; i < capacity; ++i) { - size_t first_free = art->first_free[type]; + uint64_t capacity = art->capacities[type]; + for (uint64_t i = 0; i < capacity; ++i) { + uint64_t first_free = art->first_free[type]; if (first_free > capacity) { return art_validate_fail(&validator, "first_free > capacity"); } From ba095a6878e526c37af452937fd3468b5c3232f3 Mon Sep 17 00:00:00 2001 From: Soerian Date: Mon, 17 Feb 2025 21:56:14 +0000 Subject: [PATCH 12/16] Use a generic pointer array for ART nodes This, combined with a static array of node type sizes, allows us to generically manipulate the nodes. --- include/roaring/art/art.h | 15 +- src/art/art.c | 304 ++++++++++---------------------------- 2 files changed, 79 insertions(+), 240 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index 362c32fc8..d7a71a47c 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -39,11 +39,7 @@ typedef uint8_t art_key_chunk_t; // CROARING_ART_NULL_REF when pointing to a non-existent node. typedef uint64_t art_ref_t; -typedef struct art_leaf_s art_leaf_t; -typedef struct art_node4_s art_node4_t; -typedef struct art_node16_s art_node16_t; -typedef struct art_node48_s art_node48_t; -typedef struct art_node256_s art_node256_t; +typedef void art_node_t; /** * The ART is empty when root is a null ref. @@ -55,17 +51,12 @@ typedef struct art_node256_s art_node256_t; typedef struct art_s { art_ref_t root; - // Indexed by node typecode, thus 1 larger than it needs to be for + // Indexed by node typecode, thus 1 larger than they need to be for // convenience. `first_free` indicates the index where the first free node // lives, which may be equal to the capacity. uint64_t first_free[6]; uint64_t capacities[6]; - - art_leaf_t *leaves; - art_node4_t *node4s; - art_node16_t *node16s; - art_node48_t *node48s; - art_node256_t *node256s; + art_node_t *nodes[6]; } art_t; typedef uint64_t art_val_t; diff --git a/src/art/art.c b/src/art/art.c index 686b4e8b0..b984380d8 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -15,6 +15,9 @@ #define CROARING_ART_NODE48_TYPE 4 #define CROARING_ART_NODE256_TYPE 5 +#define CROARING_ART_MIN_TYPE CROARING_ART_LEAF_TYPE +#define CROARING_ART_MAX_TYPE CROARING_ART_NODE256_TYPE + // Node48 placeholder value to indicate no child is present at this key index. 
#define CROARING_ART_NODE48_EMPTY_VAL 48 #define CROARING_NODE48_AVAILABLE_CHILDREN_MASK ((UINT64_C(1) << 48) - 1) @@ -37,7 +40,6 @@ namespace internal { #endif typedef uint8_t art_typecode_t; -typedef void art_node_t; typedef struct art_leaf_s { union { @@ -117,6 +119,16 @@ typedef struct art_node256_s { }; } art_node256_t; +// Size of each node type, indexed by typecode for convenience. +static const size_t ART_NODE_SIZES[] = { + 0, + sizeof(art_leaf_t), + sizeof(art_node4_t), + sizeof(art_node16_t), + sizeof(art_node48_t), + sizeof(art_node256_t), +}; + // Helper struct to refer to a child within a node at a specific index. typedef struct art_indexed_child_s { art_ref_t child; @@ -159,22 +171,9 @@ static inline art_typecode_t art_ref_typecode(art_ref_t ref) { */ static art_node_t *art_deref(const art_t *art, art_ref_t ref) { assert(ref != CROARING_ART_NULL_REF); - uint64_t index = art_ref_index(ref); - switch (art_ref_typecode(ref)) { - case CROARING_ART_LEAF_TYPE: - return (art_node_t *)&art->leaves[index]; - case CROARING_ART_NODE4_TYPE: - return (art_node_t *)&art->node4s[index]; - case CROARING_ART_NODE16_TYPE: - return (art_node_t *)&art->node16s[index]; - case CROARING_ART_NODE48_TYPE: - return (art_node_t *)&art->node48s[index]; - case CROARING_ART_NODE256_TYPE: - return (art_node_t *)&art->node256s[index]; - default: - assert(false); - return NULL; - } + art_typecode_t typecode = art_ref_typecode(ref); + return (art_node_t *)((char *)art->nodes[typecode] + + art_ref_index(ref) * ART_NODE_SIZES[typecode]); } static inline art_node_t *art_get_node(const art_t *art, uint64_t index, @@ -184,17 +183,18 @@ static inline art_node_t *art_get_node(const art_t *art, uint64_t index, static inline uint64_t art_get_index(const art_t *art, const art_node_t *node, art_typecode_t typecode) { + art_node_t *nodes = art->nodes[typecode]; switch (typecode) { case CROARING_ART_LEAF_TYPE: - return (art_leaf_t *)node - art->leaves; + return (art_leaf_t *)node - (art_leaf_t *)nodes; case CROARING_ART_NODE4_TYPE: - return (art_node4_t *)node - art->node4s; + return (art_node4_t *)node - (art_node4_t *)nodes; case CROARING_ART_NODE16_TYPE: - return (art_node16_t *)node - art->node16s; + return (art_node16_t *)node - (art_node16_t *)nodes; case CROARING_ART_NODE48_TYPE: - return (art_node48_t *)node - art->node48s; + return (art_node48_t *)node - (art_node48_t *)nodes; case CROARING_ART_NODE256_TYPE: - return (art_node256_t *)node - art->node256s; + return (art_node256_t *)node - (art_node256_t *)nodes; default: assert(false); return 0; @@ -230,7 +230,8 @@ static uint64_t art_allocate_index(art_t *art, art_typecode_t typecode); static art_ref_t art_leaf_create(art_t *art, const art_key_chunk_t key[], art_val_t val) { uint64_t index = art_allocate_index(art, CROARING_ART_LEAF_TYPE); - art_leaf_t *leaf = art->leaves + index; + art_leaf_t *leaf = + ((art_leaf_t *)art->nodes[CROARING_ART_LEAF_TYPE]) + index; memcpy(leaf->key, key, ART_KEY_BYTES); leaf->val = val; return art_to_ref(index, CROARING_ART_LEAF_TYPE); @@ -264,7 +265,8 @@ static art_ref_t art_node256_insert(art_t *art, art_node256_t *node, static art_node4_t *art_node4_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { uint64_t index = art_allocate_index(art, CROARING_ART_NODE4_TYPE); - art_node4_t *node = art->node4s + index; + art_node4_t *node = + ((art_node4_t *)art->nodes[CROARING_ART_NODE4_TYPE]) + index; art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; @@ -470,7 +472,8 @@ static 
art_node16_t *art_node16_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { uint64_t index = art_allocate_index(art, CROARING_ART_NODE16_TYPE); - art_node16_t *node = art->node16s + index; + art_node16_t *node = + ((art_node16_t *)art->nodes[CROARING_ART_NODE16_TYPE]) + index; art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; return node; @@ -654,7 +657,8 @@ static art_node48_t *art_node48_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { uint64_t index = art_allocate_index(art, CROARING_ART_NODE48_TYPE); - art_node48_t *node = art->node48s + index; + art_node48_t *node = + ((art_node48_t *)art->nodes[CROARING_ART_NODE48_TYPE]) + index; art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; node->available_children = CROARING_NODE48_AVAILABLE_CHILDREN_MASK; @@ -862,7 +866,8 @@ static art_node256_t *art_node256_create(art_t *art, const art_key_chunk_t prefix[], uint8_t prefix_size) { uint64_t index = art_allocate_index(art, CROARING_ART_NODE256_TYPE); - art_node256_t *node = art->node256s + index; + art_node256_t *node = + ((art_node256_t *)art->nodes[CROARING_ART_NODE256_TYPE]) + index; art_init_inner_node(&node->base, prefix, prefix_size); node->count = 0; for (size_t i = 0; i < 256; ++i) { @@ -1313,42 +1318,11 @@ static void art_extend(art_t *art, art_typecode_t typecode) { new_capacity = 5 * capacity / 4; } art->capacities[typecode] = new_capacity; + art->nodes[typecode] = roaring_realloc( + art->nodes[typecode], new_capacity * ART_NODE_SIZES[typecode]); uint64_t increase = new_capacity - capacity; - switch (typecode) { - case CROARING_ART_LEAF_TYPE: { - art->leaves = - roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); - memset(art->leaves + capacity, 0, increase * sizeof(art_leaf_t)); - break; - } - case CROARING_ART_NODE4_TYPE: { - art->node4s = roaring_realloc(art->node4s, - new_capacity * sizeof(art_node4_t)); - memset(art->node4s + capacity, 0, increase * sizeof(art_node4_t)); - break; - } - case CROARING_ART_NODE16_TYPE: { - art->node16s = roaring_realloc(art->node16s, - new_capacity * sizeof(art_node16_t)); - memset(art->node16s + capacity, 0, increase * sizeof(art_node16_t)); - break; - } - case CROARING_ART_NODE48_TYPE: { - art->node48s = roaring_realloc(art->node48s, - new_capacity * sizeof(art_node48_t)); - memset(art->node48s + capacity, 0, increase * sizeof(art_node48_t)); - break; - } - case CROARING_ART_NODE256_TYPE: { - art->node256s = roaring_realloc( - art->node256s, new_capacity * sizeof(art_node256_t)); - memset(art->node256s + capacity, 0, - increase * sizeof(art_node256_t)); - break; - } - default: - assert(false); - } + memset(art_get_node(art, capacity, typecode), 0, + increase * ART_NODE_SIZES[typecode]); for (uint64_t i = capacity; i < new_capacity; ++i) { art_node_set_next_free(art_get_node(art, i, typecode), typecode, i + 1); } @@ -1689,35 +1663,8 @@ static art_ref_t art_move_node_to_shrink(art_t *art, art_ref_t ref) { uint64_t from = idx; uint64_t to = first_free; uint64_t next_free = art_node_get_next_free(art, art_to_ref(to, typecode)); - switch (typecode) { - case CROARING_ART_LEAF_TYPE: { - memcpy(art->leaves + to, art->leaves + from, sizeof(art_leaf_t)); - break; - } - case CROARING_ART_NODE4_TYPE: { - memcpy(art->node4s + to, art->node4s + from, sizeof(art_node4_t)); - break; - } - case CROARING_ART_NODE16_TYPE: { - memcpy(art->node16s + to, art->node16s + from, - sizeof(art_node16_t)); - break; - } - case CROARING_ART_NODE48_TYPE: { - 
memcpy(art->node48s + to, art->node48s + from, - sizeof(art_node48_t)); - break; - } - case CROARING_ART_NODE256_TYPE: { - memcpy(art->node256s + to, art->node256s + from, - sizeof(art_node256_t)); - break; - } - default: { - assert(false); - return 0; - } - } + memcpy(art_get_node(art, to, typecode), art_get_node(art, from, typecode), + ART_NODE_SIZES[typecode]); // With an integer representing the next free index, and an `x` representing // an occupied index, assume the following scenario at the start of this @@ -1785,50 +1732,15 @@ static void art_sort_free_lists(art_t *art) { */ static size_t art_shrink_node_arrays(art_t *art) { size_t freed = 0; - if (art->first_free[CROARING_ART_LEAF_TYPE] < - art->capacities[CROARING_ART_LEAF_TYPE]) { - uint64_t new_capacity = art->first_free[CROARING_ART_LEAF_TYPE]; - art->leaves = - roaring_realloc(art->leaves, new_capacity * sizeof(art_leaf_t)); - freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * - sizeof(art_leaf_t); - art->capacities[CROARING_ART_LEAF_TYPE] = new_capacity; - } - if (art->first_free[CROARING_ART_NODE4_TYPE] < - art->capacities[CROARING_ART_NODE4_TYPE]) { - uint64_t new_capacity = art->first_free[CROARING_ART_NODE4_TYPE]; - art->node4s = - roaring_realloc(art->node4s, new_capacity * sizeof(art_node4_t)); - freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * - sizeof(art_node4_t); - art->capacities[CROARING_ART_NODE4_TYPE] = new_capacity; - } - if (art->first_free[CROARING_ART_NODE16_TYPE] < - art->capacities[CROARING_ART_NODE16_TYPE]) { - uint64_t new_capacity = art->first_free[CROARING_ART_NODE16_TYPE]; - art->node16s = - roaring_realloc(art->node16s, new_capacity * sizeof(art_node16_t)); - freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * - sizeof(art_node16_t); - art->capacities[CROARING_ART_NODE16_TYPE] = new_capacity; - } - if (art->first_free[CROARING_ART_NODE48_TYPE] < - art->capacities[CROARING_ART_NODE48_TYPE]) { - uint64_t new_capacity = art->first_free[CROARING_ART_NODE48_TYPE]; - art->node48s = - roaring_realloc(art->node48s, new_capacity * sizeof(art_node48_t)); - freed += (art->capacities[CROARING_ART_LEAF_TYPE] - new_capacity) * - sizeof(art_node48_t); - art->capacities[CROARING_ART_NODE48_TYPE] = new_capacity; - } - if (art->first_free[CROARING_ART_NODE256_TYPE] < - art->capacities[CROARING_ART_NODE256_TYPE]) { - uint64_t new_capacity = art->first_free[CROARING_ART_NODE256_TYPE]; - art->node256s = roaring_realloc(art->node256s, - new_capacity * sizeof(art_node256_t)); - freed += (art->capacities[CROARING_ART_NODE256_TYPE] - new_capacity) * - sizeof(art_node256_t); - art->capacities[CROARING_ART_NODE256_TYPE] = new_capacity; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] < art->capacities[t]) { + uint64_t new_capacity = art->first_free[t]; + art->nodes[t] = roaring_realloc(art->nodes[t], + new_capacity * ART_NODE_SIZES[t]); + freed += (art->capacities[t] - new_capacity) * ART_NODE_SIZES[t]; + art->capacities[t] = new_capacity; + } } return freed; } @@ -1902,11 +1814,10 @@ void art_init_cleared(art_t *art) { art->root = CROARING_ART_NULL_REF; memset(art->first_free, 0, sizeof(art->first_free)); memset(art->capacities, 0, sizeof(art->capacities)); - art->leaves = NULL; - art->node4s = NULL; - art->node16s = NULL; - art->node48s = NULL; - art->node256s = NULL; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + art->nodes[t] = NULL; + } } size_t art_shrink_to_fit(art_t 
*art) { @@ -1960,11 +1871,10 @@ bool art_is_empty(const art_t *art) { } void art_free(art_t *art) { - roaring_free(art->leaves); - roaring_free(art->node4s); - roaring_free(art->node16s); - roaring_free(art->node48s); - roaring_free(art->node256s); + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + roaring_free(art->nodes[t]); + } } void art_printf(const art_t *art) { @@ -2424,11 +2334,10 @@ size_t art_size_in_bytes(const art_t *art) { // Alignment for leaves. The rest of the nodes are aligned the same way. size += ((size + alignof(art_leaf_t) - 1) & ~(alignof(art_leaf_t) - 1)) - size; - size += art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); - size += art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); - size += art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); - size += art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); - size += art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + size += art->capacities[t] * ART_NODE_SIZES[t]; + } return size; } @@ -2455,35 +2364,13 @@ size_t art_serialize(const art_t *art, char *buf) { memset(buf, 0, align_bytes); buf += align_bytes; - if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); - memcpy(buf, art->leaves, size); - buf += size; - } - if (art->capacities[CROARING_ART_NODE4_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); - memcpy(buf, art->node4s, size); - buf += size; - } - if (art->capacities[CROARING_ART_NODE16_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); - memcpy(buf, art->node16s, size); - buf += size; - } - if (art->capacities[CROARING_ART_NODE48_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); - memcpy(buf, art->node48s, size); - buf += size; - } - if (art->capacities[CROARING_ART_NODE256_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); - memcpy(buf, art->node256s, size); - buf += size; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + memcpy(buf, art->nodes[t], size); + buf += size; + } } return buf - initial_buf; @@ -2521,56 +2408,17 @@ size_t art_frozen_view(const char *buf, size_t maxbytes, art_t *art) { } maxbytes -= buf - before_align; - if (art->capacities[CROARING_ART_LEAF_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_LEAF_TYPE] * sizeof(art_leaf_t); - if (maxbytes < size) { - return 0; - } - art->leaves = (art_leaf_t *)buf; - buf += size; - maxbytes -= size; - } - if (art->capacities[CROARING_ART_NODE4_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE4_TYPE] * sizeof(art_node4_t); - if (maxbytes < size) { - return 0; - } - art->node4s = (art_node4_t *)buf; - buf += size; - maxbytes -= size; - } - if (art->capacities[CROARING_ART_NODE16_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE16_TYPE] * sizeof(art_node16_t); - if (maxbytes < size) { - return 0; - } - art->node16s = (art_node16_t *)buf; - buf += size; - maxbytes -= size; - } - if (art->capacities[CROARING_ART_NODE48_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE48_TYPE] * sizeof(art_node48_t); - if (maxbytes < size) { - 
return 0; - } - art->node48s = (art_node48_t *)buf; - buf += size; - maxbytes -= size; - } - if (art->capacities[CROARING_ART_NODE256_TYPE] > 0) { - size_t size = - art->capacities[CROARING_ART_NODE256_TYPE] * sizeof(art_node256_t); - if (maxbytes < size) { - art_free(art); - return 0; + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->capacities[t] > 0) { + size_t size = art->capacities[t] * ART_NODE_SIZES[t]; + if (maxbytes < size) { + return 0; + } + art->nodes[t] = (char *)buf; + buf += size; + maxbytes -= size; } - art->node256s = (art_node256_t *)buf; - buf += size; - maxbytes -= size; } return buf - initial_buf; } From d37f26fe8c6b5c7080414dab37b3d8330e9bc871 Mon Sep 17 00:00:00 2001 From: Soerian Date: Tue, 18 Feb 2025 20:06:09 +0000 Subject: [PATCH 13/16] Correct outdated comment --- src/art/art.c | 3 --- src/roaring64.c | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/art/art.c b/src/art/art.c index b984380d8..de3d19393 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -1299,9 +1299,6 @@ static uint8_t art_common_prefix(const art_key_chunk_t key1[], /** * Extends the array of nodes of the given typecode. Invalidates pointers into * the array obtained by `art_deref`. - * - * Must only be called when the node array of the given type is "full" - * (first_free == capacity). */ static void art_extend(art_t *art, art_typecode_t typecode) { uint64_t size = art->first_free[typecode]; diff --git a/src/roaring64.c b/src/roaring64.c index 84ff500f5..16d722c2a 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -113,8 +113,7 @@ static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, } /** - * Extends the array of container pointers. Must only be called when the array - * is "full" (first_free == capacity). + * Extends the array of container pointers. 
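+ * Capacity grows geometrically: it doubles while the array holds fewer than
+ * 1024 slots and grows by roughly 5/4 thereafter (see the reallocation below).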
*/ static void extend_containers(roaring64_bitmap_t *r) { size_t size = r->first_free; From af87276b641fbec0f4db76f8ecb1b041002077a9 Mon Sep 17 00:00:00 2001 From: Soerian Date: Tue, 18 Feb 2025 20:11:27 +0000 Subject: [PATCH 14/16] Always try to shrink containers --- src/roaring64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/roaring64.c b/src/roaring64.c index 16d722c2a..7fa7d2e63 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -969,9 +969,6 @@ static inline bool is_shrunken(const roaring64_bitmap_t *r) { size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { size_t freed = art_shrink_to_fit(&r->art); - if (is_shrunken(r)) { - return freed; - } art_iterator_t it = art_init_iterator(&r->art, true); while (it.value != NULL) { leaf_t *leaf = (leaf_t *)it.value; @@ -980,6 +977,9 @@ size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { move_to_shrink(r, leaf); art_iterator_next(&it); } + if (is_shrunken(r)) { + return freed; + } size_t new_capacity = r->first_free; if (new_capacity < r->capacity) { r->containers = roaring_realloc(r->containers, From ba3942b0687255b54e53879223eaa146a3ec55f2 Mon Sep 17 00:00:00 2001 From: Soerian Date: Thu, 20 Feb 2025 21:11:18 +0000 Subject: [PATCH 15/16] Replace size_t with uint64_t where applicable in r64 --- src/roaring64.c | 84 +++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/src/roaring64.c b/src/roaring64.c index 7fa7d2e63..be0ef27d3 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -37,8 +37,8 @@ namespace api { typedef struct roaring64_bitmap_s { art_t art; uint8_t flags; - size_t first_free; - size_t capacity; + uint64_t first_free; + uint64_t capacity; container_t **containers; } roaring64_bitmap_t; @@ -94,7 +94,7 @@ static inline leaf_t create_leaf(uint64_t container_index, uint8_t typecode) { static inline uint8_t get_typecode(leaf_t leaf) { return (uint8_t)leaf; } -static inline size_t get_index(leaf_t leaf) { return leaf >> 8; } +static inline uint64_t get_index(leaf_t leaf) { return leaf >> 8; } static inline container_t *get_container(const roaring64_bitmap_t *r, leaf_t leaf) { @@ -106,7 +106,7 @@ static inline container_t *get_container(const roaring64_bitmap_t *r, static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, container_t *container, uint8_t typecode) { - size_t index = get_index(*leaf); + uint64_t index = get_index(*leaf); r->containers[index] = container; *leaf = create_leaf(index, typecode); return *leaf; @@ -116,11 +116,11 @@ static inline leaf_t replace_container(roaring64_bitmap_t *r, leaf_t *leaf, * Extends the array of container pointers. 
*/ static void extend_containers(roaring64_bitmap_t *r) { - size_t size = r->first_free; + uint64_t size = r->first_free; if (size < r->capacity) { return; } - size_t new_capacity; + uint64_t new_capacity; if (r->capacity == 0) { new_capacity = 2; } else if (r->capacity < 1024) { @@ -128,15 +128,15 @@ static void extend_containers(roaring64_bitmap_t *r) { } else { new_capacity = 5 * r->capacity / 4; } - size_t increase = new_capacity - r->capacity; + uint64_t increase = new_capacity - r->capacity; r->containers = roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); memset(r->containers + r->capacity, 0, increase * sizeof(container_t *)); r->capacity = new_capacity; } -static size_t next_free_container_idx(const roaring64_bitmap_t *r) { - for (size_t i = r->first_free + 1; i < r->capacity; ++i) { +static uint64_t next_free_container_idx(const roaring64_bitmap_t *r) { + for (uint64_t i = r->first_free + 1; i < r->capacity; ++i) { if (r->containers[i] == NULL) { return i; } @@ -144,8 +144,8 @@ static size_t next_free_container_idx(const roaring64_bitmap_t *r) { return r->capacity; } -static size_t allocate_index(roaring64_bitmap_t *r) { - size_t first_free = r->first_free; +static uint64_t allocate_index(roaring64_bitmap_t *r) { + uint64_t first_free = r->first_free; if (first_free == r->capacity) { extend_containers(r); } @@ -155,13 +155,13 @@ static size_t allocate_index(roaring64_bitmap_t *r) { static leaf_t add_container(roaring64_bitmap_t *r, container_t *container, uint8_t typecode) { - size_t index = allocate_index(r); + uint64_t index = allocate_index(r); r->containers[index] = container; return create_leaf(index, typecode); } static void remove_container(roaring64_bitmap_t *r, leaf_t leaf) { - size_t index = get_index(leaf); + uint64_t index = get_index(leaf); r->containers[index] = NULL; if (index < r->first_free) { r->first_free = index; @@ -953,7 +953,7 @@ bool roaring64_bitmap_run_optimize(roaring64_bitmap_t *r) { } static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { - size_t idx = get_index(*leaf); + uint64_t idx = get_index(*leaf); if (idx < r->first_free) { return; } @@ -980,7 +980,7 @@ size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) { if (is_shrunken(r)) { return freed; } - size_t new_capacity = r->first_free; + uint64_t new_capacity = r->first_free; if (new_capacity < r->capacity) { r->containers = roaring_realloc(r->containers, new_capacity * sizeof(container_t *)); @@ -2216,7 +2216,7 @@ static inline size_t container_get_frozen_size(const container_t *c, } } -size_t align_size(size_t size, size_t alignment) { +uint64_t align_size(uint64_t size, uint64_t alignment) { return (size + alignment - 1) & ~(alignment - 1); } @@ -2225,18 +2225,19 @@ size_t roaring64_bitmap_frozen_size_in_bytes(const roaring64_bitmap_t *r) { return 0; } // Flags. - size_t size = sizeof(r->flags); + uint64_t size = sizeof(r->flags); // Container count. size += sizeof(r->capacity); // Container element counts. size += r->capacity * sizeof(uint16_t); // Total container sizes. - size += 3 * sizeof(size_t); + size += 3 * sizeof(uint64_t); // ART (8 byte aligned). size = align_size(size, 8); size += art_size_in_bytes(&r->art); - size_t total_sizes[4] = CROARING_ZERO_INITIALIZER; // Indexed by typecode. + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. 
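    // The sizes summed in this function mirror the layout written by
    // roaring64_bitmap_frozen_serialize: the flags byte, the container
    // capacity, one uint16_t element count per container slot, three uint64_t
    // payload totals, the 8-byte-aligned ART, and the container payloads
    // themselves.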
art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { leaf_t leaf = (leaf_t)*it.value; @@ -2291,8 +2292,8 @@ static inline void container_frozen_serialize(const container_t *container, static inline char *pad_align(char *buf, const char *initial_buf, size_t alignment) { - size_t buf_size = buf - initial_buf; - size_t pad = align_size(buf_size, alignment) - buf_size; + uint64_t buf_size = buf - initial_buf; + uint64_t pad = align_size(buf_size, alignment) - buf_size; memset(buf, 0, pad); return buf + pad; } @@ -2316,7 +2317,8 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, buf += sizeof(r->capacity); // Container element counts. - size_t total_sizes[4] = CROARING_ZERO_INITIALIZER; // Indexed by typecode. + uint64_t total_sizes[4] = + CROARING_ZERO_INITIALIZER; // Indexed by typecode. art_iterator_t it = art_init_iterator((art_t *)&r->art, /*first=*/true); while (it.value != NULL) { leaf_t leaf = (leaf_t)*it.value; @@ -2333,12 +2335,12 @@ size_t roaring64_bitmap_frozen_serialize(const roaring64_bitmap_t *r, } // Total container sizes. - memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(size_t)); - buf += sizeof(size_t); - memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(size_t)); - buf += sizeof(size_t); - memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(size_t)); - buf += sizeof(size_t); + memcpy(buf, &(total_sizes[BITSET_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[RUN_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); + memcpy(buf, &(total_sizes[ARRAY_CONTAINER_TYPE]), sizeof(uint64_t)); + buf += sizeof(uint64_t); // ART. buf = pad_align(buf, initial_buf, 8); @@ -2452,20 +2454,20 @@ roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, maxbytes -= r->capacity * sizeof(uint16_t); // Total container sizes. - size_t total_sizes[4]; - if (maxbytes < sizeof(size_t) * 3) { + uint64_t total_sizes[4]; + if (maxbytes < sizeof(uint64_t) * 3) { roaring64_bitmap_free(r); return NULL; } - memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(size_t)); - buf += sizeof(size_t); - maxbytes -= sizeof(size_t); - memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(size_t)); - buf += sizeof(size_t); - maxbytes -= sizeof(size_t); - memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(size_t)); - buf += sizeof(size_t); - maxbytes -= sizeof(size_t); + memcpy(&(total_sizes[BITSET_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[RUN_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); + memcpy(&(total_sizes[ARRAY_CONTAINER_TYPE]), buf, sizeof(uint64_t)); + buf += sizeof(uint64_t); + maxbytes -= sizeof(uint64_t); // ART (8 byte aligned). buf = CROARING_ALIGN_BUF(buf, 8); @@ -2488,7 +2490,7 @@ roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, buf = CROARING_ALIGN_BUF(buf, alignof(uint16_t)); const uint16_t *arrays = (const uint16_t *)buf; buf += total_sizes[ARRAY_CONTAINER_TYPE]; - if (maxbytes < (size_t)(buf - before_containers)) { + if (maxbytes < (uint64_t)(buf - before_containers)) { roaring64_bitmap_free(r); return NULL; } @@ -2506,7 +2508,7 @@ roaring64_bitmap_t *roaring64_bitmap_frozen_view(const char *buf, uint32_t elem_count = (uint32_t)(compressed_elem_count) + 1; // The container index is unrelated to the iteration order. 
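        // A leaf stores the container typecode in its low byte and the
        // container index in the remaining upper bits (see create_leaf and
        // get_index above), so the original slot can be restored directly.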
- size_t index = get_index(leaf); + uint64_t index = get_index(leaf); r->containers[index] = container_frozen_view(typecode, elem_count, &bitsets, &arrays, &runs); From b7703da4a7daa07ccf7f3b124599ae30f478419a Mon Sep 17 00:00:00 2001 From: Soerian Date: Thu, 20 Feb 2025 21:18:52 +0000 Subject: [PATCH 16/16] Check if ART is shrunken when checking if r64 is shrunken --- include/roaring/art/art.h | 5 +++++ src/art/art.c | 23 ++++++++++------------- src/roaring64.c | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/roaring/art/art.h b/include/roaring/art/art.h index d7a71a47c..16b1e5516 100644 --- a/include/roaring/art/art.h +++ b/include/roaring/art/art.h @@ -205,6 +205,11 @@ bool art_iterator_erase(art_iterator_t *iterator, art_val_t *erased_val); */ size_t art_shrink_to_fit(art_t *art); +/** + * Returns true if the ART has no unused elements. + */ +bool art_is_shrunken(const art_t *art); + /** * Returns the serialized size in bytes. * Requires `art_shrink_to_fit` to be called first. diff --git a/src/art/art.c b/src/art/art.c index de3d19393..8e54d4353 100644 --- a/src/art/art.c +++ b/src/art/art.c @@ -1794,19 +1794,6 @@ static void art_shrink_at(art_t *art, art_ref_t ref) { } } -static bool art_is_shrunken(const art_t *art) { - return art->first_free[CROARING_ART_LEAF_TYPE] == - art->capacities[CROARING_ART_LEAF_TYPE] && - art->first_free[CROARING_ART_NODE4_TYPE] == - art->capacities[CROARING_ART_NODE4_TYPE] && - art->first_free[CROARING_ART_NODE16_TYPE] == - art->capacities[CROARING_ART_NODE16_TYPE] && - art->first_free[CROARING_ART_NODE48_TYPE] == - art->capacities[CROARING_ART_NODE48_TYPE] && - art->first_free[CROARING_ART_NODE256_TYPE] == - art->capacities[CROARING_ART_NODE256_TYPE]; -} - void art_init_cleared(art_t *art) { art->root = CROARING_ART_NULL_REF; memset(art->first_free, 0, sizeof(art->first_free)); @@ -1829,6 +1816,16 @@ size_t art_shrink_to_fit(art_t *art) { return art_shrink_node_arrays(art); } +bool art_is_shrunken(const art_t *art) { + for (art_typecode_t t = CROARING_ART_MIN_TYPE; t <= CROARING_ART_MAX_TYPE; + ++t) { + if (art->first_free[t] != art->capacities[t]) { + return false; + } + } + return true; +} + art_val_t *art_insert(art_t *art, const art_key_chunk_t *key, art_val_t val) { art_ref_t leaf = art_leaf_create(art, key, val); if (art->root == CROARING_ART_NULL_REF) { diff --git a/src/roaring64.c b/src/roaring64.c index be0ef27d3..bc65e8b0e 100644 --- a/src/roaring64.c +++ b/src/roaring64.c @@ -964,7 +964,7 @@ static void move_to_shrink(roaring64_bitmap_t *r, leaf_t *leaf) { } static inline bool is_shrunken(const roaring64_bitmap_t *r) { - return r->first_free == r->capacity; + return art_is_shrunken(&r->art) && r->first_free == r->capacity; } size_t roaring64_bitmap_shrink_to_fit(roaring64_bitmap_t *r) {
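A minimal usage sketch of the shrink path these patches touch, assuming only
the public roaring64 API (bitmap create/add/remove/free) already present in
this repository; the value counts and bit shifts below are illustrative:

#include <roaring/roaring64.h>
#include <stdio.h>

int main(void) {
    roaring64_bitmap_t *r = roaring64_bitmap_create();
    // One entry per distinct high 48 bits creates one container (and one ART
    // leaf) per value.
    for (uint64_t i = 0; i < 100000; i++) {
        roaring64_bitmap_add(r, i << 16);
    }
    // Deleting most entries leaves holes in the ART node arrays and in the
    // container pointer array.
    for (uint64_t i = 0; i < 100000; i++) {
        if (i % 4 != 0) {
            roaring64_bitmap_remove(r, i << 16);
        }
    }
    // Compacts both, reallocates the arrays down to first_free slots, and
    // returns the number of bytes released.
    size_t freed = roaring64_bitmap_shrink_to_fit(r);
    printf("shrink_to_fit freed %zu bytes\n", freed);
    roaring64_bitmap_free(r);
    return 0;
}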