Convert rtree from (void *) to (uint8_t) storage.
Reduce rtree memory usage by storing booleans (1 byte each) rather than
pointers. The rtree code is only used to record whether jemalloc manages
a chunk of memory, so there's no need to store pointers in the rtree.
Increase rtree node size to 64 KiB in order to reduce tree depth from 13
to 3 on 64-bit systems. The conversion to more compact leaf nodes was
enough by itself to make the rtree depth 1 on 32-bit systems; due to the
fact that root nodes are smaller than the specified node size if
possible, the node size change has no impact on 32-bit systems (assuming
default chunk size).
diff --git a/src/rtree.c b/src/rtree.c
index 4e26766..205957a 100644
--- a/src/rtree.c
+++ b/src/rtree.c
@@ -5,15 +5,20 @@
rtree_new(unsigned bits, rtree_alloc_t *alloc, rtree_dalloc_t *dalloc)
{
rtree_t *ret;
- unsigned bits_per_level, height, i;
+ unsigned bits_per_level, bits_in_leaf, height, i;
assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3));
bits_per_level = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(void *)))) - 1;
- height = bits / bits_per_level;
- if (height * bits_per_level != bits)
- height++;
- assert(height * bits_per_level >= bits);
+ bits_in_leaf = ffs(pow2_ceil((RTREE_NODESIZE / sizeof(uint8_t)))) - 1;
+ if (bits > bits_in_leaf) {
+ height = 1 + (bits - bits_in_leaf) / bits_per_level;
+ if ((height-1) * bits_per_level + bits_in_leaf != bits)
+ height++;
+ } else {
+ height = 1;
+ }
+ assert((height-1) * bits_per_level + bits_in_leaf >= bits);
ret = (rtree_t*)alloc(offsetof(rtree_t, level2bits) +
(sizeof(unsigned) * height));
@@ -25,23 +30,27 @@
ret->alloc = alloc;
ret->dalloc = dalloc;
if (malloc_mutex_init(&ret->mutex)) {
- /* Leak the rtree. */
+ if (dalloc != NULL)
+ dalloc(ret);
return (NULL);
}
ret->height = height;
- if (bits_per_level * height > bits)
- ret->level2bits[0] = bits % bits_per_level;
- else
- ret->level2bits[0] = bits_per_level;
- for (i = 1; i < height; i++)
- ret->level2bits[i] = bits_per_level;
+ if (height > 1) {
+ if ((height-1) * bits_per_level + bits_in_leaf > bits) {
+ ret->level2bits[0] = (bits - bits_in_leaf) %
+ bits_per_level;
+ } else
+ ret->level2bits[0] = bits_per_level;
+ for (i = 1; i < height-1; i++)
+ ret->level2bits[i] = bits_per_level;
+ ret->level2bits[height-1] = bits_in_leaf;
+ } else
+ ret->level2bits[0] = bits;
ret->root = (void**)alloc(sizeof(void *) << ret->level2bits[0]);
if (ret->root == NULL) {
- /*
- * We leak the rtree here, since there's no generic base
- * deallocation.
- */
+ if (dalloc != NULL)
+ dalloc(ret);
return (NULL);
}
memset(ret->root, 0, sizeof(void *) << ret->level2bits[0]);