| // Copyright (c) 2018 Kenton Varda and contributors |
| // Licensed under the MIT License: |
| // |
| // Permission is hereby granted, free of charge, to any person obtaining a copy |
| // of this software and associated documentation files (the "Software"), to deal |
| // in the Software without restriction, including without limitation the rights |
| // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| // copies of the Software, and to permit persons to whom the Software is |
| // furnished to do so, subject to the following conditions: |
| // |
| // The above copyright notice and this permission notice shall be included in |
| // all copies or substantial portions of the Software. |
| // |
| // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| // THE SOFTWARE. |
| |
| #include "table.h" |
| #include "debug.h" |
| #include <stdlib.h> |
| |
| #if KJ_DEBUG_TABLE_IMPL |
| #undef KJ_DASSERT |
| #define KJ_DASSERT KJ_ASSERT |
| #endif |
| |
| namespace kj { |
| namespace _ { |
| |
| static inline uint lg(uint value) { |
| // Compute floor(log2(value)). |
| // |
| // Undefined for value = 0. |
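  //
  // E.g. lg(1) = 0, lg(15) = 3, lg(16) = 4.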
| #if _MSC_VER && !defined(__clang__) |
| unsigned long i; |
| auto found = _BitScanReverse(&i, value); |
| KJ_DASSERT(found); // !found means value = 0 |
| return i; |
| #else |
| return sizeof(uint) * 8 - 1 - __builtin_clz(value); |
| #endif |
| } |
| |
| void throwDuplicateTableRow() { |
| KJ_FAIL_REQUIRE("inserted row already exists in table"); |
| } |
| |
| void logHashTableInconsistency() { |
| KJ_LOG(ERROR, |
| "HashIndex detected hash table inconsistency. This can happen if you create a kj::Table " |
| "with a hash index and you modify the rows in the table post-indexing in a way that would " |
| "change their hash. This is a serious bug which will lead to undefined behavior." |
| "\nstack: ", kj::getStackTrace()); |
| } |
| |
| // List of primes where each element is roughly double the previous. Obtained |
| // from: |
| // http://planetmath.org/goodhashtableprimes |
| // Primes < 53 were added to ensure that small tables don't allocate excessive memory. |
| static const size_t PRIMES[] = { |
| 1, // 2^ 0 = 1 |
| 3, // 2^ 1 = 2 |
| 5, // 2^ 2 = 4 |
| 11, // 2^ 3 = 8 |
| 23, // 2^ 4 = 16 |
| 53, // 2^ 5 = 32 |
| 97, // 2^ 6 = 64 |
| 193, // 2^ 7 = 128 |
| 389, // 2^ 8 = 256 |
| 769, // 2^ 9 = 512 |
| 1543, // 2^10 = 1024 |
| 3079, // 2^11 = 2048 |
| 6151, // 2^12 = 4096 |
| 12289, // 2^13 = 8192 |
| 24593, // 2^14 = 16384 |
| 49157, // 2^15 = 32768 |
| 98317, // 2^16 = 65536 |
| 196613, // 2^17 = 131072 |
| 393241, // 2^18 = 262144 |
| 786433, // 2^19 = 524288 |
| 1572869, // 2^20 = 1048576 |
| 3145739, // 2^21 = 2097152 |
| 6291469, // 2^22 = 4194304 |
| 12582917, // 2^23 = 8388608 |
| 25165843, // 2^24 = 16777216 |
| 50331653, // 2^25 = 33554432 |
| 100663319, // 2^26 = 67108864 |
| 201326611, // 2^27 = 134217728 |
| 402653189, // 2^28 = 268435456 |
| 805306457, // 2^29 = 536870912 |
| 1610612741, // 2^30 = 1073741824 |
| }; |
| |
| uint chooseBucket(uint hash, uint count) { |
| // Integer modulus is really, really slow. It turns out that the compiler can generate much |
| // faster code if the denominator is a constant. Since we have a fixed set of possible |
| // denominators, a big old switch() statement is a win. |
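  // (The speedup comes from the compiler strength-reducing each constant division into a
  // multiply-by-magic-number and shift, avoiding the hardware divide instruction.)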
| |
| // TODO(perf): Consider using power-of-two bucket sizes. We can safely do so as long as we demand |
| // high-quality hash functions -- kj::hashCode() needs good diffusion even for integers, can't |
| // just be a cast. Also be sure to implement Robin Hood hashing to avoid extremely bad negative |
| // lookup time when elements have sequential hashes (otherwise, it could be necessary to scan |
| // the entire list to determine that an element isn't present). |
| |
| switch (count) { |
| #define HANDLE(i) case i##u: return hash % i##u |
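    // E.g. HANDLE(53) expands to: case 53u: return hash % 53u;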
| HANDLE( 1); |
| HANDLE( 3); |
| HANDLE( 5); |
| HANDLE( 11); |
| HANDLE( 23); |
| HANDLE( 53); |
| HANDLE( 97); |
| HANDLE( 193); |
| HANDLE( 389); |
| HANDLE( 769); |
| HANDLE( 1543); |
| HANDLE( 3079); |
| HANDLE( 6151); |
| HANDLE( 12289); |
| HANDLE( 24593); |
| HANDLE( 49157); |
| HANDLE( 98317); |
| HANDLE( 196613); |
| HANDLE( 393241); |
| HANDLE( 786433); |
| HANDLE( 1572869); |
| HANDLE( 3145739); |
| HANDLE( 6291469); |
| HANDLE( 12582917); |
| HANDLE( 25165843); |
| HANDLE( 50331653); |
| HANDLE( 100663319); |
| HANDLE( 201326611); |
| HANDLE( 402653189); |
| HANDLE( 805306457); |
| HANDLE(1610612741); |
| #undef HANDLE |
| default: return hash % count; |
| } |
| } |
| |
| size_t chooseHashTableSize(uint size) { |
| if (size == 0) return 0; |
| |
  // lg() rounds down (floor(log2)), so add 1 to round up to the next power of two, then look up
  // the best prime bucket size for that target size.
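  // E.g. size = 100: lg(100) == 6, so we return PRIMES[7] == 193.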
| return PRIMES[lg(size) + 1]; |
| } |
| |
| kj::Array<HashBucket> rehash(kj::ArrayPtr<const HashBucket> oldBuckets, size_t targetSize) { |
| // Rehash the whole table. |
| |
| KJ_REQUIRE(targetSize < (1 << 30), "hash table has reached maximum size"); |
| |
| size_t size = chooseHashTableSize(targetSize); |
| |
| if (size < oldBuckets.size()) { |
| size = oldBuckets.size(); |
| } |
| |
| auto newBuckets = kj::heapArray<HashBucket>(size); |
| memset(newBuckets.begin(), 0, sizeof(HashBucket) * size); |
| |
| uint entryCount = 0; |
| uint collisionCount = 0; |
| |
| for (auto& oldBucket: oldBuckets) { |
| if (oldBucket.isOccupied()) { |
| ++entryCount; |
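      // Walk the probe sequence from the bucket's preferred slot until we find an empty slot.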
      for (uint i = chooseBucket(oldBucket.hash, newBuckets.size());;
           i = probeHash(newBuckets, i)) {
| auto& newBucket = newBuckets[i]; |
| if (newBucket.isEmpty()) { |
| newBucket = oldBucket; |
| break; |
| } |
| ++collisionCount; |
| } |
| } |
| } |
| |
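  // Sanity-check the distribution: the threshold below corresponds to more than four extra
  // probes per entry on average (plus a slack of 16), which should essentially never happen
  // with a well-distributed hash function at these load factors.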
| if (collisionCount > 16 + entryCount * 4) { |
| static bool warned = false; |
| if (!warned) { |
| KJ_LOG(WARNING, "detected excessive collisions in hash table; is your hash function OK?", |
| entryCount, collisionCount, kj::getStackTrace()); |
| warned = true; |
| } |
| } |
| |
| return newBuckets; |
| } |
| |
| // ======================================================================================= |
| // BTree |
| |
| #if _WIN32 |
| #define aligned_free _aligned_free |
| #else |
| #define aligned_free ::free |
| #endif |
| |
| BTreeImpl::BTreeImpl() |
| : tree(const_cast<NodeUnion*>(&EMPTY_NODE)), |
| treeCapacity(1), |
| height(0), |
| freelistHead(1), |
| freelistSize(0), |
| beginLeaf(0), |
| endLeaf(0) {} |
| |
| BTreeImpl::~BTreeImpl() noexcept(false) { |
| if (tree != &EMPTY_NODE) { |
| aligned_free(tree); |
| } |
| } |
| |
| BTreeImpl::BTreeImpl(BTreeImpl&& other) |
| : BTreeImpl() { |
| *this = kj::mv(other); |
| } |
| |
| BTreeImpl& BTreeImpl::operator=(BTreeImpl&& other) { |
| KJ_DASSERT(&other != this); |
| |
| if (tree != &EMPTY_NODE) { |
| aligned_free(tree); |
| } |
| tree = other.tree; |
| treeCapacity = other.treeCapacity; |
| height = other.height; |
| freelistHead = other.freelistHead; |
| freelistSize = other.freelistSize; |
| beginLeaf = other.beginLeaf; |
| endLeaf = other.endLeaf; |
| |
| other.tree = const_cast<NodeUnion*>(&EMPTY_NODE); |
| other.treeCapacity = 1; |
| other.height = 0; |
| other.freelistHead = 1; |
| other.freelistSize = 0; |
| other.beginLeaf = 0; |
| other.endLeaf = 0; |
| |
| return *this; |
| } |
| |
| const BTreeImpl::NodeUnion BTreeImpl::EMPTY_NODE = {{{0, {0}}}}; |
| |
| void BTreeImpl::verify(size_t size, FunctionParam<bool(uint, uint)> f) { |
| KJ_ASSERT(verifyNode(size, f, 0, height, nullptr) == size); |
| } |
| size_t BTreeImpl::verifyNode(size_t size, FunctionParam<bool(uint, uint)>& f, |
| uint pos, uint height, MaybeUint maxRow) { |
| if (height > 0) { |
| auto& parent = tree[pos].parent; |
| |
| auto n = parent.keyCount(); |
| size_t total = 0; |
| for (auto i: kj::zeroTo(n)) { |
| KJ_ASSERT(*parent.keys[i] < size, n, i); |
| total += verifyNode(size, f, parent.children[i], height - 1, parent.keys[i]); |
| if (i > 0) { |
| KJ_ASSERT(f(*parent.keys[i - 1], *parent.keys[i]), |
| n, i, parent.keys[i - 1], parent.keys[i]); |
| } |
| } |
| total += verifyNode(size, f, parent.children[n], height - 1, maxRow); |
| if (maxRow != nullptr) { |
| KJ_ASSERT(f(*parent.keys[n-1], *maxRow), n, parent.keys[n-1], maxRow); |
| } |
| return total; |
| } else { |
| auto& leaf = tree[pos].leaf; |
| auto n = leaf.size(); |
| for (auto i: kj::zeroTo(n)) { |
| KJ_ASSERT(*leaf.rows[i] < size, n, i); |
| if (i > 0) { |
| KJ_ASSERT(f(*leaf.rows[i - 1], *leaf.rows[i]), |
| n, i, leaf.rows[i - 1], leaf.rows[i]); |
| } |
| } |
| if (maxRow != nullptr) { |
| KJ_ASSERT(leaf.rows[n-1] == maxRow, n); |
| } |
| return n; |
| } |
| } |
| |
| kj::String BTreeImpl::MaybeUint::toString() const { |
| return i == 0 ? kj::str("(null)") : kj::str(i - 1); |
| } |
| |
| void BTreeImpl::logInconsistency() const { |
| KJ_LOG(ERROR, |
| "BTreeIndex detected tree state inconsistency. This can happen if you create a kj::Table " |
| "with a b-tree index and you modify the rows in the table post-indexing in a way that would " |
| "change their ordering. This is a serious bug which will lead to undefined behavior." |
| "\nstack: ", kj::getStackTrace()); |
| } |
| |
| void BTreeImpl::reserve(size_t size) { |
| KJ_REQUIRE(size < (1u << 31), "b-tree has reached maximum size"); |
| |
| // Calculate the worst-case number of leaves to cover the size, given that a leaf is always at |
| // least half-full. (Note that it's correct for this calculation to round down, not up: The |
| // remainder will necessarily be distributed among the non-full leaves, rather than creating a |
| // new leaf, because if it went into a new leaf, that leaf would be less than half-full.) |
| uint leaves = size / (Leaf::NROWS / 2); |
| |
| // Calculate the worst-case number of parents to cover the leaves, given that a parent is always |
| // at least half-full. Since the parents form a tree with branching factor B, the size of the |
  // tree is N/B + N/B^2 + N/B^3 + N/B^4 + ... = (N/B) / (1 - 1/B) = N / (B - 1). Math.
| constexpr uint branchingFactor = Parent::NCHILDREN / 2; |
| uint parents = leaves / (branchingFactor - 1); |
| |
| // Height is log-base-branching-factor of leaves, plus 1 for the root node. |
| uint height = lg(leaves | 1) / lg(branchingFactor) + 1; |
| |
| size_t newSize = leaves + |
| parents + 1 + // + 1 for the root |
| height + 2; // minimum freelist size needed by insert() |
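  // Worked example (assuming Leaf::NROWS == 14 -- consistent with the "28 rows" note in insert()
  // below -- and Parent::NCHILDREN == 8; check table.h for the real constants): reserve(1000)
  // gives leaves = 1000/7 = 142, branchingFactor = 4, parents = 142/3 = 47,
  // height = 7/2 + 1 = 4, and newSize = 142 + 47 + 1 + 4 + 2 = 196 nodes.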
| |
| if (treeCapacity < newSize) { |
| growTree(newSize); |
| } |
| } |
| |
| void BTreeImpl::clear() { |
| if (tree != &EMPTY_NODE) { |
| azero(tree, treeCapacity); |
| height = 0; |
| freelistHead = 1; |
| freelistSize = treeCapacity - 1; // subtract one to account for the root node |
| beginLeaf = 0; |
| endLeaf = 0; |
| } |
| } |
| |
| void BTreeImpl::growTree(uint minCapacity) { |
| uint newCapacity = kj::max(kj::max(minCapacity, treeCapacity * 2), 4); |
| freelistSize += newCapacity - treeCapacity; |
| |
| // Allocate some aligned memory! In theory this should be as simple as calling the C11 standard |
| // aligned_alloc() function. Unfortunately, many platforms don't implement it. Luckily, there |
| // are usually alternatives. |
| |
| #if _WIN32 |
| // Windows lacks aligned_alloc() but has its own _aligned_malloc() (which requires freeing using |
| // _aligned_free()). |
| // WATCH OUT: The argument order for _aligned_malloc() is opposite of aligned_alloc()! |
| NodeUnion* newTree = reinterpret_cast<NodeUnion*>( |
| _aligned_malloc(newCapacity * sizeof(BTreeImpl::NodeUnion), sizeof(BTreeImpl::NodeUnion))); |
| KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity); |
| #else |
| // macOS, OpenBSD, and Android lack aligned_alloc(), but have posix_memalign(). Fine. |
| void* allocPtr; |
| int error = posix_memalign(&allocPtr, |
| sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion)); |
| if (error != 0) { |
| KJ_FAIL_SYSCALL("posix_memalign", error); |
| } |
| NodeUnion* newTree = reinterpret_cast<NodeUnion*>(allocPtr); |
| #endif |
| |
  // For reference, on a platform that does provide the C11 standard aligned_alloc(), the
  // equivalent allocation would be:
| // NodeUnion* newTree = reinterpret_cast<NodeUnion*>( |
| // aligned_alloc(sizeof(BTreeImpl::NodeUnion), newCapacity * sizeof(BTreeImpl::NodeUnion))); |
| // KJ_ASSERT(newTree != nullptr, "memory allocation failed", newCapacity); |
| |
| acopy(newTree, tree, treeCapacity); |
| azero(newTree + treeCapacity, newCapacity - treeCapacity); |
| if (tree != &EMPTY_NODE) aligned_free(tree); |
| tree = newTree; |
| treeCapacity = newCapacity; |
| } |
| |
| BTreeImpl::Iterator BTreeImpl::search(const SearchKey& searchKey) const { |
| // Find the "first" row number (in sorted order) for which searchKey.isAfter(rowNumber) returns |
| // false. |
| |
| uint pos = 0; |
| |
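  // Descend from the root (always node 0) through `height` levels of parent nodes to a leaf.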
| for (auto i KJ_UNUSED: zeroTo(height)) { |
| auto& parent = tree[pos].parent; |
| pos = parent.children[searchKey.search(parent)]; |
| } |
| |
| auto& leaf = tree[pos].leaf; |
| return { tree, &leaf, searchKey.search(leaf) }; |
| } |
| |
| template <typename T> |
| struct BTreeImpl::AllocResult { |
| uint index; |
| T& node; |
| }; |
| |
| template <typename T> |
| inline BTreeImpl::AllocResult<T> BTreeImpl::alloc() { |
| // Allocate a new item from the freelist. Guaranteed to be zero'd except for the first member. |
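  // Note: freelist.nextOffset == 0 means "the immediately following node is next". Never-used
  // nodes are all zero, so zeroed memory past the high-water mark implicitly chains itself onto
  // the freelist; growTree() extends the freelist simply by zeroing the newly-allocated space.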
| uint i = freelistHead; |
| NodeUnion* ptr = &tree[i]; |
| freelistHead = i + 1 + ptr->freelist.nextOffset; |
| --freelistSize; |
| return { i, *ptr }; |
| } |
| |
| inline void BTreeImpl::free(uint pos) { |
| // Add the given node to the freelist. |
| |
| // HACK: This is typically called on a node immediately after copying its contents away, but the |
| // pointer used to copy it away may be a different pointer pointing to a different union member |
  // which the compiler may not recognize as aliasing with this object. Just to be extra-safe,
| // insert a compiler barrier. |
| compilerBarrier(); |
| |
| auto& node = tree[pos]; |
| node.freelist.nextOffset = freelistHead - pos - 1; |
| azero(node.freelist.zero, kj::size(node.freelist.zero)); |
| freelistHead = pos; |
| ++freelistSize; |
| } |
| |
| BTreeImpl::Iterator BTreeImpl::insert(const SearchKey& searchKey) { |
| // Like search() but ensures that there is room in the leaf node to insert a new row. |
| |
| // If we split the root node it will generate two new nodes. If we split any other node in the |
| // path it will generate one new node. `height` doesn't count leaf nodes, but we can equivalently |
| // think of it as not counting the root node, so in the worst case we may allocate height + 2 |
| // new nodes. |
| // |
| // (Also note that if the tree is currently empty, then `tree` points to a dummy root node in |
| // read-only memory. We definitely need to allocate a real tree node array in this case, and |
| // we'll start out allocating space for four nodes, which will be all we need up to 28 rows.) |
| if (freelistSize < height + 2) { |
| if (height > 0 && !tree[0].parent.isFull() && freelistSize >= height) { |
| // Slight optimization: The root node is not full, so we're definitely not going to split it. |
| // That means that the maximum allocations we might do is equal to `height`, not |
| // `height + 2`, and we have that much space, so no need to grow yet. |
| // |
| // This optimization is particularly important for small trees, e.g. when treeCapacity is 4 |
| // and the tree so far consists of a root and two children, we definitely don't need to grow |
| // the tree yet. |
| } else { |
| growTree(); |
| |
| if (freelistHead == 0) { |
| // We have no root yet. Allocate one. |
| KJ_ASSERT(alloc<Parent>().index == 0); |
| } |
| } |
| } |
| |
| uint pos = 0; |
| |
| // Track grandparent node and child index within grandparent. |
| Parent* parent = nullptr; |
| uint indexInParent = 0; |
| |
| for (auto i KJ_UNUSED: zeroTo(height)) { |
| Parent& node = insertHelper(searchKey, tree[pos].parent, parent, indexInParent, pos); |
| |
| parent = &node; |
| indexInParent = searchKey.search(node); |
| pos = node.children[indexInParent]; |
| } |
| |
| Leaf& leaf = insertHelper(searchKey, tree[pos].leaf, parent, indexInParent, pos); |
| |
| // Fun fact: Unlike erase(), there's no need to climb back up the tree modifying keys, because |
| // either the newly-inserted node will not be the last in the leaf (and thus parent keys aren't |
| // modified), or the leaf is the last leaf in the tree (and thus there's no parent key to |
| // modify). |
| |
| return { tree, &leaf, searchKey.search(leaf) }; |
| } |
| |
| template <typename Node> |
| Node& BTreeImpl::insertHelper(const SearchKey& searchKey, |
| Node& node, Parent* parent, uint indexInParent, uint pos) { |
| if (node.isFull()) { |
| // This node is full. Need to split. |
| if (parent == nullptr) { |
| // This is the root node. We need to split into two nodes and create a new root. |
| auto n1 = alloc<Node>(); |
| auto n2 = alloc<Node>(); |
| |
| uint pivot = split(n2.node, n2.index, node, pos); |
| move(n1.node, n1.index, node); |
| |
| // Rewrite root to have the two children. |
| tree[0].parent.initRoot(pivot, n1.index, n2.index); |
| |
| // Increased height. |
| ++height; |
| |
| // Decide which new branch has our search key. |
| if (searchKey.isAfter(pivot)) { |
| // the right one |
| return n2.node; |
| } else { |
| // the left one |
| return n1.node; |
| } |
| } else { |
| // This is a non-root parent node. We need to split it into two and insert the new node |
| // into the grandparent. |
| auto n = alloc<Node>(); |
| uint pivot = split(n.node, n.index, node, pos); |
| |
| // Insert new child into grandparent. |
| parent->insertAfter(indexInParent, pivot, n.index); |
| |
| // Decide which new branch has our search key. |
| if (searchKey.isAfter(pivot)) { |
| // the new one, which is right of the original |
| return n.node; |
| } else { |
| // the original one, which is left of the new one |
| return node; |
| } |
| } |
| } else { |
| // No split needed. |
| return node; |
| } |
| } |
| |
| void BTreeImpl::erase(uint row, const SearchKey& searchKey) { |
  // Erase the given row number from the tree. searchKey.isAfter() returns false for the given
  // row and all rows after it, so that search() lands exactly on the row being erased.
| |
| uint pos = 0; |
| |
| // Track grandparent node and child index within grandparent. |
| Parent* parent = nullptr; |
| uint indexInParent = 0; |
| |
| MaybeUint* fixup = nullptr; |
| |
| for (auto i KJ_UNUSED: zeroTo(height)) { |
| Parent& node = eraseHelper(tree[pos].parent, parent, indexInParent, pos, fixup); |
| |
| parent = &node; |
| indexInParent = searchKey.search(node); |
| pos = node.children[indexInParent]; |
| |
| if (indexInParent < kj::size(node.keys) && node.keys[indexInParent] == row) { |
| // Oh look, the row is a key in this node! We'll need to come back and fix this up later. |
| // Note that any particular row can only appear as *one* key value anywhere in the tree, so |
| // we only need one fixup pointer, which is nice. |
| MaybeUint* newFixup = &node.keys[indexInParent]; |
| if (fixup == newFixup) { |
| // The fixup pointer was already set while processing a parent node, and then a merge or |
| // rotate caused it to be moved, but the fixup pointer was updated... so it's already set |
| // to point at the slot we wanted it to point to, so nothing to see here. |
| } else { |
| KJ_DASSERT(fixup == nullptr); |
| fixup = newFixup; |
| } |
| } |
| } |
| |
| Leaf& leaf = eraseHelper(tree[pos].leaf, parent, indexInParent, pos, fixup); |
| |
| uint r = searchKey.search(leaf); |
| if (leaf.rows[r] == row) { |
| leaf.erase(r); |
| |
| if (fixup != nullptr) { |
| // There's a key in a parent node that needs fixup. This is only possible if the removed |
| // node is the last in its leaf. |
| KJ_DASSERT(leaf.rows[r] == nullptr); |
| KJ_DASSERT(r > 0); // non-root nodes must be at least half full so this can't be item 0 |
| KJ_DASSERT(*fixup == row); |
| *fixup = leaf.rows[r - 1]; |
| } |
| } else { |
| logInconsistency(); |
| } |
| } |
| |
| template <typename Node> |
| Node& BTreeImpl::eraseHelper( |
| Node& node, Parent* parent, uint indexInParent, uint pos, MaybeUint*& fixup) { |
| if (parent != nullptr && !node.isMostlyFull()) { |
| // This is not the root, but it's only half-full. Rebalance. |
| KJ_DASSERT(node.isHalfFull()); |
| |
| if (indexInParent > 0) { |
| // There's a sibling to the left. |
| uint sibPos = parent->children[indexInParent - 1]; |
| Node& sib = tree[sibPos]; |
| if (sib.isMostlyFull()) { |
| // Left sibling is more than half full. Steal one member. |
| rotateRight(sib, node, *parent, indexInParent - 1); |
| return node; |
| } else { |
| // Left sibling is half full, too. Merge. |
| KJ_ASSERT(sib.isHalfFull()); |
| merge(sib, sibPos, *parent->keys[indexInParent - 1], node); |
| parent->eraseAfter(indexInParent - 1); |
| free(pos); |
| if (fixup == &parent->keys[indexInParent]) --fixup; |
| |
| if (parent->keys[0] == nullptr) { |
| // Oh hah, the parent has no keys left. It must be the root. We can eliminate it. |
| KJ_DASSERT(parent == &tree->parent); |
| compilerBarrier(); // don't reorder any writes to parent below here |
| move(tree[0], 0, sib); |
| free(sibPos); |
| --height; |
| return tree[0]; |
| } else { |
| return sib; |
| } |
| } |
| } else if (indexInParent < Parent::NKEYS && parent->keys[indexInParent] != nullptr) { |
| // There's a sibling to the right. |
| uint sibPos = parent->children[indexInParent + 1]; |
| Node& sib = tree[sibPos]; |
| if (sib.isMostlyFull()) { |
| // Right sibling is more than half full. Steal one member. |
| rotateLeft(node, sib, *parent, indexInParent, fixup); |
| return node; |
| } else { |
| // Right sibling is half full, too. Merge. |
| KJ_ASSERT(sib.isHalfFull()); |
| merge(node, pos, *parent->keys[indexInParent], sib); |
| parent->eraseAfter(indexInParent); |
| free(sibPos); |
| if (fixup == &parent->keys[indexInParent]) fixup = nullptr; |
| |
| if (parent->keys[0] == nullptr) { |
| // Oh hah, the parent has no keys left. It must be the root. We can eliminate it. |
| KJ_DASSERT(parent == &tree->parent); |
| compilerBarrier(); // don't reorder any writes to parent below here |
| move(tree[0], 0, node); |
| free(pos); |
| --height; |
| return tree[0]; |
| } else { |
| return node; |
| } |
| } |
| } else { |
| KJ_FAIL_ASSERT("inconsistent b-tree"); |
| } |
| } |
| |
| return node; |
| } |
| |
| void BTreeImpl::renumber(uint oldRow, uint newRow, const SearchKey& searchKey) { |
  // Renumber the given row from oldRow to newRow. searchKey.isAfter() returns false for oldRow
  // and all rows after it. (It is never called on newRow.)
| |
| uint pos = 0; |
| |
| for (auto i KJ_UNUSED: zeroTo(height)) { |
| auto& node = tree[pos].parent; |
| uint indexInParent = searchKey.search(node); |
| pos = node.children[indexInParent]; |
| if (indexInParent < kj::size(node.keys) && node.keys[indexInParent] == oldRow) { |
| node.keys[indexInParent] = newRow; |
| } |
| KJ_DASSERT(pos != 0); |
| } |
| |
| auto& leaf = tree[pos].leaf; |
| uint r = searchKey.search(leaf); |
| if (leaf.rows[r] == oldRow) { |
| leaf.rows[r] = newRow; |
| } else { |
| logInconsistency(); |
| } |
| } |
| |
| uint BTreeImpl::split(Parent& dst, uint dstPos, Parent& src, uint srcPos) { |
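  // Split a full parent node: move the keys and children above the midpoint into dst. The middle
  // key is removed from src and returned so the caller can insert it into the grandparent.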
| constexpr size_t mid = Parent::NKEYS / 2; |
| uint pivot = *src.keys[mid]; |
| acopy(dst.keys, src.keys + mid + 1, Parent::NKEYS - mid - 1); |
| azero(src.keys + mid, Parent::NKEYS - mid); |
| acopy(dst.children, src.children + mid + 1, Parent::NCHILDREN - mid - 1); |
| azero(src.children + mid + 1, Parent::NCHILDREN - mid - 1); |
| return pivot; |
| } |
| |
| uint BTreeImpl::split(Leaf& dst, uint dstPos, Leaf& src, uint srcPos) { |
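  // Split a full leaf: move the rows at and above the midpoint into dst. Unlike a parent split,
  // the pivot row stays in src (the parent only gets a copy of it as a key), and dst must be
  // spliced into the doubly-linked list of leaves just after src.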
| constexpr size_t mid = Leaf::NROWS / 2; |
| uint pivot = *src.rows[mid - 1]; |
| acopy(dst.rows, src.rows + mid, Leaf::NROWS - mid); |
| azero(src.rows + mid, Leaf::NROWS - mid); |
| |
| if (src.next == 0) { |
| endLeaf = dstPos; |
| } else { |
| tree[src.next].leaf.prev = dstPos; |
| } |
| dst.next = src.next; |
| dst.prev = srcPos; |
| src.next = dstPos; |
| |
| return pivot; |
| } |
| |
| void BTreeImpl::merge(Parent& dst, uint dstPos, uint pivot, Parent& src) { |
| // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants |
| // guarantee that the node can't be more than half-empty, or we would have merged it sooner. |
| // (The root can be more than half-empty, but it is never merged with anything.) |
| KJ_DASSERT(src.isHalfFull()); |
| KJ_DASSERT(dst.isHalfFull()); |
| |
| constexpr size_t mid = Parent::NKEYS/2; |
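  // The key that separated the two nodes moves down from the parent to become dst's middle key.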
| dst.keys[mid] = pivot; |
| acopy(dst.keys + mid + 1, src.keys, mid); |
| acopy(dst.children + mid + 1, src.children, mid + 1); |
| } |
| |
| void BTreeImpl::merge(Leaf& dst, uint dstPos, uint pivot, Leaf& src) { |
| // merge() is only legal if both nodes are half-empty. Meanwhile, B-tree invariants |
| // guarantee that the node can't be more than half-empty, or we would have merged it sooner. |
| // (The root can be more than half-empty, but it is never merged with anything.) |
| KJ_DASSERT(src.isHalfFull()); |
| KJ_DASSERT(dst.isHalfFull()); |
| |
| constexpr size_t mid = Leaf::NROWS/2; |
| KJ_DASSERT(dst.rows[mid-1] == pivot); |
| acopy(dst.rows + mid, src.rows, mid); |
| |
| dst.next = src.next; |
| if (dst.next == 0) { |
| endLeaf = dstPos; |
| } else { |
| tree[dst.next].leaf.prev = dstPos; |
| } |
| } |
| |
| void BTreeImpl::move(Parent& dst, uint dstPos, Parent& src) { |
| dst = src; |
| } |
| |
| void BTreeImpl::move(Leaf& dst, uint dstPos, Leaf& src) { |
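  // Copy the leaf to a new position in the node array, then repoint the neighbors' prev/next
  // links. A link of 0 means "none", in which case beginLeaf/endLeaf must be updated instead.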
| dst = src; |
| if (src.next == 0) { |
| endLeaf = dstPos; |
| } else { |
| tree[src.next].leaf.prev = dstPos; |
| } |
| if (src.prev == 0) { |
| beginLeaf = dstPos; |
| } else { |
| tree[src.prev].leaf.next = dstPos; |
| } |
| } |
| |
| void BTreeImpl::rotateLeft( |
| Parent& left, Parent& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) { |
| // Steal one item from the right node and move it to the left node. |
| |
| // Like merge(), this is only called on an exactly-half-empty node. |
| KJ_DASSERT(left.isHalfFull()); |
| KJ_DASSERT(right.isMostlyFull()); |
| |
| constexpr size_t mid = Parent::NKEYS/2; |
| left.keys[mid] = parent.keys[indexInParent]; |
| if (fixup == &parent.keys[indexInParent]) fixup = &left.keys[mid]; |
| parent.keys[indexInParent] = right.keys[0]; |
| left.children[mid + 1] = right.children[0]; |
| amove(right.keys, right.keys + 1, Parent::NKEYS - 1); |
| right.keys[Parent::NKEYS - 1] = nullptr; |
| amove(right.children, right.children + 1, Parent::NCHILDREN - 1); |
| right.children[Parent::NCHILDREN - 1] = 0; |
| } |
| |
| void BTreeImpl::rotateLeft( |
| Leaf& left, Leaf& right, Parent& parent, uint indexInParent, MaybeUint*& fixup) { |
| // Steal one item from the right node and move it to the left node. |
| |
| // Like merge(), this is only called on an exactly-half-empty node. |
| KJ_DASSERT(left.isHalfFull()); |
| KJ_DASSERT(right.isMostlyFull()); |
| |
| constexpr size_t mid = Leaf::NROWS/2; |
| parent.keys[indexInParent] = left.rows[mid] = right.rows[0]; |
| if (fixup == &parent.keys[indexInParent]) fixup = nullptr; |
| amove(right.rows, right.rows + 1, Leaf::NROWS - 1); |
| right.rows[Leaf::NROWS - 1] = nullptr; |
| } |
| |
| void BTreeImpl::rotateRight(Parent& left, Parent& right, Parent& parent, uint indexInParent) { |
| // Steal one item from the left node and move it to the right node. |
| |
| // Like merge(), this is only called on an exactly-half-empty node. |
| KJ_DASSERT(right.isHalfFull()); |
| KJ_DASSERT(left.isMostlyFull()); |
| |
| constexpr size_t mid = Parent::NKEYS/2; |
| amove(right.keys + 1, right.keys, mid); |
| amove(right.children + 1, right.children, mid + 1); |
| |
| uint back = left.keyCount() - 1; |
| |
| right.keys[0] = parent.keys[indexInParent]; |
| parent.keys[indexInParent] = left.keys[back]; |
| right.children[0] = left.children[back + 1]; |
| left.keys[back] = nullptr; |
| left.children[back + 1] = 0; |
| } |
| |
| void BTreeImpl::rotateRight(Leaf& left, Leaf& right, Parent& parent, uint indexInParent) { |
| // Steal one item from the left node and move it to the right node. |
| |
  // Like merge(), this is only called on an exactly-half-empty node.
| KJ_DASSERT(right.isHalfFull()); |
| KJ_DASSERT(left.isMostlyFull()); |
| |
| constexpr size_t mid = Leaf::NROWS/2; |
| amove(right.rows + 1, right.rows, mid); |
| |
| uint back = left.size() - 1; |
| |
| right.rows[0] = left.rows[back]; |
| parent.keys[indexInParent] = left.rows[back - 1]; |
| left.rows[back] = nullptr; |
| } |
| |
| void BTreeImpl::Parent::initRoot(uint key, uint leftChild, uint rightChild) { |
| // HACK: This is typically called on the root node immediately after copying its contents away, |
| // but the pointer used to copy it away may be a different pointer pointing to a different |
  // union member which the compiler may not recognize as aliasing with this object. Just to
| // be extra-safe, insert a compiler barrier. |
| compilerBarrier(); |
| |
| keys[0] = key; |
| children[0] = leftChild; |
| children[1] = rightChild; |
| azero(keys + 1, Parent::NKEYS - 1); |
| azero(children + 2, Parent::NCHILDREN - 2); |
| } |
| |
| void BTreeImpl::Parent::insertAfter(uint i, uint splitKey, uint child) { |
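  // Insert a new key/child pair immediately after child i, shifting subsequent entries to the
  // right. Called after child i splits, to record the pivot key and the new right-hand node.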
| KJ_IREQUIRE(children[Parent::NCHILDREN - 1] == 0); // check not full |
| |
| amove(keys + i + 1, keys + i, Parent::NKEYS - (i + 1)); |
| keys[i] = splitKey; |
| |
| amove(children + i + 2, children + i + 1, Parent::NCHILDREN - (i + 2)); |
| children[i + 1] = child; |
| } |
| |
| void BTreeImpl::Parent::eraseAfter(uint i) { |
| amove(keys + i, keys + i + 1, Parent::NKEYS - (i + 1)); |
| keys[Parent::NKEYS - 1] = nullptr; |
| amove(children + i + 1, children + i + 2, Parent::NCHILDREN - (i + 2)); |
| children[Parent::NCHILDREN - 1] = 0; |
| } |
| |
| } // namespace _ |
| |
| // ======================================================================================= |
| // Insertion order |
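// InsertionOrderIndex threads a doubly-linked list through a flat array. links[0] serves as a
// sentinel that closes the list into a ring: row N is stored at links[N + 1], so a link value of
// 0 always refers to the sentinel (i.e. the start/end of the iteration order).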
| |
| const InsertionOrderIndex::Link InsertionOrderIndex::EMPTY_LINK = { 0, 0 }; |
| |
| InsertionOrderIndex::InsertionOrderIndex(): capacity(0), links(const_cast<Link*>(&EMPTY_LINK)) {} |
| InsertionOrderIndex::InsertionOrderIndex(InsertionOrderIndex&& other) |
| : capacity(other.capacity), links(other.links) { |
| other.capacity = 0; |
| other.links = const_cast<Link*>(&EMPTY_LINK); |
| } |
| InsertionOrderIndex& InsertionOrderIndex::operator=(InsertionOrderIndex&& other) { |
| KJ_DASSERT(&other != this); |
| capacity = other.capacity; |
| links = other.links; |
| other.capacity = 0; |
| other.links = const_cast<Link*>(&EMPTY_LINK); |
| return *this; |
| } |
| InsertionOrderIndex::~InsertionOrderIndex() noexcept(false) { |
| if (links != &EMPTY_LINK) delete[] links; |
| } |
| |
| void InsertionOrderIndex::reserve(size_t size) { |
| KJ_ASSERT(size < (1u << 31), "Table too big for InsertionOrderIndex"); |
| |
| if (size > capacity) { |
| // Need to grow. |
| // Note that `size` and `capacity` do not include the special link[0]. |
| |
| // Round up to the next power of 2. |
| size_t allocation = 1u << (_::lg(size) + 1); |
| KJ_DASSERT(allocation > size); |
| KJ_DASSERT(allocation <= size * 2); |
| |
| // Round first allocation up to 8. |
| allocation = kj::max(allocation, 8); |
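    // E.g. size = 3: lg(3) == 1, so the computed allocation is 4, which is then rounded up to 8,
    // leaving capacity == 7. (The extra slot is consumed by the sentinel links[0].)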
| |
| Link* newLinks = new Link[allocation]; |
| #ifdef KJ_DEBUG |
| // To catch bugs, fill unused links with 0xff. |
| memset(newLinks, 0xff, allocation * sizeof(Link)); |
| #endif |
| _::acopy(newLinks, links, capacity + 1); |
| if (links != &EMPTY_LINK) delete[] links; |
| links = newLinks; |
| capacity = allocation - 1; |
| } |
| } |
| |
| void InsertionOrderIndex::clear() { |
| links[0] = Link { 0, 0 }; |
| |
| #ifdef KJ_DEBUG |
| // To catch bugs, fill unused links with 0xff. |
| memset(links + 1, 0xff, capacity * sizeof(Link)); |
| #endif |
| } |
| |
| kj::Maybe<size_t> InsertionOrderIndex::insertImpl(size_t pos) { |
| if (pos >= capacity) { |
| reserve(pos + 1); |
| } |
| |
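  // Append at the tail: links[0].prev is the current last element (or 0, the sentinel itself,
  // when the list is empty).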
| links[pos + 1].prev = links[0].prev; |
| links[pos + 1].next = 0; |
| links[links[0].prev].next = pos + 1; |
| links[0].prev = pos + 1; |
| |
| return nullptr; |
| } |
| |
| void InsertionOrderIndex::eraseImpl(size_t pos) { |
| Link& link = links[pos + 1]; |
| links[link.next].prev = link.prev; |
| links[link.prev].next = link.next; |
| |
| #ifdef KJ_DEBUG |
| memset(&link, 0xff, sizeof(Link)); |
| #endif |
| } |
| |
| void InsertionOrderIndex::moveImpl(size_t oldPos, size_t newPos) { |
| Link& link = links[oldPos + 1]; |
| Link& newLink = links[newPos + 1]; |
| |
| newLink = link; |
| |
| KJ_DASSERT(links[link.next].prev == oldPos + 1); |
| KJ_DASSERT(links[link.prev].next == oldPos + 1); |
| links[link.next].prev = newPos + 1; |
| links[link.prev].next = newPos + 1; |
| |
| #ifdef KJ_DEBUG |
| memset(&link, 0xff, sizeof(Link)); |
| #endif |
| } |
| |
| } // namespace kj |