Search code examples
c++data-structuresb-tree

B-Tree implementation on C++ has a memory leak?


I have this B-Tree implementation, which I'm testing using search() and insert(). Testing is basically this:

void function(){
    BTree b16(16);
    // do lots of inserts and searchs on b16
}
for(many_times){
    function();
}

If I understand correctly, after each iteration of function(), b16 should get destroyed. However, after ~250 iterations, I get a bad_alloc error, which means I have a memory leak.

Is there a problem with the destructors? Here's the implementation:

#include <iostream>
#include <vector>
using namespace std;
typedef unsigned int uint;

class BNode{
    private:
        uint *keys;
        int B;
        BNode **C;
        int n;
        bool leaf;

    public:
        BNode(int temp, bool bool_leaf);
        ~BNode();
        void insertNonFull(uint k);
        void splitChild(int i, BNode *y);
        void traverse();
        BNode *search(uint k);
        friend class BTree;
};

class BTree{
    private:
        BNode *root;
        int B;
    public:
        BTree(int temp);
        ~BTree();
        BNode *search(uint k);
        int search_bool(uint k);
        void insert(uint k);
};

BNode::BNode(int t1, bool leaf1) {
    B = t1;
    leaf = leaf1;
    keys = new uint[2*B - 1];
    C = new BNode *[2*B];
    n = 0;
}

BNode::~BNode(){
    delete[] keys;
    delete[] C;
}

BNode *BNode::search(uint k){
    int i = 0;
    while (i < n && k > keys[i]){
        i++;
    }
    if(keys[i] == k){
        return this;
    }
    if (leaf == true){
        return NULL;
    }
    return C[i]->search(k);
}

void BTree::insert(uint k){
    if (root == NULL) {
        root = new BNode(B, true);
        root->keys[0] = k;
        root->n = 1;
    } 
    else{
        if (root->n == 2*B - 1){
            BNode *s = new BNode(B, false);
            s->C[0] = root;
            s->splitChild(0, root);
            int i = 0;
            if (s->keys[0] < k){
                i++;
            }
            s->C[i]->insertNonFull(k);
            root = s;
        } 
        else{
            root->insertNonFull(k);
        }
    }
}

void BNode::insertNonFull(uint k) {
    int i = n - 1;
    if (leaf == true) {
        while (i>=0 && keys[i] > k) {
            keys[i+1] = keys[i];
            i--;
        }
        keys[i+1] = k;
        n = n + 1;
    } 
    else {
        while (i>=0 && keys[i] > k){
            i--;
        }
        if (C[i+1]->n == 2*B-1) {
            splitChild(i+1, C[i+1]);
            if (keys[i + 1] < k){
                i++;
            }
        }
        C[i+1]->insertNonFull(k);
    }
}

void BNode::splitChild(int i, BNode *y) {
    BNode *z = new BNode(y->B, y->leaf);
    z->n = B - 1;
    for (int j = 0; j < B - 1; j++){
        z->keys[j] = y->keys[j+B];
    }
    if (!y->leaf){
        for (int j = 0; j < B; j++)
        z->C[j] = y->C[j + B];
    }
    y->n = B - 1;
    for(int j=n; j >= i+1; j--){
        C[j+1] = C[j];
    }
    C[i + 1] = z;
    for (int j = n - 1; j >= i; j--){
        keys[j+1] = keys[j];
    }
    keys[i] = y->keys[B - 1];
    n = n + 1;
}

BTree::BTree(int temp){
    root = NULL;
    B = temp;
}

BTree::~BTree(){
    delete root;
}

BNode *BTree::search(uint k){
    return (root == NULL) ? NULL : root->search(k);
}

int BTree::search_bool(uint k){
    if(search(k) != NULL){
        return true;
    }
    else{
        return false;
    }
}

Thanks in advance.


Solution

  • So, the simple diagnosis is that

    delete[] C;
    

    deletes only the array, not the nodes contained by it. So, (a) make sure they're properlu zero-initialized (b) delete them as well:

    BNode** C    = new BNode* [2 * B] { 0 };
    
    // in the destructor:
    for (int i = 0; i < 2 * B; ++i)
        delete C[i];
    delete[] C;
    

    HOWEVER.

    This doesn't work well because nodes can be split. After you moved some nodes from one node's C to another node's C, you run into double-free. So, when you split nodes, you have to make sure you set the moved-from C elemeent to NULL again:

                z->C[j] = y->C[j + B];
                y->C[j + B] = nullptr;
    

    Now, this program runs clean under valgrind, ubsan and asan and without leaks:

    Live On Coliru

    #include <iostream>
    #include <vector>
    using uint = unsigned;
    
    class BNode {
      private:
        int     B;
        bool    leaf;
        uint*   keys = new uint[2 * B - 1]{0};
        BNode** C    = new BNode* [2 * B] { 0 };
        int     n    = 0;
    
      public:
        BNode(int t1, bool leaf1) : B(t1), leaf(leaf1) {}
    
        ~BNode()
        {
            delete[] keys;
            for (int i = 0; i < 2 * B; ++i)
                delete C[i];
            delete[] C;
        }
    
        BNode const* search(uint k) const
        {
            int i = 0;
            while (i < n && k > keys[i]) {
                i++;
            }
            if (keys[i] == k) {
                return this;
            }
            return leaf ? nullptr : C[i]->search(k);
        }
    
        void insertNonFull(uint k)
        {
            int i = n - 1;
            if (leaf == true) {
                while (i >= 0 && keys[i] > k) {
                    keys[i + 1] = keys[i];
                    i--;
                }
                keys[i + 1] = k;
                n           = n + 1;
            } else {
                while (i >= 0 && keys[i] > k) {
                    i--;
                }
                if (C[i + 1]->n == 2 * B - 1) {
                    splitChild(i + 1, C[i + 1]);
                    if (keys[i + 1] < k) {
                        i++;
                    }
                }
                C[i + 1]->insertNonFull(k);
            }
        }
    
        void splitChild(int i, BNode* y)
        {
            BNode* z = new BNode(y->B, y->leaf);
            z->n     = B - 1;
            for (int j = 0; j < B - 1; j++) {
                z->keys[j] = y->keys[j + B];
            }
            if (!y->leaf) {
                for (int j = 0; j < B; j++) {
                    z->C[j]     = y->C[j + B];
                    y->C[j + B] = nullptr;
                }
            }
            y->n = B - 1;
            for (int j = n; j >= i + 1; j--) {
                C[j + 1] = C[j];
            }
            C[i + 1] = z;
            for (int j = n - 1; j >= i; j--) {
                keys[j + 1] = keys[j];
            }
            keys[i] = y->keys[B - 1];
            n       = n + 1;
        }
    
        friend class BTree;
    };
    
    class BTree {
      private:
        BNode* root = nullptr;
        int    B;
    
      public:
        BTree(int temp)
        {
            root = nullptr;
            B    = temp;
        }
    
        ~BTree() { delete root; }
    
        BNode const* search(uint k) const
        {
            return (root == nullptr) ? nullptr : root->search(k);
        }
    
        bool search_bool(uint k) const { return search(k) != nullptr; }
    
        void insert(uint k)
        {
            if (!root) {
                root          = new BNode(B, true);
                root->keys[0] = k;
                root->n       = 1;
            } else {
                if (root->n == 2 * B - 1) {
                    BNode* s = new BNode(B, false);
                    s->C[0]  = root;
                    s->splitChild(0, root);
                    int i = 0;
                    if (s->keys[0] < k) {
                        i++;
                    }
                    s->C[i]->insertNonFull(k);
                    root = s;
                } else {
                    root->insertNonFull(k);
                }
            }
        }
    };
    
    int main()
    {
        for (int b = 8; b < 17; ++b) {
            BTree tree(b);
            for (int i = 0; i < 100'000; ++i)
                tree.insert(rand() % 1000);
        }
    }
    

    In Closing

    Kudos for getting the algorithmics largely right here. That's not easy.

    As you can see in my cleanup/review I hardened a lot of stuff, mainly around initialization. This is an important habit. I can't actually rule out that not having that would have exposed other sleeping bugs.

    Also note the increased const-correctness.

    Also, like other said, prefer smart pointers and modern C++ techniques. It will be amazing how much less error prone just using std::array, unique_ptr and so will be. For example, the bug with moving nodes in splitChild would never have compiled because you have to explcitly move-assign unique_ptr

    Bonus

    Example in more modern C++:

    • without any new/delete
    • without any raw pointer
    • no more manual destructors (even no constructors required, really)
    • compiler checked move semantics, NICE!
    • statically known B factor, so everything becomes 10x more performant, while still being tunable
    • generic key (element) type, so you can now store std::string, double, whatnot
    • generic comparator, so you sort your keys in alternative orders (e.g. Btree<std::string, 16, std::greater<> > to store the keys in descending order instead of ascending).
    • No leaks!

    Live On Coliru

    #include <array>
    #include <algorithm>
    #include <memory>
    #include <iostream>
    
    template <typename T, unsigned B = 16, typename Cmp = std::less<T>>
    class BTree {
      private:
        static constexpr unsigned MaxKeys     = 2 * B - 1;
        static constexpr unsigned MaxChildren = 2 * B;
    
        struct BNode;
        using NodePtr = std::unique_ptr<BNode>;
    
        struct BNode {
            bool _leaf;
            int  _n = 0;
    
            std::array<T, MaxKeys>           _keys{};
            std::array<NodePtr, MaxChildren> _children{};
    
            BNode(bool leaf1) : _leaf(leaf1) {}
    
            BNode const* search(T k, Cmp cmp) const
            {
                int i = 0;
                while (i < _n && cmp(_keys[i], k)) {
                    i++;
                }
                if (_keys[i] == k) {
                    return this;
                }
                return _leaf ? nullptr : _children[i]->search(k, cmp);
            }
    
            void insertNonFull(T k, Cmp cmp)
            {
                int i = _n - 1;
                if (_leaf == true) {
                    while (i >= 0 && cmp(k, _keys[i])) {
                        _keys[i + 1] = _keys[i];
                        i--;
                    }
                    _keys[i + 1] = k;
                    _n           = _n + 1;
                } else {
                    while (i >= 0 && cmp(k, _keys[i])) {
                        i--;
                    }
                    if (_children[i + 1]->_n == MaxKeys) {
                        splitChild(i + 1, *_children[i + 1]);
                        if (cmp(_keys[i + 1], k)) {
                            i++;
                        }
                    }
                    _children[i + 1]->insertNonFull(k, cmp);
                }
            }
    
            void splitChild(int i, BNode& y)
            {
                NodePtr z = std::make_unique<BNode>(y._leaf);
                z->_n     = B - 1;
                for (unsigned j = 0; j < B - 1; j++) {
                    z->_keys[j] = y._keys[j + B];
                }
                if (!y._leaf) {
                    for (unsigned j = 0; j < B; j++) {
                        z->_children[j] = std::move(y._children[j + B]);
                    }
                }
                y._n = B - 1;
                for (int j = _n; j >= i + 1; j--) {
                    _children[j + 1] = std::move(_children[j]);
                }
                _children[i + 1] = std::move(z);
                for (int j = _n - 1; j >= i; j--) {
                    _keys[j + 1] = _keys[j];
                }
                _keys[i] = y._keys[B - 1];
                _n       = _n + 1;
            }
        };
    
        NodePtr root = nullptr;
        Cmp     _cmp{};
    
      public:
        BTree(Cmp cmp = {}) : _cmp(std::move(cmp)) {}
    
        BNode const* search(T k) const {
            return root ? root->search(k, _cmp) : nullptr;
        }
    
        bool search_bool(T k) const { return search(k) != nullptr; }
    
        void insert(T k)
        {
            if (!root) {
                root           = std::make_unique<BNode>(true);
                root->_keys[0] = k;
                root->_n       = 1;
            } else {
                if (root->_n == MaxKeys) {
                    NodePtr s       = std::make_unique<BNode>(false);
                    s->splitChild(0, *root);
                    s->_children[0] = std::move(root);
                    int i = 0;
                    if (_cmp(s->_keys[0], k)) {
                        i++;
                    }
                    s->_children[i]->insertNonFull(k, _cmp);
                    root = std::move(s);
                } else {
                    root->insertNonFull(k, _cmp);
                }
            }
        }
    };
    
    int main()
    {
        using Asc  = std::less<>;
        using Desc = std::greater<>;
        BTree<double,   8,  Asc>  b8;
        BTree<int,      9,  Desc> b9;
        BTree<unsigned, 10, Asc>  b10;
        BTree<size_t,   11, Desc> b11;
        BTree<double,   12, Asc>  b12;
        BTree<int,      13, Desc> b13;
        BTree<unsigned, 14, Asc>  b14;
        BTree<size_t,   15, Desc> b15;
        BTree<int,      16> b16; // default is ascending
    
        for (int i = 0; i < 100'000; ++i) {
            b8.insert(rand() % 10000);
            b9.insert(rand() % 10000);
            b10.insert(rand() % 10000);
            b11.insert(rand() % 10000);
            b12.insert(rand() % 10000);
            b13.insert(rand() % 10000);
            b14.insert(rand() % 10000);
            b15.insert(rand() % 10000);
            b16.insert(rand() % 10000);
        }
    
        {
            struct NC {
                int v;
                bool operator==(NC const& o) const { return v == o.v; }
            };
            struct CMP {
                bool operator()(NC const& a, NC const& b) const { return a.v < b.v; }
            };
    
            BTree<NC, 8, CMP> b8;
            b8.insert({42});
            bool ok = b8.search_bool({42});
        }
    }