Search code examples
javascriptalgorithmsortingcomparisonheapsort

Heapsort implementation does too many comparisons


I just implemented the heapsort algorithm. Im aiming to as few comparisons as possible. My implementation works, but does more than twice as many comparisons than this sample I found online: http://www.csse.monash.edu.au/~lloyd/tildeAlgDS/Sort/Heap/

The test array is: 6 2 3 1 5 4 2 5 1 6 7 4 3 2 6 7 3 2 5 6 1 8 7 5 4 3 2 1 5

The sample I found online did 194 comparisons to sort that array, my implementation did 570 comparisons.

Here the code:

$(document).ready(function(){

        init();

        });

    var comparisons = 0;

    var init = function()
    {    
        var dataSource = getTestData();

        var root = {value: dataSource[0]};

        //Building heap
        for(var i = 1; i < dataSource.length; i++)
            addChild(root, {value: dataSource[i]});

        console.log("--- Source: ---")
        console.log(JSON.stringify(dataSource));

        console.log("--- Input: ---")
        console.log(JSON.stringify(dataSource));

        console.log("--- Tree: ---")
        console.log(JSON.stringify(root, null, 4));
        console.log("Nodes: " + countChildren(root));

        var sortedArray = new Array();
        comparisons = 0;

        //Do sorting here
        while(countChildren(root) != 0){
            sortNode(root); //Sorting heap
            sortedArray.unshift(root.value); //Adding the biggest entry from the top of the heap to the output
            deleteNode(root); //Removing the top of the heap
        }

        console.log("--- Sorted Tree: ---")
        console.log(JSON.stringify(root, null, 4));
        console.log("Nodes: " + countChildren(root));

        console.log("--- Sorted: ---")
        console.log(JSON.stringify(sortedArray));

        console.log("!!! Comparisons: " + comparisons + " !!!");
    }

    var deleteNode = function(item)
    {
        if (item.left == null && item.right == null)
            item.value = null;
        else if (item.left != null) {
            item.value = item.left.value;

            if (countChildren(item.left) == 1)
                item.left = null;
            else
                deleteNode(item.left)
        }
        else if (item.right != null) {
            item.value = item.right.value;

            if (countChildren(item.right) == 1)
                item.right = null;
            else
                deleteNode(item.right)
        }
    }

    var sortNode = function(item)
    {
        if (item.left != null && item.right == null){
            sortNode(item.left);

            comparisons++;
            if (item.value < item.left.value) //Comparison
            {
                var tmp = item.value;
                item.value = item.left.value;
                item.left.value = tmp;
            }
        }else if (item.right != null && item.left == null){
            sortNode(item.right);

            comparisons++;
            if (item.value < item.right.value) //Comparison
            {
                var tmp = item.value;
                item.value = item.right.value;
                item.right.value = tmp;
            }
        }
        else if (item.right != null && item.left != null) {
            sortNode(item.right);
            sortNode(item.left);

            //Some more comparisons
            var left_bigger_right = (item.right.value <= item.left.value);        comparisons++;
            var left_bigger_this = (item.value < item.left.value);                comparisons++;
            var right_bigger_this = (item.value < item.right.value);              comparisons++;

            if ((left_bigger_this && left_bigger_right) || (right_bigger_this && left_bigger_right)) {
                var tmp = item.value;
                item.value = item.left.value;
                item.left.value = tmp;
            }
            else if (right_bigger_this && left_bigger_right == false){
                var tmp = item.value;
                item.value = item.right.value;
                item.right.value = tmp;
            }
        }
        else{ //No children
            //Nothing to do :)
        }
    }

    var addChild = function(item, toAdd)
    {
        if(item.left == null)
            item.left = toAdd;
        else if (item.right == null)
            item.right = toAdd;
        else{ //if(item.left != null && item.right != null)
            childrenCountLeft = countChildren(item.left);
            childrenCountRight = countChildren(item.right);

            if (childrenCountRight < childrenCountLeft)
                addChild(item.right, toAdd);
            else if (childrenCountLeft < childrenCountRight)
                addChild(item.left, toAdd);
            else //if (childrenCountLeft == childrenCountRight)
                addChild(item.left, toAdd);
        }
    }

    var countChildren = function(item){
        var children = 0;

        if (item.value != null)
            children += 1;

        if (item.left != null)
            children += countChildren(item.left);
        if (item.right != null)
            children += countChildren(item.right);

        return children;
    }

    var getTestData = function(){
        return new Array(6 ,2 ,3 ,1 ,5 ,4 ,2 ,5 ,1 ,6 ,7 ,4 ,3 ,2 ,6 ,7 ,3 ,2 ,5 ,6 ,1 ,8 ,7 ,5 ,4 ,3 ,2 ,1 ,5);
    }

My question is: On what part did I fail to implement the heapsort-algorithm? Where are the unnecessary comparisons?

They have to be in the sortNode function. I marked all the comparisons with comments.

I dont see how I can improve this function, to do less comparisons. So that would be my question.


Solution

  • What you have implemented does not look like a heap sort(to be more precise, the data structure you are using is not a proper binary heap).

    The sortNode functions always makes a recursive call for both of its children, which results in traversing the entire heap all the time. It is definitely not the way a heap should work. So all extra comparisons come from an incorrect algorithm(your implementation does O(n ^ 2) comparisons, while a correct one should do only O(n log n)).

    How to fix? As I have said above, the data structure you have know doesn't look like a heap, so I'd recommend implementing a heap properly from scratch(you can read a wikipedia article to make sure that you understand it properly).