Search code examples
javahuffman-codebinary-heap

Building a Huffman Tree using a Binary Heap


I am currently trying to write a program that reads in a text file and encodes it by creating a HuffmanTree. I am using parallel arrays in a binary heap of a priority queue and to keep track of my Huffman Trees.

I know the principal of removing two mins out of the heap, merging them, then inserting them back into the heap until there is one left, but I am having trouble translating that logic/algorithm to code.

Here is my HuffmanEncode class:

public class HuffmanEncode {

    public HuffmanEncode(String in, String out) {
        // Implements the Huffman encoding algorithm
        // Add private methods and instance variables as needed
        int[] freqs = new int[128]; // character counts
        char[] chars = new char[128]; //characters

        freqs = countFrequencies(in);
        HuffmanTree[] trees = new HuffmanTree[128]; //single node trees

        for(int i= 0; i < freqs.length; i++) {
            chars[i] = (char)i;
            trees[i] = new HuffmanTree(chars[i]);
        }  

        BinaryHeap heap = new BinaryHeap(128); // create a binary heap
        for(int i = 0; i < 128; i++) { 
            heap.insert(freqs[i], trees[i]); 
        }

        // STUCK HERE

        buildTree(heap);
        HuffmanTree root = new HuffmanTree();

        // STUCK HERE

    }

    private void buildTree(BinaryHeap h) {
        // grab two smallest
        while (h.getSize() > 1) { //repeat until there is only one left
            int temp1, temp2;
            HuffmanTree t1, t2;
            temp1 = h.getMinPriority();
            temp2 = h.getMinPriority();
            // add their frequency to create new single node tree with char 128
            int sum = temp1 + temp2;
            HuffmanTree node = new HuffmanTree((char)128);
            // insert it back into the heap
            h.insert(sum, node);
        }
    }

    // count the frequencies of all characters in ascii 0-127 and store them in an array
    private int[] countFrequencies(String input) {
        File f1 = new File(input);
        int[] count = new int[128];
        try {
            BufferedReader in = new BufferedReader (new FileReader (f1));
            int nextChar;
            char ch;

            while((nextChar = in.read()) != -1) { // stop when end of file is reached
                ch = ((char) nextChar);
                if(ch <= 127)
                    count[ch]++;
            }
            in.close();
        } catch (FileNotFoundException e) {
            System.out.println("file not found");
        } catch (IOException e) {
            System.out.println("Buffered Reader error");
        }
        return count;
    }

Here is my Binary Heap class:

public class BinaryHeap {
    // implements a binary heap where the heap rule is the value in the parent 
    // node is less than or equal to the values in the child nodes

    // implementation uses parallel arrays to store the priorities and the trees
    // must use this implementation

    int priority[];
    HuffmanTree trees[];
    int size;

    public BinaryHeap(int s) {
        priority = new int[s+1];
        trees = new HuffmanTree[s+1];
        size = 0;
    }

    public void removeMin() {
        // PRE: size != 0;
        // removes the priority and the tree at the root of the heap
        int parent;
        int child;
        int x = priority[size];
        HuffmanTree z = trees[size];
        size--;
        child = 2;
        while(child <= size) {
            parent = child / 2;
            if(child < size && priority[child+1] < priority[child])
                child++;
            if(x < priority[child]) break;
            priority[parent] = priority[child];
            trees[parent] = trees[child];
            child = 2 * child;
        }
        priority[child/2] = x;
        trees[child/2] = z;
    }

    public int getMinPriority() {
        // PRE: size != 0
        // return the priority in the root of the heap
        int min = priority[1];
        removeMin();
        return min;
    }

    public boolean full() {
        // return true if the heap is full otherwise return false
        return size == priority.length-1;
    }

    public void insert(int p, HuffmanTree t) {
        // insert the priority p and the associated tree t into the heap
        // PRE: !full()
        int parent;
        int child;
        size++;
        child = size;
        parent = child/2;
        priority[0] = p;
        trees[0] = t;
        while (priority[parent] > p) {
            priority[child] = priority[parent];
            trees[child] = trees[parent];
            child = parent;
            parent = child/2;
        }
        priority[child] = p;
        trees[child] = t;
    }

    public int getSize() {
        // return the number of values (priority, tree) pairs in the heap
        return size;
    }
}

Here is the class for the HuffmanTree object:

import java.util.*;

public class HuffmanTree {

    private class Node{
        private Node left;
        private char data;
        private Node right;
        private Node parent;

        private Node(Node L, char d, Node R, Node P) {
            left = L;
            data = d;
            right = R;
            parent = P;
        }
    }

    private Node root;
    private Node current; // value is changed by move methods

    public HuffmanTree() {
        root = null;
        current = null;
    }

    public HuffmanTree(char d) {
        // single node tree
        root = new Node(null, d, null, null);
        current = null;
    }

    public HuffmanTree(String t, char nonLeaf) {
        // Assumes t represents a post order representation of the tree
        // nonLeaf is the char value of the data in the non-leaf nodes
        // use (char) 128 for the non-leaf value
    }

    public HuffmanTree(HuffmanTree b1, HuffmanTree b2, char d) {
        // makes a new tree where b1 is the left subtree and b2 is the right subtree and d is the data in root
        root = new Node(b1.root, d, b2.root, null);
        current = null;
    }

    // use the move methods to traverse the tree
    // the move methods change the value of current
    // use these in the decoding process

    public void moveToRoot() {
        // change current to reference the root of the tree
        current = root;
    }

    public void moveToLeft() {
        // PRE: the current node is not a leaf
        current = current.left;
    }

    public void moveToRight() {
        // PRE: the current node is not a leaf
        current = current.right;
    }

    public void moveToParent() {
        // PRE: the current node is not the root
        current = current.parent;
    }

    public boolean atRoot() {
        // returns true if the current node is the root otherwise returns false
        if(current.equals(root)) {
            return true;
        }
        return false;
    }

    public boolean atLeaf() {
        // returns true if the current references a leaf otherwise return false
        if(current.left == null && current.right == null && current.parent != null) {
            return true;
        }
        return false;
    }

    public char current() {
        // returns the data value in the node referenced by current
        return current.data;
    }

    public Iterator<String> iterator(){
        //return a new path iterator object
        return new PathIterator();
    }

    public String toString() {
        // returns a string representation of the tree using postorder format
        return toString(root);
    }

    private String toString(Node r) {
        if(r == null)
            return "";

        toString(r.left);

        toString(r.right);

        return r.data + "";
    }

    public class PathIterator implements Iterator<String>{
        // the iterator returns the path (a series of 0s and 1s) to each leaf
        // DO NOT compute all paths in the constructor
        // only compute them as needed (similar to what you did in homework 2)
        // add private methods and variables as needed

        public PathIterator() {
        }

        public boolean hasNext() {
            return true;
        }

        public String next() {
            // the format of the string should be leaf value, a space, a sequence of 0s and 1s
            // the 0s and 1s indicate the path from the root the node containing the leaf value
            String result = "";
            return result;
        }

        public void remove() {
            // optional method not implemented
        }
    }

}

I understand not all of the code is complete, it is a work in progress. Currently I am trying to build the Tree using the HuffmanEncode class.

My question is how do I go about using the Binary Heap's parallel arrays to construct a binary tree? I have tried pulling two elements out of the array, adding their frequencies to create a new node, and insert them back into the tree (as shown in code), but I don't know how to actually keep that parallel with using the HuffmanTree Constructor to merge two trees together. How can I assure this all happens smoothy?


Solution

  • When you make the new node to insert back into the heap, you need to first connect its left and right branches to the two nodes you pulled out of the heap.