Search code examples
java · compression · huffman-code · image-compression

Problems when compressing and decompressing a small .png file using Huffman Coding (Java)


So I have a Java class that implements Huffman coding, and I want to use it to compress and decompress any type of file.

Here is my code:

import java.io.*;
import java.util.*;

public class HuffmanCoding {

    public static void main(String[] args) throws IOException {

        String inputFilePath = "C:\\Users\\MAJ\\eclipse-workspace\\ProjectTwo\\src\\inputFile.png";
        String encodedOutputFilePath = "C:\\Users\\MAJ\\eclipse-workspace\\ProjectTwo\\src\\encodedOutputFile.txt";
        // get the frequencies of all the bytes in the file
        byte[] data = fileToByteArray(inputFilePath);
        Map<Byte, Integer> frequencyTable = getByteFrequencies(data);

        // create a Huffman coding tree
        Node root = createHuffmanTree(frequencyTable);

        // create the table of encodings for each byte
        Map<Byte, String> encodings = createEncodings(root);

        // encode the input file and write the encoded output to the output file
        encodeFile(data, encodings, encodedOutputFilePath);
        String inputFileExtension = inputFilePath.substring(inputFilePath.lastIndexOf('.'));
        String decompressedOutputFilePath = "C:\\Users\\MAJ\\eclipse-workspace\\ProjectTwo\\src\\decompressedOutputFile" + inputFileExtension;
        decodeFile(encodedOutputFilePath, decompressedOutputFilePath, root);
    }

    public static byte[] fileToByteArray(String filePath) throws IOException {
        // read the file
        BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(filePath));
        byte[] data = inputStream.readAllBytes();
        inputStream.close();

        return data;
    }


    public static Map<Byte, Integer> getByteFrequencies(byte[] data) {
        // map for storing the frequencies of the bytes
        Map<Byte, Integer> frequencyTable = new HashMap<>();

        // count the frequencies of the bytes
        for (byte b : data) {
            frequencyTable.put(b, frequencyTable.getOrDefault(b, 0) + 1);
        }

        return frequencyTable;
    }

    public static Node createHuffmanTree(Map<Byte, Integer> frequencyTable) {
        // create a priority queue to store the nodes of the tree
        PriorityQueue<Node> queue = new PriorityQueue<>(Comparator.comparingInt(n -> n.frequency));

        // create a leaf node for each byte and add it to the priority queue
        for (Map.Entry<Byte, Integer> entry : frequencyTable.entrySet()) {
            queue.add(new Node(entry.getKey(), entry.getValue()));
        }

        // create the Huffman tree
        while (queue.size() > 1) {
            // remove the two nodes with the lowest frequency from the queue
            Node left = queue.poll();
            Node right = queue.poll();

            // create a new internal node with these two nodes as children and the sum of their frequencies as the frequency
            assert right != null;
            Node parent = new Node(left.frequency + right.frequency, left, right);

            // add the new internal node to the queue
            queue.add(parent);
        }

        // the root node is the node remaining in the queue
        return queue.poll();

    }


    // node class for the Huffman tree
    static class Node {
        int frequency;
        byte character;
        Node left;
        Node right;

        Node(int frequency, Node left, Node right) {
            this.frequency = frequency;
            this.left = left;
            this.right = right;
        }

        Node(byte character, int frequency) {
            this.character = character;
            this.frequency = frequency;
        }
    }

    public static Map<Byte, String> createEncodings(Node root) {
        // map for storing the encodings of the bytes
        Map<Byte, String> encodings = new HashMap<>();

        // create the encodings
        createEncodings(root, "", encodings);

        return encodings;
    }

    private static void createEncodings(Node node, String encoding, Map<Byte, String> encodings) {
        if (node == null) {
            return;
        }
        if (node.character != 0) {
            // this is a leaf node, so add the encoding to the map
            encodings.put(node.character, encoding);
        } else {
            // this is an internal node, so recurse on the left and right children
            createEncodings(node.left, encoding + "0", encodings);
            createEncodings(node.right, encoding + "1", encodings);
        }
    }



    public static void encodeFile(byte[] data, Map<Byte, String> encodings, String outputFilePath) throws IOException {
        BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilePath));

        // create a string builder for building the encoded string
        StringBuilder sb = new StringBuilder();

        // encode the data and add the encoded string to the string builder
        for (byte b : data) {
            String str = encodings.get(b);
            if (str == null) {
                str = "";
            }
            sb.append(str);
        }

        // write the encoded string to the output file
        writer.write(sb.toString());

        writer.close();
    }




    public static void decodeFile(String inputFilePath, String outputFilePath, Node root) throws IOException {
        // read the encoded data from the input file
        BufferedReader reader = new BufferedReader(new FileReader(inputFilePath));
        String encodedData = reader.readLine();
        reader.close();

        // create the output file
        BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream(outputFilePath));

        // decode the data and write it to the output file
        Node current = root;
        for (int i = 0; i < encodedData.length(); i++) {
            current = encodedData.charAt(i) == '0' ? current.left : current.right;
            assert current != null;
            if (current.left == null && current.right == null) {
                outputStream.write(current.character);
                current = root;
            }
        }
        outputStream.close();
    }




}

When compressing and decompressing a .txt file, everything works fine. But when compressing and decompressing a small .png image (5 KB), the decompressed output file, which should be identical to the original .png, has the correct size but fails to load in any image viewer. I can't figure out what the problem is, and I assume the same problem will occur with other kinds of files (.mp4, .mp3, .jpeg, .exe, etc.). Please help me out if you can!


Solution

  • You can't have a "special" character if you want to be able to code all possible bytes. Also you don't need one. Leaves are already identified by null pointers. If you change:

    if (node.character != 0) {
    

    to:

    if (node.left == null) {
    

    then it works.

    You still have a ways to go before you have a working Huffman coder and decoder. You need to write bits instead of bytes, so that you're not dramatically expanding your data instead of compressing it. Having done that, now you'll need to deal with the extra bits in the last byte, to make sure the decoder doesn't decode an extraneous symbol or two at the end. To do that, you'll need to either send the number of symbols ahead of the symbols, or encode an additional end-of-stream symbol. You need to represent and encode the Huffman code at the start of the compressed data, so that the decoder can interpret the codes. You need to demonstrate your encoder and decoder work by making them separate programs so that the only thing the decoder has to go on is the one compressed file.