I was able to write a code for huffman coding only using queue library. But as I save my file for compression it gives a larger byte size than the original file.
ex.
filesize.txt has 17 bytes it contain a string "Stressed-desserts" while
compressedfile.bin has 44 bytes which contains the huffman codes of the original file "01111011000011110001001100100011110010010111".
This is my whole code
#include <iostream>
#include <queue>
#include <fstream>
using namespace std;
struct HuffNode{
int my_Frequency;
char my_Char;
string my_Code;
HuffNode* my_Left;
HuffNode* my_Right;
};
//global variables
int freq[256] = {0};
string encoded = "";
string filename;
//Comparing the frequency in the priority queue
struct compare_freq {
bool operator()(HuffNode* l, HuffNode* r) {
return l->my_Frequency > r->my_Frequency;
}
};
priority_queue <HuffNode*, vector<HuffNode*>, compare_freq> freq_queue;
//get the file from user
string get_file_name()
{
cout << "Input file name to compress: ";
cin >> filename;
return filename;
}
//Scan the file to be compressed and tally all the occurence of all characters.
void file_getter()
{
fstream fp;
char c;
fp.open(get_file_name(), ios::in);
if(!fp)
{
cout << "Error: Couldn't open file " << endl;
system("pause");
}
else
{
while(!fp.eof())
{
c = fp.get();
freq[c]++;
}
}
fp.close();
}
//HuffNode to create a newNode for queue containing the letter and the frequency
HuffNode* set_Node(char ch, int count)
{
HuffNode* newNode = new HuffNode;
newNode->my_Frequency = count;
newNode->my_Char = ch;
newNode->my_Code = "";
newNode->my_Right = nullptr;
newNode->my_Left = nullptr;
return newNode;
}
//Sort or Prioritize characters based on numbers of occurences in text.
void insert_Node(char ch, int count)
{
//pass the ch and count to the newNodes for queing
freq_queue.push(set_Node(ch, count));
}
void create_Huffman_Tree()
{
HuffNode* root;
file_getter();
//insert the characters in the their frequencies into the priority queue
for(int i = 0; i < 256; i++)
{
if(freq[i] > 0)
{
insert_Node(char(i), freq[i]);
}
}
//build the huffman tree
while(freq_queue.size() > 1)
{
//get the two highest priority nodes
HuffNode* for_Left = freq_queue.top();
freq_queue.pop();
HuffNode* for_Right = freq_queue.top();
freq_queue.pop();
//Create a new HuffNode with the combined frequency of the left and right children
int freq = for_Left->my_Frequency + for_Right->my_Frequency;
char ch = '$';
root = set_Node(ch, freq);
root->my_Left = for_Left;
root->my_Right = for_Right;
//Insert the new node into the priority_queue.
freq_queue.push(root);
}
// The remaining HuffmanNode in the queue is the root of the Huffman tree
root = freq_queue.top();
}
void preOrderTraverse(HuffNode* root, char c, string code)
{
if (root == nullptr) {
// If the tree is empty, return
return;
}
if (root->my_Char == c)
{
// If the current HuffmanNode is a leaf HuffmanNode, print the code for the character.
root->my_Code = code;
encoded += code;
return;
}
// Otherwise, recurse on the left and right children
preOrderTraverse(root->my_Left, c, code + "0");
preOrderTraverse(root->my_Right, c, code + "1");
}
void encode_File(string ccode)
{
HuffNode* root = freq_queue.top();
for(int i = 0; i < ccode.length(); i++)
{
char c = ccode[i];
string code = "";
preOrderTraverse(root, c, code);
}
}
void save_Huffman_Code()
{
fstream fp, fp2;
fp.open("Compressed_file.bin", ios::out);
fp2.open(filename, ios::in);
string ccode;
getline(fp2, ccode);
encode_File(ccode);
fp << encoded;
fp.close();
fp2.close();
}
int main()
{
create_Huffman_Tree();
HuffNode* root = freq_queue.top();
save_Huffman_Code();
}
I should get a compressed file that has a smaller byte size than the original. I am trying to write the code without using bit operations, unorderedmap or map. I only use priority_queue for the program.
You are writing eight bits per bit to your output, so it is eight times larger than it's supposed to be. You want to write one bit per bit. To write bits, you need to accumulate them, one by one, into a byte buffer until you have eight, then write that byte. At the end, write the remaining bits. Use the bit operators <<
and |
to put the bits into the byte buffer. E.g. for each bit
equal to 0 or 1:
unsigned buf = 0, n = 0;
...
buf |= bit << n;
if (++n == 8) {
fp.put(buf);
buf = n = 0;
}
...
if (n)
fp.put(buf);
There are many other things wrong with your code.
c
is a signed byte type, freq[c]++;
will fail for input that has bytes larger than 127, as c
will be negative. You need int c;
instead of char c
.while(!fp.eof())
will result in getting a -1
as your last character, which is an EOF indication, and again indexing your array with a negative number. Do while ((c = fp.get()) != -1)
.get()
's the first time you read the file, which is correct. However the second time you read the file, you use a single getline()
. This only gets the first line, and it omits the new line character. Read the file the same way both times, with a series of get()
's.