Huffman coding is a popular algorithm for lossless data compression. It assigns variable-length codes to input characters, with shorter codes assigned to more frequent characters. Here’s how the algorithm works:
Here’s a C++ implementation of Huffman coding:
#include <iostream>
#include <queue>
#include <unordered_map>
#include <vector>
#include <string>
using namespace std;
struct Node {
char ch;
int freq;
Node* left;
Node* right;
Node(char character, int frequency)
: ch(character), freq(frequency), left(nullptr), right(nullptr) {}
};
struct Compare {
bool operator()(Node* l, Node* r) {
return l->freq > r->freq;
}
};
void generateHuffmanCodes(Node* root, const string& str, unordered_map<char, string>& huffmanCodes) {
if (!root) return;
if (!root->left && !root->right) {
huffmanCodes[root->ch] = str;
}
generateHuffmanCodes(root->left, str + "0", huffmanCodes);
generateHuffmanCodes(root->right, str + "1", huffmanCodes);
}
void huffmanCoding(const string& text) {
unordered_map<char, int> frequency;
for (char ch : text) {
frequency[ch]++;
}
priority_queue<Node*, vector<Node*>, Compare> minHeap;
for (const auto& pair : frequency) {
minHeap.push(new Node(pair.first, pair.second));
}
while (minHeap.size() > 1) {
Node* left = minHeap.top(); minHeap.pop();
Node* right = minHeap.top(); minHeap.pop();
Node* combined = new Node('\0', left->freq + right->freq);
combined->left = left;
combined->right = right;
minHeap.push(combined);
}
Node* root = minHeap.top();
unordered_map<char, string> huffmanCodes;
generateHuffmanCodes(root, "", huffmanCodes);
cout << "Huffman Codes:\n";
for (const auto& pair : huffmanCodes) {
cout << pair.first << ": " << pair.second << endl;
}
string encodedString;
for (char ch : text) {
encodedString += huffmanCodes[ch];
}
cout << "Encoded String: " << encodedString << endl;
}
int main() {
string text = "hello huffman";
huffmanCoding(text);
return 0;
}
Huffman Codes:
e: 1110
f: 110
: 1011
m: 1010
h: 100
n: 000
u: 001
o: 010
a: 1111
l: 011
Encoded String: 1001110011011010101110000111011010101111000
This implementation can be expanded to include decoding and better memory management, but it serves as a basic demonstration of Huffman coding.