我制作了部署 C++ 的 Huffman 编码树,如下所示:
为此我创建了三个类 - HuffmanTree、BinTree 和 BinNode。
更多详情可以在我的 GitHub 上查看:https://github.com/MouChiaHung/DataStructures
检查这三个文件:bin_node.h、bin_tree.h 和 huffman_tree.h。他们读取源文件“source”,以霍夫曼方式encode到文件“encode”,然后解码文件“encode”并将结果存储到输出文件“decode”。此外,霍夫曼表记录在文件“table”中。
其中一个核心函数是 HuffmanTree::encode(),它从源文件中读取字符。
template<typename T> void amo::HuffmanTree<T>::grow(std::list<Model*>& list) { //ascendantly sorted list
Model* l;
Model* r;
Model* m;
BinNode<T>* lchild;
BinNode<T>* rchild;
BinNode<T>* vertex;
std::list<Model*>::iterator it = list.begin();
std::vector<BinNode<T>*> subs; //roots of sub-trees
typename std::vector<BinNode<T>*>::iterator it_subs = subs.begin();
int i = 0;
while (it!=list.end()) {
lchild = NULL;
rchild = NULL;
vertex = NULL;
cout << YELLOW << "while-loop:" << ++i << WHITE << endl;
if (std::next(it,1) == list.end()) { //met the last and single leaf or sub-tree
if (subs.size() > 1) {
cout << RED << "size of sub-tree is more than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
else if (subs.size() == 1){
if (**it == subs.back()->data) { //met the last sub-tree
cout << GREEN << "going to attach the last sub-tree" << WHITE << endl;
vertex = subs.back();
subs.pop_back();
}
else { //met the last leaf
cout << GREEN << "going to attach the last leaf" << WHITE << endl;
r = *it;
lchild = subs.back();
subs.pop_back();
cout << CYAN << "lchild points to the root of the last sub-tree:" << *lchild;
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
m = new Model(CHAR_VERTEX, (lchild->data.prob)+(r->prob));
vertex = new BinNode<T>(*m);
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
}
this->_root = vertex;
cout << CYAN << "root:" << *this->_root << WHITE << endl;
break;
}
else {
cout << RED << "size of sub-tree is less than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
}
else {
l = *it;
it++;
r = *it;
m = new Model(CHAR_VERTEX, l->prob+r->prob);
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set lchild if any sub-tree corresponds with this l model iterated currently
if (*l == (*it_subs)->data) {
cout << CYAN << "lchild points to the root of sub-tree:" << **it_subs;
lchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (lchild != NULL) break; //tricky but important
}
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set rchild if any sub-tree corresponds with this r model iterated currently
if (*r == (*it_subs)->data) {
cout << CYAN << "rchild points to the root of sub-tree:" << **it_subs;
rchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (rchild != NULL) break; //tricky but important
}
if (lchild == NULL) { //set lchild with a new node if no any sub-tree corresponds with this l model iterated currently, which means meeting a row leaf
lchild = new BinNode<T>(*l);
cout << CYAN << "lchild points to a new node:" << *lchild;
}
if (rchild == NULL) { //set rchild with a new node if no any sub-tree corresponds with this r model iterated currently, which means meeting a row leaf
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
}
vertex = new BinNode<T>(*m);
std::cout << GREEN << "growing..." << WHITE << endl;
std::cout << CYAN << "lchild" << *lchild << WHITE;
std::cout << CYAN << "rchild" << *rchild << WHITE;
std::cout << CYAN << "vertex" << *vertex << WHITE;
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
subs.push_back(vertex);
for (std::list<Model*>::iterator itt=it;itt!=list.end();itt++) {
if ((*m < **itt) || (*m == **itt)) {
list.insert(itt, m);
break;
}
else if (std::next(itt,1) == list.end()) {
list.push_back(m);
break;
}
}
it++;
}
}
this->updateHeightAll();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree top -*-*-*-*-*-*-*-*" << WHITE << endl;
this->traverseLevel();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree bottom -*-*-*-*-*-*-*-*" << WHITE << endl;
subs.clear();}
另一个核心函数是 Huffman::grow(),它为 PFC 编码生成二叉树。
template<typename T> void amo::HuffmanTree<T>::grow(std::list<Model*>& list) { //ascendantly sorted list
Model* l;
Model* r;
Model* m;
BinNode<T>* lchild;
BinNode<T>* rchild;
BinNode<T>* vertex;
std::list<Model*>::iterator it = list.begin();
std::vector<BinNode<T>*> subs; //roots of sub-trees
typename std::vector<BinNode<T>*>::iterator it_subs = subs.begin();
int i = 0;
while (it!=list.end()) {
lchild = NULL;
rchild = NULL;
vertex = NULL;
cout << YELLOW << "while-loop:" << ++i << WHITE << endl;
if (std::next(it,1) == list.end()) { //met the last and single leaf or sub-tree
if (subs.size() > 1) {
cout << RED << "size of sub-tree is more than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
else if (subs.size() == 1){
if (**it == subs.back()->data) { //met the last sub-tree
cout << GREEN << "going to attach the last sub-tree" << WHITE << endl;
vertex = subs.back();
subs.pop_back();
}
else { //met the last leaf
cout << GREEN << "going to attach the last leaf" << WHITE << endl;
r = *it;
lchild = subs.back();
subs.pop_back();
cout << CYAN << "lchild points to the root of the last sub-tree:" << *lchild;
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
m = new Model(CHAR_VERTEX, (lchild->data.prob)+(r->prob));
vertex = new BinNode<T>(*m);
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
}
this->_root = vertex;
cout << CYAN << "root:" << *this->_root << WHITE << endl;
break;
}
else {
cout << RED << "size of sub-tree is less than 1:" << subs.size() << WHITE << endl;
this->_root = subs.back();
subs.pop_back();
break;
}
}
else {
l = *it;
it++;
r = *it;
m = new Model(CHAR_VERTEX, l->prob+r->prob);
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set lchild if any sub-tree corresponds with this l model iterated currently
if (*l == (*it_subs)->data) {
cout << CYAN << "lchild points to the root of sub-tree:" << **it_subs;
lchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (lchild != NULL) break; //tricky but important
}
for (it_subs=subs.begin(); it_subs!=subs.end(); it_subs++) { //set rchild if any sub-tree corresponds with this r model iterated currently
if (*r == (*it_subs)->data) {
cout << CYAN << "rchild points to the root of sub-tree:" << **it_subs;
rchild = *it_subs;
--(it_subs = subs.erase(it_subs));
}
if (rchild != NULL) break; //tricky but important
}
if (lchild == NULL) { //set lchild with a new node if no any sub-tree corresponds with this l model iterated currently, which means meeting a row leaf
lchild = new BinNode<T>(*l);
cout << CYAN << "lchild points to a new node:" << *lchild;
}
if (rchild == NULL) { //set rchild with a new node if no any sub-tree corresponds with this r model iterated currently, which means meeting a row leaf
rchild = new BinNode<T>(*r);
cout << CYAN << "rchild points to a new node:" << *rchild;
}
vertex = new BinNode<T>(*m);
std::cout << GREEN << "growing..." << WHITE << endl;
std::cout << CYAN << "lchild" << *lchild << WHITE;
std::cout << CYAN << "rchild" << *rchild << WHITE;
std::cout << CYAN << "vertex" << *vertex << WHITE;
lchild->parent = vertex;
rchild->parent = vertex;
vertex->lchild = lchild;
vertex->rchild = rchild;
subs.push_back(vertex);
for (std::list<Model*>::iterator itt=it;itt!=list.end();itt++) {
if ((*m < **itt) || (*m == **itt)) {
list.insert(itt, m);
break;
}
else if (std::next(itt,1) == list.end()) {
list.push_back(m);
break;
}
}
it++;
}
}
this->updateHeightAll();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree top -*-*-*-*-*-*-*-*" << WHITE << endl;
this->traverseLevel();
cout << GREEN << "-*-*-*-*-*-*-*-* Huffman tree bottom -*-*-*-*-*-*-*-*" << WHITE << endl;
subs.clear();}
而 Huffman::generate() 创建用于编码内容的表格。
template<typename T> void amo::HuffmanTree<T>::generate() {
std::string code = "";
std::queue<BinNode<T>*> queue;
BinNode<T>* node = this->_root;
BinNode<T>* tmp;
queue.push(node);
int i = 0;
while (true) {
if (queue.empty()) break;
node = queue.front();
queue.pop();
cout << YELLOW << "while-loop:" << ++i << ", node:" << *node << WHITE << endl;
if (node->data.c == CHAR_VERTEX) {
//do nothing
}
else {
if (node->isLeaf()) code = "";
tmp = node;
while (tmp!=NULL) {
if (tmp->isLeftChild()) code.insert(0, "0");
else if (tmp->isRightChild()) code.insert(0, "1");
tmp = tmp->parent;
}
if (node->data.c != CHAR_VERTEX) codes[node->data.c] = code;
}
if (node->hasLeftChild()) queue.push(node->lchild);
if (node->hasRightChild()) queue.push(node->rchild);
}
for (std::map<char,string>::iterator it=codes.begin();it!=codes.end();it++) {
cout << YELLOW << "codes[" << distance(codes.begin(),it) << "]:" << " key:" << it->first << " => value:" << it->second << WHITE << endl;
}}
谢谢,欢迎提出建议。