huffman文件压缩及其解压(附源码,针对各种文件)

阅读: 评论:0

huffman文件压缩及其解压(附源码,针对各种文件)

huffman文件压缩及其解压(附源码,针对各种文件)

  • huffman压缩简介
  • 构建压缩信息
  • 开始压缩
    • 统计字符
    • 建立huffman树
    • 得到huffman编码
    • 将huffman编码压缩
    • 书写配置信息
  • 解压缩
    • 读取配置信息
    • 重新建树
    • 还原文件
  • 整体源码
    • test.cpp
    • compress.h
    • HuffmanTree.h
    • Heap.h

huffman压缩简介

哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码);
因为这个特性,所以我们可以利用其来进行文件压缩,例如:有一个原始数据序列,ABACCDAA则编码为A(0),B(10),C(110),(D111),压缩后为010011011011100,再将其转为一个整形写入,实现了文件压缩;

构建压缩信息

那么这个过程中,我们在类中也就是整个压缩过程中要对其字符对应的信息例如:其对应的字符出现的次数,其字符对应的huffman编码,好方便我们的压缩和解压的过程;

在类中定义一个CharInfo _infos[256]来表示其所有信息;

typedef long long LongType;struct CharInfo
{  LongType _count;//字符出现的次数string _code;//huffman编码unsigned char _ch;//字符CharInfo(LongType count = 0):_count(count), _code(""){}
};

开始压缩

压缩又可以分为以下几步:

统计字符

统计字符出现的次数

FILE* fout = fopen(filename, "rb");assert(fout);//char ch = fgetc(fout);int ch = fgetc(fout);//因为压缩汉字或者图片有些字符可能超过一个char表示的范围,具体与编码方式有关,在此不做过多描述while (ch != EOF){_infos[(unsigned char)ch]._count++;ch = fgetc(fout);}

建立huffman树

CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_infos, 256,invalid);
//另外创建一个HuffmanTree.h文件
#pragma once
template <class T>
struct HuffmanTreeNode//huffman节点
{T _w;HuffmanTreeNode* _left;HuffmanTreeNode* _right;HuffmanTreeNode* _parent;HuffmanTreeNode(const T& x):_w(x), _left(NULL), _right(NULL), _parent(NULL){}};template <class T>
class HuffmanTree
{typedef HuffmanTreeNode<T> Node;public:HuffmanTree():_root(NULL){}HuffmanTree(T* a, size_t n,const T& invalid){Heap<Node*> minHeap;//利用堆来进行排序的比较for (size_t i = 0; i < n; i++){if (a[i] != invalid)minHeap.Push(new Node(a[i]));}while (minHeap.Size() > 1){Node* left = minHeap.Top();minHeap.Pop();Node* right = minHeap.Top();minHeap.Pop();Node* parent = new Node(left->_w + right->_w);parent->_left = left;left->_parent = parent;parent->_right = right;right->_parent = parent;minHeap.Push(parent);}_root = minHeap.Top();}~HuffmanTree(){_Destory(_root);}Node* Root(){return _root;}
protected:void _Destory(Node* root){if (root == NULL)return;_Destory(root->_left);_Destory(root->_right);delete root;}HuffmanTree(const HuffmanTree<T>&);//防拷贝HuffmanTree& operator=(const HuffmanTree<T>&);
protected:Node* _root;
};

其中利用的堆排序的代码:

//利用仿函数进行大堆小堆
template <class T>
struct Less
{bool operator()(const T& l, const T& r) const{return l->_w._count <= r->_w._count;}
};
template <class T>
struct Greater
{bool operator()(const T& l, const T& r) const{return l->_w._count >= r->_w._count;}
};
template <class T, class Compare = Less<T>>
//template <class T>
class Heap
{
public:Heap(){}Heap( T* a, size_t size){_a.reserve(size);for (size_t i = 0; i < size; ++i){_a.push_back(a[i]);}for (int i = (_a.size() - 2) / 2; i >= 0;--i)_AdjustDown(i);}size_t Size(){return _a.size();}void Push(const T& x){_a.push_back(x);_AdjustUp(_a.size() - 1);}void Pop(){assert(!_a.empty());swap(_a[0], _a[_a.size() - 1]);_a.pop_back();_AdjustDown(0);}const T& Top() const{return _a[0];}
protected:void _AdjustDown(int root){Compare comFunc;int parent = root;size_t child = root * 2 + 1;while (child < _a.size()){if (child + 1 < _a.size() && comFunc(_a[child + 1],_a[child])){++child;}if (comFunc(_a[child] , _a[parent])){swap(_a[child], _a[parent]);parent = child;child = parent * 2 + 1;}else{break;}}}void _AdjustUp(int child){Compare comFunc;int parent = (child - 1) >> 1;while (child > 0){if (comFunc(_a[child], _a[parent]))//if (_a[child] >= _a[parent]){swap(_a[child], _a[parent]);child = parent;parent = (child - 1) >> 1;}else{break;}}}
protected:vector<T> _a;
};
得到huffman编码
void GetHuffmanCode( HuffmanTreeNode<CharInfo>* root){if (root == NULL)return ;if (root->_left == NULL && root->_right == NULL){HuffmanTreeNode<CharInfo> *parent, *cur;cur = root;parent = cur->_parent;string& code = _infos[(unsigned)root->_w._ch]._code;while (parent){if (cur == parent->_left)code += '0';elsecode += '1';cur = parent;parent = cur->_parent;}reverse(code.begin(), d());return;//_infos[root->_w._ch]._code = code;}GetHuffmanCode(root->_left);GetHuffmanCode(root->_right); }
将huffman编码压缩

压缩的时候就是根据字符的顺序,将其转化为对应的huffman编码,当满8位的时候就将其写入到文件中;

fseek(fout,0, SEEK_SET);ch = fgetc(fout);unsigned char value = 0;int pos = 0;while (!feof(fout)){string& code = _infos[ch]._code;for (size_t i = 0; i < (code.size()); ++i){value <<= 1;++pos;if (code[i] == '1')//将其相应位置置为1value |= 1;if (pos == 8)//满8写入{printf("%d ", value);fputc(value, fin);value = 0;pos = 0;}}//cout << ch << " ";ch = fgetc(fout);}if (pos)//如果最后的字符huffman编码不够8位将其补够8位写入{value <<= (8 - pos);fputc(value, fin);}
书写配置信息

这样的话,压缩就算是完成了,但是在压缩的时候我们还需要写配置信息,也就是需要将我们的CharInfo _infos[256]的信息写入,不然的话,当单独解压缩的时候就没有办法了;

我采取的方法是将其信息写入到文件开头处,一开始解压缩就先读配置信息,这样子就需要一个断点来标识配置信息和文件内容了;
因为其本身的结构体中有string类型,如果直接写入会影响压缩效率,而我们只需要只要字符出现的次数就够了;

struct CountInfo
{int _ch;LongType _count;
};
CountInfo info;for (size_t i = 0; i < 256; ++i){if (_infos[i]._count){info._ch = _infos[i]._ch;info._count = _infos[i]._count;fwrite(&info,sizeof(info),1,fin);}}//CharInfo info1;info._count = -1;//断点fwrite(&info, sizeof(info), 1, fin);

这样子压缩就算是完成了;

解压缩

解压缩的过程就跟压缩类似了,所以简单了一些;就是先根据配置信息读出文件字符信息,然后重新建树,根据文件中现在的数字的位来找字符,再依次写入就好;

读取配置信息

根据写入的配置信息读取到断点处;

FILE* fout = fopen(filename, "rb");assert(fout);FILE* fin = fopen(uncompressFile.c_str(), "wb");assert(fin);CountInfo info;while (1){fread(&info, sizeof(CountInfo), 1, fout);if (info._count == -1) break;_infos[info._ch]._ch = info._ch;_infos[info._ch]._count = info._count;} 
重新建树

根据配置信息重新构建huffman树

HuffmanTree<CharInfo> tree(_infos, 256, invalid);
还原文件

根据压缩文件读取出来的数字按位,依次去huffman树中找到字符写入文件,需要注意一种就是当文件只有一种字符的时候,特别考虑

if (root->_left == NULL && root->_right == NULL){LongType count1 = count;while (count1){cout << cur->_w._ch<<" ";fputc(cur->_w._ch, fin);count1--;}}while (!feof(fout)){int pos = 7;char test = 1;while (pos >= 0 ){if (value &(test << pos))cur = cur->_right;elsecur = cur->_left;--pos;if (cur->_left == NULL && cur->_right == NULL){//fflush(stdin);//cout << cur->_w._ch << " ";fputc(cur->_w._ch, fin);cur = root;count--;}}if (count == 0)break;value = fgetc(fout);}

这个样子整个压缩解压就完成了;

整体源码

下面是整体源码:

test.cpp
#include <iostream>
#include <assert.h>
#include <string>
#include <vector>
#include <windows.h>
using namespace std;#include "HuffmanTree.h"
#include "compress.h"
#include "Heap.h"int main()
{Test1();return 0;
}
compress.h
#pragma once typedef long long LongType;struct CharInfo
{  LongType _count;//字符出现的次数string _code;//huffman编码unsigned char _ch;//字符CharInfo(LongType count = 0):_count(count), _code(""){}bool operator!=(const CharInfo& info) const{return _count != info._count;}CharInfo operator+(const CharInfo& info){return CharInfo(_count + info._count);}};struct CountInfo
{int _ch;LongType _count;
};
class CompressFile
{
public:CompressFile()//对其字符进行初始化{for (size_t i = 0; i <= 256; i++){_infos[i]._ch = i;}}void Compress(const char* filename){assert(filename);FILE* fout = fopen(filename, "rb");assert(fout);//char ch = fgetc(fout);int ch = fgetc(fout);while (ch != EOF){_infos[(unsigned char)ch]._count++;ch = fgetc(fout);}//fclose(fout);CharInfo invalid;invalid._count = 0;HuffmanTree<CharInfo> tree(_infos, 256,invalid);GetHuffmanCode(tree.Root());string compressFile = filename;compressFile += ".huffman";FILE* fin = fopen(compressFile.c_str(), "wb");assert(fin);CountInfo info;for (size_t i = 0; i < 256; ++i){if (_infos[i]._count){info._ch = _infos[i]._ch;info._count = _infos[i]._count;fwrite(&info,sizeof(info),1,fin);}}//CharInfo info1;info._count = -1;fwrite(&info, sizeof(info), 1, fin);fseek(fout,0, SEEK_SET);ch = fgetc(fout);unsigned char value = 0;int pos = 0;while (!feof(fout)){string& code = _infos[ch]._code;for (size_t i = 0; i < (code.size()); ++i){value <<= 1;++pos;if (code[i] == '1')value |= 1;if (pos == 8){printf("%d ", value);fputc(value, fin);value = 0;pos = 0;}}//cout << ch << " ";ch = fgetc(fout);}if (pos){value <<= (8 - pos);fputc(value, fin);}fclose(fin);fclose(fout);}void UnCompress(const char* filename)//当这个文件中只有一种字符时,只有一个根节点{assert(filename);string uncompressFile = filename;size_t index = uncompressFile.rfind('.');assert(index != string::npos);uncompressFile = uncompressFile.substr(0, index);index = uncompressFile.rfind('.');assert(index != string::npos);uncompressFile = uncompressFile.substr(0, index);uncompressFile += "1.jpg";FILE* fout = fopen(filename, "rb");assert(fout);FILE* fin = fopen(uncompressFile.c_str(), "wb");assert(fin);CountInfo info;while (1){fread(&info, sizeof(CountInfo), 1, fout);if (info._count == -1) break;_infos[info._ch]._ch = info._ch;_infos[info._ch]._count = info._count;} CharInfo invalid;invalid._count = 0;HuffmanTree<CharInfo> tree(_infos, 256, invalid);HuffmanTreeNode<CharInfo>* root = tree.Root();HuffmanTreeNode<CharInfo>* cur = root;LongType count = root->_w._count;unsigned char value = fgetc(fout);if (root->_left == NULL && root->_right == NULL){LongType count1 = count;while (count1){cout << cur->_w._ch<<" ";fputc(cur->_w._ch, fin);count1--;}}while (!feof(fout)){int pos = 7;char test = 1;while (pos >= 0 ){if (value &(test << pos))cur = cur->_right;elsecur = cur->_left;--pos;if (cur->_left == NULL && cur->_right == NULL){//fflush(stdin);//cout << cur->_w._ch << " ";fputc(cur->_w._ch, fin);cur = root;count--;}}if (count == 0)break;value = fgetc(fout);}fclose(fout);fclose(fin);}
protected:void GetHuffmanCode( HuffmanTreeNode<CharInfo>* root){if (root == NULL)return ;if (root->_left == NULL && root->_right == NULL){HuffmanTreeNode<CharInfo> *parent, *cur;cur = root;parent = cur->_parent;string& code = _infos[(unsigned)root->_w._ch]._code;while (parent){if (cur == parent->_left)code += '0';elsecode += '1';cur = parent;parent = cur->_parent;}reverse(code.begin(), d());return;//_infos[root->_w._ch]._code = code;}GetHuffmanCode(root->_left);GetHuffmanCode(root->_right); }protected:CharInfo _infos[256];
};void Test1()
{CompressFile c1;cout << "压缩中:" << endl;c1.Compress("psb.jpg");cout << "----------------------------------------------------------------" << endl;cout << "解压中" << endl;//c1.UnCompress("psb.jpg.huffman");CompressFile c2;c1.UnCompress("psb.jpg.huffman");}
HuffmanTree.h
#pragma oncetemplate <class T>
struct HuffmanTreeNode
{T _w;HuffmanTreeNode* _left;HuffmanTreeNode* _right;HuffmanTreeNode* _parent;HuffmanTreeNode(const T& x):_w(x), _left(NULL), _right(NULL), _parent(NULL){}};template <class T>
class HuffmanTree
{typedef HuffmanTreeNode<T> Node;public:HuffmanTree():_root(NULL){}HuffmanTree(T* a, size_t n,const T& invalid){Heap<Node*> minHeap;for (size_t i = 0; i < n; i++){if (a[i] != invalid)minHeap.Push(new Node(a[i]));}while (minHeap.Size() > 1){Node* left = minHeap.Top();minHeap.Pop();Node* right = minHeap.Top();minHeap.Pop();Node* parent = new Node(left->_w + right->_w);parent->_left = left;left->_parent = parent;parent->_right = right;right->_parent = parent;minHeap.Push(parent);}_root = minHeap.Top();}~HuffmanTree(){_Destory(_root);}Node* Root(){return _root;}
protected:void _Destory(Node* root){if (root == NULL)return;_Destory(root->_left);_Destory(root->_right);delete root;}HuffmanTree(const HuffmanTree<T>&);//防拷贝HuffmanTree& operator=(const HuffmanTree<T>&);
protected:Node* _root;
};
Heap.h
#pragma oncetemplate <class T>
struct Less
{bool operator()(const T& l, const T& r) const{return l->_w._count <= r->_w._count;}
};
template <class T>
struct Greater
{bool operator()(const T& l, const T& r) const{return l->_w._count >= r->_w._count;}
};
template <class T, class Compare = Less<T>>
//template <class T>
class Heap
{
public:Heap(){}Heap( T* a, size_t size){_a.reserve(size);for (size_t i = 0; i < size; ++i){_a.push_back(a[i]);}for (int i = (_a.size() - 2) / 2; i >= 0;--i)_AdjustDown(i);}size_t Size(){return _a.size();}void Push(const T& x){_a.push_back(x);_AdjustUp(_a.size() - 1);}void Pop(){assert(!_a.empty());swap(_a[0], _a[_a.size() - 1]);_a.pop_back();_AdjustDown(0);}const T& Top() const{return _a[0];}
protected:void _AdjustDown(int root){Compare comFunc;int parent = root;size_t child = root * 2 + 1;while (child < _a.size()){if (child + 1 < _a.size() && comFunc(_a[child + 1],_a[child])){++child;}if (comFunc(_a[child] , _a[parent])){swap(_a[child], _a[parent]);parent = child;child = parent * 2 + 1;}else{break;}}}void _AdjustUp(int child){Compare comFunc;int parent = (child - 1) >> 1;while (child > 0){if (comFunc(_a[child], _a[parent]))//if (_a[child] >= _a[parent]){swap(_a[child], _a[parent]);child = parent;parent = (child - 1) >> 1;}else{break;}}}
protected:vector<T> _a;
};

本文发布于:2024-01-29 00:15:58,感谢您对本站的认可!

本文链接:https://www.4u4v.net/it/170645856111332.html

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。

标签:源码   文件压缩   文件   huffman
留言与评论(共有 0 条评论)
   
验证码:

Copyright ©2019-2022 Comsenz Inc.Powered by ©

网站地图1 网站地图2 网站地图3 网站地图4 网站地图5 网站地图6 网站地图7 网站地图8 网站地图9 网站地图10 网站地图11 网站地图12 网站地图13 网站地图14 网站地图15 网站地图16 网站地图17 网站地图18 网站地图19 网站地图20 网站地图21 网站地图22/a> 网站地图23