Algorithm implementation of Huffman tree and Huffman coding

Algorithm implementation of Huffman tree and Huffman coding (required, confirmatory experiment)

Experimental purpose

Be familiar with the construction method of Huffman tree and the application of Huffman coding, and understand the application process of Huffman tree in the field of communication and coding.
Experimental content
(1) Input a short English passage of 100-200 words and save it in a file a.
(2) Write a function to count the number n of letters in the passage and the number of times each letter appears
(3) The writing function takes the number of letters as the weight, builds a Haffman tree (n leaves), and gives the Haffman code of each letter.
(4) Encode the original short text with each letter code, and store the code text in file b.
(5) The code text in file b is decoded with Haffman tree, the result is stored in file c, and whether a and c are consistent is compared to check the correctness of coding and decoding.
Data structure definition
The data structure used in the algorithm is linked list, which is used to create Huffman tree. The elements appearing in each node of Huffman tree have the weight of each node, as well as the parents, left children and right children of the node. Then a node type node is created to store characters and character information.
At the same time, in order to facilitate size comparison and sorting, I ordered a vector container to facilitate subsequent operations.

#include "constant.h"
#include <stdio.h>
#include <string.h>
typedef char **huffmancode;
#define maxsize 300
#define INF 1e5
typedef struct
{
  /* data */
  int weight;
  int parent, lchild, rchild;
} HTNode, *huffmantree;
typedef struct node
{
  char ch;
  int data;
} node;

void createhuffmantree(huffmantree &HT, huffmancode &HC, int *w, int n); //Create huff full tree
void huffmancoding(huffmantree &HT, huffmancode &HC, int *w, int n);     //The Huffman tree is encoded to find its corresponding Huffman code
void select(huffmantree HT, int length, int &s1, int &s2);               //Pick out the two numbers with the smallest weight
void read(int weight[maxsize], int &length);                             //Read the weights in the array into the weight array
void count();
void compare();
void translate();
 
Algorithm idea and algorithm design
void select(huffmantree HT, int length, int &s1, int &s2)
{
   //s1 stores the node with the smallest weight, and s2 stores the second smallest node
   if (length <= 2)
       exit(0);
   int s1a = 9999999, s2a = 9999999; //The first two numbers are stored by default
   for (int k = 1; k <= length; k++)
   {
       if (HT[k].parent == 0) //Continuously optimize and find out the best and most convenient optimization results and optimization scheme
       {
           if (HT[k].weight < s1a) //Find the smallest node first
           {
               s1a = HT[k].weight;
               s1 = k;
           }
       }
   }
   for (int k = 1; k <= length; k++)
   {
       if (HT[k].parent == 0) //Continuously optimize and find out the best and most convenient optimization results and optimization scheme
       {
           if (HT[k].weight < s2a && HT[k].weight != s1a)
           {
               s2a = HT[k].weight;
               s2 = k;
           }
       }
   }
}
void pr(node a)
{
   cout << a.ch << " " << a.data << endl;
}
void print(pair<char, int> v)
{
   //cout<<v.first<<" "<<v.second<<endl;
   node temp = {v.first, v.second};
   v1.push_back(temp);
}
void read(int weight[maxsize], int &length, char weightch[maxsize])
{
   weight[maxsize] = {0};
   FILE *fp;
   multimap<char, int> wgh;
   fp = fopen("shiyanshuju.txt", "r");
   char ch = fgetc(fp);
   while (ch != EOF)
   {
       if (wgh.find(ch) == wgh.end())
       {
           wgh.insert(make_pair(ch, 1));
       }
       else
       {
           wgh.find(ch)->second++;
       }
       ch = fgetc(fp);
   }
   fclose(fp);
   for_each(wgh.begin(), wgh.end(), print);
   vector<node>::iterator its = v1.begin();
   for (int i = 0; i < v1.size(); i++)
   {
       for (int j = i + 1; j < v1.size(); j++)
       {
           if (v1[i].data <= v1[j].data)
           {
               node temp = v1[j];
               v1[j] = v1[i];
               v1[i] = temp;
           }
       }
   }
   //for_each(v1.begin(), v1.end(), pr);
   int i = 1;
   length = wgh.size();
   its = v1.begin();
   while (its != v1.end())
   {
       weight[i] = its->data;
       weightch[i] = its->ch;
       its++;
       i++; //Don't add it
   }
   return;
}

void createhuffmantree(huffmantree &HT, huffmancode &HC, int *w, int n)
{
   if (n <= 1) //Create a Huffman tree, followed by autumn Huffman coding
   {
       return;
   }
   int m = 2 * n - 1;
   huffmantree p;
   int i = 0;
   int s1, s2;
   w++;
   HT = (huffmantree)malloc((m + 1) * sizeof(HTNode));
   for (p = HT, i = 1; i <= n; i++, ++p, ++w)
   {
       *p = {*w, 0, 0, 0};
   }
   for (; i <= m; i++, ++p)
   {
       *p = {0, 0, 0, 0};
   }
   for (i = n + 1; i <= m; i++)
   {
       select(HT, i - 1, s1, s2);
       HT[s1].parent = i;
       HT[s2].parent = i;
       HT[i].lchild = s1;
       HT[i].rchild = s2;
       HT[i].weight = HT[s1].weight + HT[s2].weight;
   }
}
void huffmancoding(huffmantree &HT, huffmancode &HC, int *w, int n)
{ //Huffman coding
   int i = 0;
   //Reverse Huffman coding from leaf to root
   HC = (huffmancode)malloc((n + 1) * sizeof(char *));
   char *cd = (char *)malloc(n * sizeof(char));
   cd[n - 1] = '\0';
   int start;
   for (i = 1; i <= n; i++)
   {
       start = n - 1;
       for (int c = i, f = HT[i].parent; f != 0; c = f, f = HT[f].parent)
       {

           if (HT[f].lchild == c)
               cd[--start] = '0';
           else
               cd[--start] = '1';
       }
       HC[i] = (char *)malloc((n - start) * sizeof(char));
       strcpy(HC[i], &cd[start]);
   }
   free(cd);
}
void bianma(huffmancode &HC, char weightch[maxsize])
{
   FILE *fp1, *fp2;
   fp1 = fopen("bianma.txt", "w");      //Write file
   fp2 = fopen("shiyanshuju.txt", "r"); //read file
   char ch = fgetc(fp2);
   while (ch != EOF)
   {
       int i = 1;
       while (weightch[i] != ch && weightch[i] != '\n')
       {
           i++;
       }
       fprintf(fp1, "%s", HC[i]);
       ch = fgetc(fp2);
   }
   fclose(fp1);
   fclose(fp2);
}
void jiema(huffmantree HT, char weightch[maxsize], int m)
{
   FILE *fp;
   char ch = '\n';
   fp = fopen("bianma.txt", "r");
loop:
   int i = m;
   while (HT[i].lchild != 0 && HT[i].rchild != 0)
   {
       ch = fgetc(fp);
       if (ch == EOF)
       {
           fclose(fp);
           return;
       }
       if (ch == '1') //Note the left-right correspondence here
           i = HT[i].rchild;
       else
           i = HT[i].lchild;
   }
   printf("%c", weightch[i]);
   goto loop;
}
Experimental code

Some algorithms have been given above:
The following is the connection part given for:

#ifndef constant_h
#define constant_h
#define TRUE 1
#define FALSE 0
#define OK 1
#define ERROR 0
#define INFEASIBLE -1
#define OVERFLOW -2

typedef int Status;

#endif /* constant_h */

#include <stdio.h>
#include <iostream>
#include "huffmancode.hpp"
#include "constant.h"
#include <stdlib.h>
#include <string.h>
#include <string>
#include <algorithm>
#include <vector>
#include <map>
using namespace std;
void huffmancoding(huffmantree &HT, huffmancode &HC, int *w, int n);
void select(huffmantree HT, int length, int &s1, int &s2);
void read(int weight[maxsize], int &length, char weightch[maxsize]);
void huffmancoding(huffmantree &HT, huffmancode &HC, int *w, int n);
void createhuffmantree(huffmantree &HT, huffmancode &HC, int *w, int n);
void huffmancoding(huffmantree &HT, huffmancode &HC, int *w, int n);
void bianma(huffmancode &HC, char weightch[maxsize]);
void jiema(huffmantree HT, char weightch[maxsize], int m);
vector<node> v1;

int main()
{
   int length = 0;
   huffmantree HT;
   huffmancode HC;
   int weight[maxsize] = {0};
   char weightch[maxsize] = {'\n'};
   read(weight, length, weightch);
   createhuffmantree(HT, HC, weight, length);
   huffmancoding(HT, HC, weight, length);
   bianma(HC, weightch);
   int m = length * 2 - 1;
   printf("By decoding Huffman code:\n");
   jiema(HT, weightch, m);
}

Algorithm test results

The following is the content of the test document:

Analysis and summary

(1) Analysis of algorithm complexity and its advantages and disadvantages
The time complexity in the process of creating Huffman code is O (n). The sorting operation involved in the Huffman process is fast sorting, and the time complexity is O (nlog ⁡ n). In the process of creating Huffman code, the optimal method is adopted, and the node with the largest weight is placed near the root node, so that the Huffman code after coding becomes simple and has no ambiguity, It's simple
(2) Experimental summary
When writing the select function, we can't find the method quickly. After careful thinking and modification, we found the two largest nodes.
At the beginning, when reading the experimental data document, I couldn't quickly correspond to each letter. Each letter was garbled. I just looked for it for a long time and couldn't find it. Finally, I saw a place where I input the Enter key when reading the keyboard input information, resulting in a lot of errors. Moreover, the corresponding array after sorting can not be found quickly. After several twists and turns, we finally use the vector container to find a solution.
In the final decoding, the 0 and 1 codes corresponding to the left and right subtrees are reversed, resulting in the translation of the whole document is not correct, resulting in a lot of random codes. Later, I found the wrong place when I slowly found the wrong place.

Keywords: Algorithm data structure linked list

Added by gt500pwr on Wed, 05 Jan 2022 16:56:58 +0200