/* encode.c - Don Yang (uguu.org) Huffman encoder encode 02/01/03 */ /*@ -compdef -compdestroy -mayaliasunique -mustfreefresh -nullpass @*/ /*@ -nullstate -temptrans @*/ #include #include #include #include #include #define BUFSIZE 2048 typedef struct _node { int freq, leaf; struct _node *left, *right; } Node; typedef struct { int size; Node *node[511]; } Heap; typedef struct { FILE *file; unsigned int byte, bitcount; } BitPool; static void CreateDictionary(Node *root, char dict[][256], char *stack, size_t stacksize); static int CreateForest(FILE *infile, /*@out@*/Heap *heap, /*@out@*/int *filesize); static void CreateHuffmanTree(Heap *heap); static int EncodeFile(FILE *infile, FILE *outfile, Node *tree, int filesize); static int EncodeUniformFile(FILE *outfile, Node *tree, int filesize); static Node *HeapPop(Heap *heap); static void HeapPush(Heap *heap, Node *node); static /*@observer@*/Node *NewNode(int freq, int leaf, Node *l, Node *r); static int WriteBit(BitPool *o, unsigned int bit); static int WriteLastByte(BitPool *o); static int WriteTree(BitPool *o, Node *root); /******************************************************************** main */ int main(int argc, char **argv) { FILE *infile, *outfile; int filesize, status; Heap heap; if( argc < 3 ) return printf("%s \n", *argv); if( (infile = fopen(argv[1], "rb")) == NULL ) { printf("Can not open %s, error %d\n", argv[1], errno); return 1; } if( (status = CreateForest(infile, &heap, &filesize)) != 0 ) { if( status == 1 ) printf("read error %d\n", errno); if( status == 3 ) printf("seek error %d\n", errno); (void)fclose(infile); return 1; } if( (outfile = fopen(argv[2], "wb+")) == NULL ) { printf("Can not create %s, error %d\n", argv[2], errno); (void)fclose(infile); return 1; } status = 0; if( heap.size == 1 ) { assert(heap.node[0] != NULL); status = EncodeUniformFile(outfile, heap.node[0], filesize); } else if( heap.size > 1 ) { assert(heap.node[0] != NULL); CreateHuffmanTree(&heap); status = EncodeFile(infile, outfile, heap.node[0], filesize); } if( status == 1 ) printf("read error %d\n", errno); if( status == 2 ) printf("write error %d\n", errno); if( status == 3 ) printf("seek error %d\n", errno); (void)fclose(infile); (void)fclose(outfile); return 0; } /* main() */ /******************************************************** CreateDictionary */ static void CreateDictionary(Node *root, char dict[][256], char *stack, size_t stacksize) { assert(stacksize < 256); if( root->leaf != -1 ) { /* Leaf node, copy bitstring to dictionary */ memcpy(dict[root->leaf], stack, stacksize); dict[root->leaf][stacksize] = (char)127; return; } /* Internal node, push bit onto stack and recurse */ stack[stacksize] = '\0'; CreateDictionary(root->left, dict, stack, stacksize + 1); stack[stacksize] = '\1'; CreateDictionary(root->right, dict, stack, stacksize + 1); } /* CreateDictionary() */ /************************************************************ CreateForest */ static int CreateForest(FILE *infile, Heap *heap, int *filesize) { unsigned char buffer[BUFSIZE]; int freq[256], i; size_t size; *filesize = heap->size = 0; heap->node[0] = NULL; if( fseek(infile, 0, SEEK_SET) != 0 ) return 3; memset(freq, 0, 256 * sizeof(int)); do { size = fread(buffer, 1, BUFSIZE, infile); for(i = 0; i < (int)size; freq[(int)buffer[i++]]++); } while( size == BUFSIZE ); for(i = 0; i < 256; i++) { if( freq[i] > 0 ) { /* Create tree leaves from nonzero frequencies only */ HeapPush(heap, NewNode(freq[i], i, NULL, NULL)); *filesize += freq[i]; } } return 0; } /* CreateForest() */ /******************************************************* CreateHuffmanTree */ static void CreateHuffmanTree(Heap *heap) { Node *left, *right; /* Combine minimum node pairs until only one node left heap->node[0] then becomes the huffman tree root. */ while( heap->size > 1 ) { left = HeapPop(heap); right = HeapPop(heap); HeapPush(heap, NewNode(left->freq + right->freq, -1, left, right)); } } /* CreateHuffmanTree() */ /************************************************************** EncodeFile */ static int EncodeFile(FILE *infile, FILE *outfile, Node *tree, int filesize) { unsigned char buffer[BUFSIZE]; char dict[256][256], str[256], *p; size_t blocksize, i, j; BitPool o; if( fseek(infile, 0, SEEK_SET) != 0 ) return 3; if( fwrite(&filesize, sizeof(int), 1, outfile) != 1 ) return 2; o.file = outfile; o.byte = o.bitcount = 0; if( WriteTree(&o, tree) != 0 ) return 2; memset(dict, 127, 256 * 256); CreateDictionary(tree, dict, str, 0); for(blocksize = BUFSIZE; filesize > 0; filesize -= blocksize) { if( filesize < BUFSIZE ) blocksize = (size_t)filesize; if( fread(buffer, blocksize, 1, infile) != 1 ) return 1; for(i = 0; i < blocksize; i++) { p = dict[(int)buffer[i]]; for(j = 0; (int)*p != 127; j++) { if( WriteBit(&o, (unsigned int)*p) != 0 ) return 2; p++; } } } if( WriteLastByte(&o) != 0 ) return 2; return 0; } /* EncodeFile() */ /******************************************************* EncodeUniformFile */ static int EncodeUniformFile(FILE *outfile, Node *tree, int filesize) { BitPool o; unsigned int bitindex; /* File size */ if( fwrite(&filesize, sizeof(int), 1, outfile) != 1 ) return 2; /* Leaf node (1bit tag + 8bit data) */ o.file = outfile; o.byte = o.bitcount = 0; if( WriteBit(&o, 1) != 0 ) return 2; for(bitindex = 0; bitindex < 8; bitindex++) { if( WriteBit(&o, (tree->leaf & (1 << bitindex)) == 0 ? 0U : 1U) != 0 ) return 2; } if( WriteLastByte(&o) != 0 ) return 2; return 0; } /* EncodeUniformFile() */ /***************************************************************** HeapPop */ static Node *HeapPop(Heap *heap) { Node *r, *tmp; int current, left, right, replace; /* Pop node */ r = heap->node[0]; /* Replace root node with last node in heap */ heap->node[current = 0] = heap->node[--(heap->size)]; /* Replace current root until both children are greater */ while( current < heap->size ) { if( (left = current * 2 + 1) >= heap->size ) break; if( (right = current * 2 + 2) >= heap->size ) { if( heap->node[current]->freq <= heap->node[left]->freq ) break; replace = left; } else { if( heap->node[current]->freq <= heap->node[left]->freq && heap->node[current]->freq <= heap->node[right]->freq ) break; replace = heap->node[left]->freq < heap->node[right]->freq ? left : right; } tmp = heap->node[current]; heap->node[current] = heap->node[replace]; heap->node[current = replace] = tmp; } return r; } /* HeapPop() */ /**************************************************************** HeapPush */ static void HeapPush(Heap *heap, Node *node) { Node *tmp; int parent, current; /* Add node to end of heap */ heap->node[current = (heap->size)++] = node; /* Bubble up until current node is greater than parent */ while( current > 0 ) { parent = (current - 1) / 2; assert(parent >= 0); if( heap->node[current]->freq >= heap->node[parent]->freq ) return; tmp = heap->node[current]; heap->node[current] = heap->node[parent]; heap->node[parent] = tmp; current = parent; } } /* HeapPush() */ /***************************************************************** NewNode */ static Node *NewNode(int freq, int leaf, Node *l, Node *r) { static int PoolSize = 0; static Node Pool[511]; Pool[PoolSize].freq = freq; Pool[PoolSize].leaf = leaf; Pool[PoolSize].left = l; Pool[PoolSize].right = r; return &Pool[PoolSize++]; } /* NewNode() */ /**************************************************************** WriteBit */ static int WriteBit(BitPool *o, unsigned int bit) { o->byte |= bit << (o->bitcount); if( ++(o->bitcount) == 8 ) { if( fputc((int)(o->byte), o->file) == EOF ) return 1; o->byte = o->bitcount = 0; } return 0; } /* WriteBit() */ /*********************************************************** WriteLastByte */ static int WriteLastByte(BitPool *o) { return o->bitcount == 0 ? 0 : fputc((int)(o->byte), o->file) == EOF ? 1 : 0; } /* WriteLastByte() */ /*************************************************************** WriteTree */ static int WriteTree(BitPool *o, Node *root) { unsigned int i; if( root->leaf == -1 ) { /* Internal node */ if( WriteBit(o, 0) != 0 ) return 1; if( WriteTree(o, root->left) != 0 ) return 1; return WriteTree(o, root->right); } /* Leaf */ if( WriteBit(o, 1) != 0 ) return 1; for(i = 0; i < 8; i++) { if( WriteBit(o, (root->leaf & (1 << i)) == 0 ? 0U : 1U) != 0 ) return 1; } return 0; } /* WriteTree() */