/* encode.c - Don Yang (uguu.org) lclint +posixlib encode.c common.c 09/15/01 */ #ifdef _WIN32 #include #include #else #include #endif #include #include #include #include #include"common.h" #include"hufftree.h" /* Performance tuning variables */ #define MIN_SUBSTRING_LENGTH 5 /* Huffman table/buffer */ static struct { unsigned char bit[HUFFMAN_TREE_DEPTH]; int length; } Table[256]; static BYTE HuffmanBuffer[HALF_BUFFER_SIZE]; /* Substring indexes typedef struct _offset { int ByteOffset; struct _offset *Next; } Offset; struct { Offset *First, *Last; } OffsetQueue[256]; */ static WORD FirstIndex[256], LastIndex[256]; static WORD NextIndex[HALF_BUFFER_SIZE + 1]; static int ByteOffset[HALF_BUFFER_SIZE + 1]; static void AddByteOffset(BYTE byte, int offset); static void Encode(FILE *infile, FILE *outfile); static void EncodeDebug(FILE *infile, FILE *outfile); static int FindDuplicateSubstring(FILE *infile, /*@out@*/int *length); static void FindInit(void); static int HuffmanEncode(int length); static void InitEntry(int index, int *string, int length); static void InitTable(void); static void InitTableEntries(int index, int *string); static void WriteLiteral(int length, FILE *outfile); static void WriteLiteralDebug(int length, FILE *outfile); static void WriteSubstring(int length, int negoffset, FILE *outfile); static void WriteSubstringDebug(int length, int negoffset, FILE *outfile); /******************************************************************** main */ int main(int argc, char **argv) { FILE *infile, *outfile; /* Set input/output */ infile = stdin; outfile = stdout; if( argc > 1 && argv[1][0] != '-' ) { if( (infile = fopen(argv[1], "rb")) == NULL ) return printf("Can not open %s\n", argv[1]); } if( argc > 2 ) { if( (outfile = fopen(argv[2], "wb+")) == NULL ) { (void)fclose(infile); return printf("Can not create %s\n", argv[2]); } } #ifdef _WIN32 (void)setmode(fileno(infile), O_BINARY); (void)setmode(fileno(outfile), O_BINARY); #endif /* Initialize ring buffer */ memset(RingBuffer, 0, BUFFER_SIZE); iChecksum = oChecksum = ProcOffset = LimitOffset = 0; FileSize = 0x7fffffff; /* Compress stream */ FindInit(); if( isatty(fileno(outfile)) != 0 ) { (void)puts("Output is a tty"); argc = 4; } if( argc > 3 ) { (void)puts("Using debug encoder"); EncodeDebug(infile, outfile); } else { Encode(infile, outfile); } /* End */ (void)fclose(infile); (void)fclose(outfile); return 0; } /* main() */ /*********************************************************** AddByteOffset */ static void AddByteOffset(BYTE byte, int offset) { WORD index; int chain; /* Find out of range index */ for(chain = 0; chain < 256; chain++) { if( FirstIndex[chain] == 0 ) continue; if( (offset - ByteOffset[FirstIndex[chain]]) >= (HALF_BUFFER_SIZE - 1) ) break; } assert(chain < 256); /* Remove index from chain */ index = FirstIndex[chain]; FirstIndex[chain] = NextIndex[FirstIndex[chain]]; if( FirstIndex[chain] == 0 ) LastIndex[chain] = 0; /* Add index to end of chain for current byte */ if( LastIndex[(int)byte] == 0 ) { FirstIndex[(int)byte] = LastIndex[(int)byte] = index; } else { assert(NextIndex[LastIndex[(int)byte]] == 0); NextIndex[LastIndex[(int)byte]] = index; LastIndex[(int)byte] = index; } ByteOffset[index] = offset; NextIndex[index] = 0; } /* AddByteOffset() */ /****************************************************************** Encode */ static void Encode(FILE *infile, FILE *outfile) { int runlength, duplength, dupoffset; /* Initialize Huffman tree */ InitTable(); runlength = 0; while( GetByte(ProcOffset, infile) != INPUT_EOF ) { /* Find duplicate substring */ dupoffset = FindDuplicateSubstring(infile, &duplength); if( duplength >= MIN_SUBSTRING_LENGTH ) { if( runlength > 0 ) WriteLiteral(runlength, outfile); WriteSubstring(duplength, dupoffset, outfile); ProcOffset += duplength; runlength = 0; continue; } /* No qualifying substring, keep literal byte in buffer */ AddByteOffset(RingBuffer[ProcOffset & BUFFER_MASK], ProcOffset); ProcOffset++; runlength++; if( runlength >= HALF_BUFFER_SIZE ) { /* Flush literal block */ WriteLiteral(runlength, outfile); runlength = 0; } } /* Flush literal block */ if( runlength > 0 ) WriteLiteral(runlength, outfile); /* Write checksum */ (void)fputc(iChecksum, outfile); } /* Encode() */ /************************************************************* EncodeDebug */ static void EncodeDebug(FILE *infile, FILE *outfile) { int runlength, duplength, dupoffset; DWORD byte; /* Initialize Huffman tree */ InitTable(); runlength = 0; while( (byte = GetByte(ProcOffset, infile)) != INPUT_EOF ) { /* Find duplicate substring */ dupoffset = FindDuplicateSubstring(infile, &duplength); if( duplength >= MIN_SUBSTRING_LENGTH ) { if( runlength > 0 ) WriteLiteralDebug(runlength, outfile); WriteSubstringDebug(duplength, dupoffset, outfile); ProcOffset += duplength; runlength = 0; continue; } /* No qualifying substring, keep literal byte in buffer */ AddByteOffset(RingBuffer[ProcOffset & BUFFER_MASK], ProcOffset); ProcOffset++; runlength++; if( runlength >= HALF_BUFFER_SIZE ) { /* Flush literal block */ WriteLiteralDebug(runlength, outfile); runlength = 0; } } /* Flush literal block */ if( runlength > 0 ) WriteLiteralDebug(runlength, outfile); fprintf(outfile, "Checksum = %d\n", iChecksum); } /* EncodeDebug() */ /************************************************** FindDuplicateSubstring */ static int FindDuplicateSubstring(FILE *infile, int *length) { WORD index; int maxoffset, maxlen; int offset, len, i; maxoffset = maxlen = 0; /* Interate through previous substrings starting with current byte */ for(index = FirstIndex[(int)RingBuffer[ProcOffset & BUFFER_MASK]]; index != 0; index = NextIndex[index]) { offset = ByteOffset[index]; len = ProcOffset - offset; /* Stop if longer match is not possible */ if( len < maxlen ) break; for(i = 1; i < len; i++) { if( GetByte(ProcOffset + i, infile) != GetByte(offset + i, infile) ) break; } if( i > maxlen ) { maxoffset = offset; maxlen = i; } } *length = maxlen; return ProcOffset - maxoffset; } /* FindDuplicateSubstring() */ /**************************************************************** FindInit */ static void FindInit(void) { WORD index; int i, offset; /* Reset indexes */ memset(FirstIndex, 0, 256 * sizeof(WORD)); memset(LastIndex, 0, 256 * sizeof(WORD)); /* Set offsets for 0 byte */ FirstIndex[0] = index = 1; offset = -HALF_BUFFER_SIZE + 1; for(i = 0; i < HALF_BUFFER_SIZE - 1; i++) { ByteOffset[index] = offset; NextIndex[index] = index + 1; index++; offset++; } NextIndex[index - 1] = 0; LastIndex[0] = index - 1; } /* FindInit() */ /*********************************************************** HuffmanEncode */ static int HuffmanEncode(int length) { int bit, byte, c, i, j, x, enclength; for(i = x = bit = byte = enclength = 0; i < length; i++) { c = (int)RingBuffer[(ProcOffset - length + i) & BUFFER_MASK]; for(j = 0; j < Table[c].length; j++) { byte |= ((unsigned)Table[c].bit[j]) << (bit++); if( bit > 7 ) { HuffmanBuffer[x++] = (BYTE)byte; if( x >= length ) return 0; bit = byte = 0; } } } if( bit > 0 ) HuffmanBuffer[x++] = (BYTE)byte; return (x >= length) ? 0 : x; } /* HuffmanEncode() */ /*************************************************************** InitEntry */ static void InitEntry(int index, int *string, int length) { int i; Table[index].length = length; for(i = 0; i < length; i++) Table[index].bit[i] = (unsigned char)(string[i] - 1); } /* InitEntry() */ /*************************************************************** InitTable */ static void InitTable(void) { int string[HUFFMAN_TREE_DEPTH]; string[0] = 0; InitTableEntries(0, string); } /* InitTable() */ /******************************************************** InitTableEntries */ static void InitTableEntries(int index, int *string) { size_t x; int i; for(x = 0; string[x] != 0; x++); string[x] = 1; string[x + 1] = 0; if( (i = Left[index]) <= 0 ) InitEntry(-i, string, x + 1); else InitTableEntries(i, string); string[x] = 2; if( (i = Right[index]) <= 0 ) InitEntry(-i, string, x + 1); else InitTableEntries(i, string); string[x] = 0; } /* InitTableEntries() */ /************************************************************ WriteLiteral */ static void WriteLiteral(int length, FILE *outfile) { int hlength, i; if( (hlength = HuffmanEncode(length)) > 0 ) { PutWord((WORD)(0x4000 | length), outfile); for(i = 0; i < hlength; PutByte(HuffmanBuffer[i++], outfile)); } else { PutWord((WORD)length, outfile); for(i = 0; i < length; i++) PutByte(RingBuffer[(ProcOffset - length + i) & BUFFER_MASK], outfile); } } /* WriteLiteral() */ /******************************************************* WriteLiteralDebug */ static void WriteLiteralDebug(int length, FILE *outfile) { int hlength; if( (hlength = HuffmanEncode(length)) > 0 ) { fprintf(outfile, "%08x: huffman:%7d bytes (%d bytes)\n", (DWORD)(ProcOffset - length), length, hlength); } else { fprintf(outfile, "%08x: literal:%7d bytes\n", (DWORD)(ProcOffset - length), length); } } /* WriteLiteralDebug() */ /********************************************************** WriteSubstring */ static void WriteSubstring(int length, int negoffset, FILE *outfile) { int i; /* Write block */ if( length < 256 ) { PutWord((WORD)(0x8000 | negoffset), outfile); PutByte((BYTE)length, outfile); } else { PutWord((WORD)(0xc000 | negoffset), outfile); PutWord((WORD)length, outfile); } /* Record offsets */ for(i = 0; i < length; i++) { AddByteOffset(RingBuffer[(ProcOffset + i) & BUFFER_MASK], ProcOffset + i); } } /* WriteSubstring() */ /***************************************************** WriteSubstringDebug */ static void WriteSubstringDebug(int length, int negoffset, FILE *outfile) { int i; /* Write block */ fprintf(outfile, "%08x: substring:%5d bytes (from %08x)\n", (DWORD)ProcOffset, length, (DWORD)(ProcOffset - negoffset)); /* Record offsets */ for(i = 0; i < length; i++) { AddByteOffset(RingBuffer[(ProcOffset + i) & BUFFER_MASK], ProcOffset + i); } } /* WriteSubstringDebug() */