/* kirika1.c - Don Yang (uguu.org) Preprocess 1: remove header dependencies. This is the last version which passes lclint 2.5q. 09/15/01 */ #include /* This is a placeholder for later... unix/win32 sources will be separate files instead of a single file distinguished by preprocessors. */ #define SetBinary(stream) /*tmode(fileno(stream), O_BINAR*/ #define BUFFER_SIZE 0x8000 #define BUFFER_MASK 0x7fff #define HALF_BUFFER_SIZE 0x3ffe #define INPUT_EOF 0xffffffffU #define MIN_SUBSTRING_LENGTH 5 typedef unsigned int DWORD; typedef unsigned short WORD; typedef unsigned char BYTE; /* Ring buffer */ static BYTE RingBuffer[BUFFER_SIZE]; static int ProcOffset, LimitOffset; static int FileSize, iChecksum, oChecksum; /* Huffman table/buffer */ #define HUFFMAN_TREE_DEPTH 15 static int Left[127] = { 1, 2, 3,-116,-108, 6, 7, 8, -68, -62, -2, 12, -20, 14, -26, 16, -70, 18, 19, 20, 21, -84, 23,-107, 25, -64, 27, 28, -69, 30, -113, -75,-125, -61, 35, 36, -46, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, -25, -12, -48, 51, 52, 53, -57, 55, 56, 57, 58, 59, -81, -127,-126,-106, -77, -91, 66, 67, -14, 69, -29, 71,-114,-117,-121, 75, -51,-111, 78, 79, 80, -115,-100, -97, 84,-105, 86, 87, 88, -52, -85, -76, -35, -37, -33, 95, 96, -45, 98, -78, 100, -60, -65, 103,-122, 105, 106, -31, -1, 109, -7, 111,-118, -82,-119, -34, 116, -92, 118, 119, -18, 121, -27, 0, 124, -96, -58, -4 }; static int Right[127] = { 77, 37, 17, 4, 5,-112, 15, -50, 9, 10, 11, 13, -28, -19, -30, -83, -56, 34, 26, 22, -41,-120, -40, 24, -54, -53, -95, 33, 29, 32, 31, -3,-123, -59, -10,-109, -98, 70,-101, 49, -102, -39, -36, -79, -88, 48, -5, -74, -9, 50, 54, -73, -47, -66, -49, 63, 62, -93, 61, 60, -11, -13, -89, 64, 65, 68, -24, -21, -90, -22, 76, 72, 73, 74, -42, -80, -8, -32, 83, 82, 81, -99,-110, 94, 85,-104,-103, 90, 89, -72, 91, 92, 93, -38, 122, 110, 97, 101, 99, -71, -86, 102, 107, 104, -16, -63, -23, 108,-124, -17, 113, 112, -67, 114, 115, 117, -43, -87, 120, -94, -6, -15, 123, -44, 125, 126, -55 }; static struct { unsigned char bit[HUFFMAN_TREE_DEPTH]; int length; } Table[128]; static BYTE HuffmanBuffer[HALF_BUFFER_SIZE]; /* Substring indexes */ static WORD FirstIndex[256], LastIndex[256]; static WORD NextIndex[HALF_BUFFER_SIZE + 1]; static int ByteOffset[HALF_BUFFER_SIZE + 1]; static void AddByteOffset(BYTE byte, int offset); static void Decode(FILE *infile, FILE *outfile); static void Encode(FILE *infile, FILE *outfile); static int FindDuplicateSubstring(FILE *infile, /*@out@*/int *length); static void FindInit(void); static DWORD GetByte(int absoffset, FILE *infile); static DWORD GetWord(FILE *infile); static void HuffmanDecode(int length, FILE *infile); static int HuffmanEncode(int length); static void InitEntry(int index, int *string, int length); static void InitTable(void); static void InitTableEntries(int index, int *string); static void PutByte(BYTE data, FILE *outfile); static void PutWord(WORD data, FILE *outfile); static void ReadLiteral(DWORD tag, FILE *infile, FILE *outfile); static void ReadSubstring(DWORD tag, FILE *infile, FILE *outfile); static void WriteLiteral(int length, FILE *outfile); static void WriteSubstring(int length, int negoffset, FILE *outfile); /******************************************************************** main */ int main(int argc, char **argv) { FILE *infile, *outfile; int i; /* Set input/output */ infile = stdin; outfile = stdout; if( argc > 2 ) { if( (infile = fopen(argv[2], "rb")) == NULL ) return printf("Can not open %s\n", argv[2]); } if( argc > 3 ) { if( (outfile = fopen(argv[3], "wb+")) == NULL ) { (void)fclose(infile); return printf("Can not create %s\n", argv[3]); } } SetBinary(infile); SetBinary(outfile); /* Initialize ring buffer */ for(i = 0; i < BUFFER_SIZE; RingBuffer[i++] = (BYTE)0); iChecksum = oChecksum = ProcOffset = LimitOffset = 0; FileSize = 0x7fffffff; if( argv[1][0] != 'x' ) { /* Compress stream */ Encode(infile, outfile); } else { /* Decompress stream */ Decode(infile, outfile); } /* End */ (void)fclose(infile); (void)fclose(outfile); return 0; } /* main() */ /*********************************************************** AddByteOffset */ static void AddByteOffset(BYTE byte, int offset) { WORD index; int chain; /* Find out of range index */ for(chain = 0; chain < 256; chain++) { if( FirstIndex[chain] == 0 ) continue; if( (offset - ByteOffset[FirstIndex[chain]]) >= (HALF_BUFFER_SIZE - 1) ) break; } /* Remove index from chain */ index = FirstIndex[chain]; FirstIndex[chain] = NextIndex[FirstIndex[chain]]; if( FirstIndex[chain] == 0 ) LastIndex[chain] = 0; /* Add index to end of chain for current byte */ if( LastIndex[(int)byte] == 0 ) { FirstIndex[(int)byte] = LastIndex[(int)byte] = index; } else { NextIndex[LastIndex[(int)byte]] = index; LastIndex[(int)byte] = index; } ByteOffset[index] = offset; NextIndex[index] = 0; } /* AddByteOffset() */ /****************************************************************** Decode */ static void Decode(FILE *infile, FILE *outfile) { DWORD tag; while( (tag = GetWord(infile)) != INPUT_EOF ) { if( (tag & 0x8000) == 0 ) ReadLiteral(tag, infile, outfile); else ReadSubstring(tag, infile, outfile); } if( iChecksum != oChecksum ) (void)fputs("Checksum failed\n", stderr); } /* Decode() */ /****************************************************************** Encode */ static void Encode(FILE *infile, FILE *outfile) { int runlength, duplength, dupoffset; /* Initialize substring finder */ FindInit(); /* Initialize Huffman tree */ InitTable(); runlength = 0; while( GetByte(ProcOffset, infile) != INPUT_EOF ) { /* Find duplicate substring */ dupoffset = FindDuplicateSubstring(infile, &duplength); if( duplength >= MIN_SUBSTRING_LENGTH ) { if( runlength > 0 ) WriteLiteral(runlength, outfile); WriteSubstring(duplength, dupoffset, outfile); ProcOffset += duplength; runlength = 0; continue; } /* No qualifying substring, keep literal byte in buffer */ AddByteOffset(RingBuffer[ProcOffset & BUFFER_MASK], ProcOffset); ProcOffset++; runlength++; if( runlength >= HALF_BUFFER_SIZE ) { /* Flush literal block */ WriteLiteral(runlength, outfile); runlength = 0; } } /* Flush literal block */ if( runlength > 0 ) WriteLiteral(runlength, outfile); /* Write checksum */ (void)fputc(iChecksum, outfile); } /* Encode() */ /************************************************** FindDuplicateSubstring */ static int FindDuplicateSubstring(FILE *infile, int *length) { WORD index; int maxoffset, maxlen; int offset, len, i; maxoffset = maxlen = 0; /* Interate through previous substrings starting with current byte */ for(index = FirstIndex[(int)RingBuffer[ProcOffset & BUFFER_MASK]]; index != 0; index = NextIndex[index]) { offset = ByteOffset[index]; len = ProcOffset - offset; /* Stop if longer match is not possible */ if( len < maxlen ) break; for(i = 1; i < len; i++) { if( GetByte(ProcOffset + i, infile) != GetByte(offset + i, infile) ) break; } if( i > maxlen ) { maxoffset = offset; maxlen = i; } } *length = maxlen; return ProcOffset - maxoffset; } /* FindDuplicateSubstring() */ /**************************************************************** FindInit */ static void FindInit(void) { WORD index; int i, offset; /* Reset indexes */ for(i = 0; i < 256; FirstIndex[i++] = 0); for(i = 0; i < 256; LastIndex[i++] = 0); /* Set offsets for 0 byte */ FirstIndex[0] = index = 1; offset = -HALF_BUFFER_SIZE + 1; for(i = 0; i < HALF_BUFFER_SIZE - 1; i++) { ByteOffset[index] = offset; NextIndex[index] = index + 1; index++; offset++; } NextIndex[index - 1] = 0; LastIndex[0] = index - 1; } /* FindInit() */ /***************************************************************** GetByte */ DWORD GetByte(int absoffset, FILE *infile) { int byte; while( absoffset >= LimitOffset ) { if( (byte = fgetc(infile)) == EOF ) { FileSize = LimitOffset++; return INPUT_EOF; } iChecksum ^= (int)byte; RingBuffer[(LimitOffset++) & BUFFER_MASK] = (BYTE)byte; } if( absoffset >= FileSize ) return INPUT_EOF; return (DWORD)RingBuffer[absoffset & BUFFER_MASK]; } /* GetByte() */ /***************************************************************** GetWord */ static DWORD GetWord(FILE *infile) { int byte1, byte2; if( (byte1 = fgetc(infile)) == EOF ) return INPUT_EOF; if( (byte2 = fgetc(infile)) == EOF ) { iChecksum = byte1; return INPUT_EOF; } return (DWORD)(((unsigned)byte2 << 8) | (unsigned)byte1); } /* GetWord() */ /*********************************************************** HuffmanDecode */ static void HuffmanDecode(int length, FILE *infile) { int bit, byte, i, x, *branch; bit = 8; for(i = x = byte = 0; i < length;) { if( bit > 7 ) { if( (byte = fgetc(infile)) == EOF ) break; bit = 0; } branch = ((byte & (1 << bit)) == 0) ? Left : Right; if( branch[x] <= 0 ) { RingBuffer[(ProcOffset + i) & BUFFER_MASK] = (BYTE)(-branch[x]); i++; x = 0; } else { x = branch[x]; } bit++; } } /* HuffmanDecode() */ /*********************************************************** HuffmanEncode */ static int HuffmanEncode(int length) { int bit, byte, c, i, j, x, enclength; for(i = x = bit = byte = enclength = 0; i < length; i++) { c = (int)RingBuffer[(ProcOffset - length + i) & BUFFER_MASK]; if( (c & 0x80) != 0 ) return 0; for(j = 0; j < Table[c].length; j++) { byte |= ((unsigned)Table[c].bit[j]) << (bit++); if( bit > 7 ) { HuffmanBuffer[x++] = (BYTE)byte; if( x >= length ) return 0; bit = byte = 0; } } } if( bit > 0 ) HuffmanBuffer[x++] = (BYTE)byte; return (x >= length) ? 0 : x; } /* HuffmanEncode() */ /*************************************************************** InitEntry */ static void InitEntry(int index, int *string, int length) { int i; Table[index].length = length; for(i = 0; i < length; i++) Table[index].bit[i] = (unsigned char)(string[i] - 1); } /* InitEntry() */ /*************************************************************** InitTable */ static void InitTable(void) { int string[HUFFMAN_TREE_DEPTH]; string[0] = 0; InitTableEntries(0, string); } /* InitTable() */ /******************************************************** InitTableEntries */ static void InitTableEntries(int index, int *string) { size_t x; int i; for(x = 0; string[x] != 0; x++); string[x] = 1; string[x + 1] = 0; if( (i = Left[index]) <= 0 ) InitEntry(-i, string, x + 1); else InitTableEntries(i, string); string[x] = 2; if( (i = Right[index]) <= 0 ) InitEntry(-i, string, x + 1); else InitTableEntries(i, string); string[x] = 0; } /* InitTableEntries() */ /***************************************************************** PutByte */ void PutByte(BYTE byte, FILE *outfile) { oChecksum ^= (int)byte; (void)fputc((int)byte, outfile); } /* PutByte() */ /***************************************************************** PutWord */ void PutWord(WORD word, FILE *outfile) { oChecksum ^= (int)(word & 0xff); oChecksum ^= (int)(word >> 8); (void)fputc((int)(word & 0xff), outfile); (void)fputc((int)(word >> 8), outfile); } /* PutWord() */ /************************************************************* ReadLiteral */ static void ReadLiteral(DWORD tag, FILE *infile, FILE *outfile) { DWORD byte; int length, i; length = (int)(tag & 0x3fff); if( (tag & 0x4000) == 0 ) { /* Literal block */ for(i = 0; i < length; i++) { byte = GetByte(ProcOffset, infile); ProcOffset++; PutByte((BYTE)byte, outfile); } } else { /* Huffman block */ HuffmanDecode(length, infile); for(i = 0; i < length; i++) PutByte(RingBuffer[(ProcOffset + i) & BUFFER_MASK], outfile); LimitOffset = ProcOffset += length; } } /* ReadLiteral() */ /*********************************************************** ReadSubstring */ static void ReadSubstring(DWORD tag, FILE *infile, FILE *outfile) { int length, negoffset, i; if( (tag & 0x4000) == 0 ) length = (int)fgetc(infile); else length = (int)GetWord(infile); negoffset = (int)(tag & 0x3fff); /* Copy bytes */ for(i = 0; i < length; i++) { PutByte(RingBuffer[ProcOffset & BUFFER_MASK] = RingBuffer[(ProcOffset - negoffset) & BUFFER_MASK], outfile); ProcOffset++; } LimitOffset = ProcOffset; } /* ReadSubstring() */ /************************************************************ WriteLiteral */ static void WriteLiteral(int length, FILE *outfile) { int hlength, i; if( (hlength = HuffmanEncode(length)) > 0 ) { PutWord((WORD)(0x4000 | length), outfile); for(i = 0; i < hlength; PutByte(HuffmanBuffer[i++], outfile)); } else { PutWord((WORD)length, outfile); for(i = 0; i < length; i++) PutByte(RingBuffer[(ProcOffset - length + i) & BUFFER_MASK], outfile); } } /* WriteLiteral() */ /********************************************************** WriteSubstring */ static void WriteSubstring(int length, int negoffset, FILE *outfile) { int i; /* Write block */ if( length < 256 ) { PutWord((WORD)(0x8000 | negoffset), outfile); PutByte((BYTE)length, outfile); } else { PutWord((WORD)(0xc000 | negoffset), outfile); PutWord((WORD)length, outfile); } /* Record offsets */ for(i = 0; i < length; i++) { AddByteOffset(RingBuffer[(ProcOffset + i) & BUFFER_MASK], ProcOffset + i); } } /* WriteSubstring() */