/* encode.c (c11.c) - Compressor - Don Yang (uguu.org) Slowly compress stdin to stdout. Assume 32bit ints. Some features: This is a one pass, lossless encoder, and the maximum possible compression ratio is something like 1/8192, where all input characters are NULL. Since it work with pipes, you can try ./encode|./encode|./encode, etc. Usually the output doesn't get any smaller. This is a O(N) algorithm, meaning compression is slow no matter what size the input is, and gets even slower linearly. Because in reality, it's something like O(536854528 * N) (told you it was slow!) Want to know how slow it is? I tried it on my boot image (602423 bytes), and here are the results: ./encode image 2055.770u 0.800s 34:38.54 98.9% 0+0k 0+0io 241pf+0w 594981/602423 = 0.9876 rar a -mm -m5 -mde -s image.rar bzimage 1.720u 0.090s 0:01.90 95.2% 0+0k 0+0io 57pf+0w 593375/602423 = 0.9850 gzip -c bzImage > image 0.450u 0.010s 0:00.51 90.1% 0+0k 0+0io 94pf+0w 592292/602423 = 0.9832 Here is another test, using log files from previous test: cat encode.log decode.log > test.txt ./encode < test.txt > test2.txt 6.340u 0.000s 0:06.38 99.3% 0+0k 0+0io 99pf+0w 5247/22758 = 0.2306 rar a -mm -m5 -mde -s test.rar test.txt 0.090u 0.060s 0:00.15 100.0% 0+0k 0+0io 56pf+0w 2379/22758 = 0.1045 gzip -c test.txt > test2.txt 0.010u 0.000s 0:00.01 100.0% 0+0k 0+0io 92pf+0w 2572/22758 = 0.1130 So you can see: this program is pretty lame. But that's all right, it wasn't meant to be useful ;) See decode.c for decompression statistics. 06/01/00 */ #include /* Constants */ #define HISTORY_SIZE 0x7fff #define BUFFER_SIZE ((HISTORY_SIZE + 1) * 2) #define MINIMUM_MATCH 5 #define BYTES_PER_HASH 1024 /* Ring buffer */ static int buffer[BUFFER_SIZE]; /* 64K * sizeof(int) */ static int bindex; /* Ring buffer index (current position) */ static int blength; /* Bytes read ahead */ /* Other globals */ static int run; /* Number of unmatched bytes */ static int isize, osize; /* File sizes */ static int isum, osum; /* XOR of all bytes */ #ifdef DEBUG static FILE *logfile; /* Debug log */ #endif /* Input wrapper */ static int input(void) { int data; isize++; isum ^= (data = getchar()); if( data == EOF ) isum ^= EOF; if( isize % BYTES_PER_HASH == 0 ) fputc('.', stderr); return data; } /* input() */ /* Output wrapper */ static void output(int data) { osize++; osum ^= (data &= 0xff); putchar(data); } /* output() */ /* Ring buffer initializer */ static void binit(void) { int i; for(i = bindex = 0; i < BUFFER_SIZE; buffer[i++] = 0); buffer[0] = input(); blength = 1; } /* binit() */ /* Seek forward in ring buffer */ static void bseek(int offset) { for(; offset; offset--) { bindex = (bindex + 1) % BUFFER_SIZE; if( !(--blength) ) { buffer[bindex] = input(); blength++; } } } /* bseek() */ /* Character reader wrapper */ static int bgetc(int offset) { for(; offset >= blength; blength++) buffer[(bindex + blength) % BUFFER_SIZE] = input(); return buffer[(bindex + offset + BUFFER_SIZE) % BUFFER_SIZE]; } /* bgetc() */ /* Flush unmatched characters */ static void bflush(void) { #ifdef DEBUG fprintf(logfile, "Run:%d\n", run); #endif /* Write header (intel order) */ output(run); output(run >> 8); /* Write data */ for(; run; run--) output(bgetc(-run)); } /* bflush() */ /* main */ int main(void) { int code, length, source, maxlength, maxsource; #ifdef DEBUG logfile = fopen("encode.log", "wt+"); #endif /* Initialize ring buffer */ isize = osize = isum = osum = 0; binit(); /* Compress bytes, starting with 0 bytes matched in pattern */ for(code = bgetc(run = 0); code != EOF; code = bgetc(0)) { /* Match pattern in history */ maxlength = 0; for( source = -MINIMUM_MATCH; source > -HISTORY_SIZE; source--) { if( bgetc(source) == code ) { /* First character matched, try match more */ for(length = 1; length < -source; length++) { if( bgetc(source + length) != bgetc(length) ) break; } if( length > maxlength ) { maxlength = length; maxsource = source; } } } if( maxlength > MINIMUM_MATCH ) { /* Pattern matched */ /* Flush unmatched data */ if( run ) bflush(); #ifdef DEBUG fprintf(logfile, "Pattern:%d (%d)\n", maxlength, maxsource); #endif /* Write matched data (intel order) */ output(maxsource); output(maxsource >> 8); output(maxlength); output(maxlength >> 8 ); /* Skip over matched data */ bseek(maxlength); } else { /* Pattern not matched */ bseek(1); run++; if( run >= HISTORY_SIZE ) bflush(); } } /* Flush last unmatched pattern */ if( run ) bflush(); /* Write checksum */ output(isum); /* End */ isize--; fprintf(stderr, "\ncompressed %d/%d\n", osize, isize); #ifdef DEBUG fprintf(logfile, "Checksum = %d\n%d/%d = %f\n", isum & 0xff, osize, isize, ((float)osize) / (float)isize); fclose(logfile); #endif return 0; } /* main() */