/* compress.c - Don Yang (uguu.org) 04/26/08 */ /*@ -branchstate -compdef @*/ #include #include #include #include /* Theoretical worse compressed size factor */ #define MAX_EXPANSION_FACTOR 8 /* Initial dictionary */ static const char InitDict[] = "/)(b9`.d,'P\n\"o8 "; /* All permutations */ static const int Permutations[24][4] = { {0,1,2,3}, {0,1,3,2}, {0,2,1,3}, {0,2,3,1}, {0,3,1,2}, {0,3,2,1}, {1,0,2,3}, {1,0,3,2}, {1,2,0,3}, {1,2,3,0}, {1,3,0,2}, {1,3,2,0}, {2,0,1,3}, {2,0,3,1}, {2,1,0,3}, {2,1,3,0}, {2,3,0,1}, {2,3,1,0}, {3,0,1,2}, {3,0,2,1}, {3,1,0,2}, {3,1,2,0}, {3,2,0,1}, {3,2,1,0} }; /* Load file to memory, returning the file as a single string */ static /*@null@*//*@only@*/char *LoadFileToString(/*@notnull@*/char *filename) { FILE *infile; char *data = NULL; long filesize; if( (infile = fopen(filename, "rb")) == NULL ) return NULL; do { /* Get file size */ if( fseek(infile, 0, SEEK_END) < 0 ) break; if( (filesize = ftell(infile)) <= 0 ) break; /* Load file */ if( fseek(infile, 0, SEEK_SET) < 0 ) break; if( (data = (char*)malloc((size_t)filesize)) == NULL ) break; if( fread(data, (size_t)filesize, 1, infile) != 1 ) { free(data); data = NULL; } } while( 0 ); (void)fclose(infile); if( data == NULL ) printf("Error loading %s\n", filename); return data; } /* Serialize number to output stream */ static void Encode(int number, char **output_cursor) { /* 8 bytes is enough for 32bit numbers */ char buffer[8]; int size, i; assert(number > 0); buffer[0] = (char)(35 + (number % 57)); size = 1; for(number /= 57; number > 0; number /= 34) buffer[size++] = (char)(93 + (number % 34)); for(i = size - 1; i >= 0; i--) { **output_cursor = buffer[i]; ++*output_cursor; } } /* Find offset+size of longest match in previously compressed text. This is very slow, if we were any more clever, we would be doing some indexing on our input string like LZW, but our input is only so small so we don't care. */ static void LongestMatch(const char *input, int max_dict_size, int max_key_size, /*@out@*/int *offset, /*@out@*/int *size) { const char *p; int first_char = (int)*input; int i, matched; /* Start searching from substring from back to front */ *offset = *size = 0; for(p = input - max_dict_size - 1; p != input;) { /* Find next occurrence of first character */ p = memchr(p + 1, first_char, (size_t)input - (size_t)p - 1); if( p == NULL ) return; assert((int)p < (int)input); /* Get length of current match */ matched = 1; for(i = 1; i < max_key_size && (p + i) != input; i++, matched++) { if( p[i] != input[i] ) break; } /* Update current best match, prefer one that is closer to current pointer since smaller offsets encode to smaller output code. */ if( matched >= *size ) { *offset = (int)input - (int)p; *size = matched; } } } /* Compress data by searching for previously seen patterns */ static void Compress(const char *input, int input_size, char *output) { int i, matched_offset, matched_size; char *output_cursor = output; for(i = (int)sizeof(InitDict) - 1; i < input_size; i += matched_size) { /* Find substring */ LongestMatch(input + i, i, input_size - i, &matched_offset, &matched_size); /* Encode output */ Encode(matched_offset, &output_cursor); Encode(matched_size, &output_cursor); } *output_cursor = '\0'; } /* Compress data, trying all permutations */ static void CompressAll(char *data[]) { char *input, *output, *best_output; int input_size, output_size, best_output_size; int i, j; /* Allocate buffers */ input_size = (int)sizeof(InitDict) - 1; for(i = 0; i < 4; input_size += (int)strlen(data[i++])); input = (char*)malloc((size_t)input_size + 1); output = (char*)malloc((size_t)input_size * MAX_EXPANSION_FACTOR); best_output = (char*)malloc((size_t)input_size * MAX_EXPANSION_FACTOR); assert(input != NULL && output != NULL && best_output != NULL); /* Try each permutation */ best_output_size = input_size * MAX_EXPANSION_FACTOR; for(i = 0; i < 24; i++) { /* Prepare input */ memset(input, 0, (size_t)input_size); strcpy(input, InitDict); for(j = 0; j < 4; j++) strcat(input, data[Permutations[i][j]]); /* Run compression */ Compress(input, input_size, output); output_size = (int)strlen(output); printf("%d %d %d %d = %d -> %d\n", Permutations[i][0], Permutations[i][1], Permutations[i][2], Permutations[i][3], input_size, output_size); /* Copy output if it's better than current */ if( output_size < best_output_size ) { best_output_size = output_size; strcpy(best_output, output); } } /* Print best results */ (void)puts(best_output); /* Free buffers */ free(input); free(output); free(best_output); } int main(void) { char *data[4]; int i; /* Load input */ data[0] = LoadFileToString("maka_soul.txt"); data[1] = LoadFileToString("soul_soul.txt"); data[2] = LoadFileToString("black_star_soul.txt"); data[3] = LoadFileToString("tsubaki_soul.txt"); for(i = 0; i < 4; i++) { if( data[i] == NULL ) { for(i = 0; i < 4; i++) { if( data[i] != NULL ) free(data[i]); } return 1; } } assert(data[0] != NULL); assert(data[1] != NULL); assert(data[2] != NULL); assert(data[3] != NULL); /* Compress */ CompressAll(data); /* Release loaded data */ for(i = 0; i < 4; i++) free(data[i]); return 0; }