/* shindou6.c - Don Yang (uguu.org) 02/25/07 */
/*
 * Usage:
 *   shindou6 FILE...   prints "FILE: <encoding name>" for each file, or
 *                      "FILE: can not open!" when fopen() fails.
 *   shindou6           reads stdin, detects its encoding, and writes a
 *                      rewritten copy to stdout: a segment that ends in
 *                      sentence punctuation gets a space after every
 *                      character, and punctuation marks are replaced by
 *                      exclamation marks in the detected encoding.
 *
 * Encoding indices used throughout:
 *   0 = UCS-2LE, 1 = UCS-2BE, 2 = UTF-8, 3 = Shift_JIS, 4 = EUC-JP
 */
#include <stdio.h>
#include <stdlib.h>
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif

/* Width of the kana window counted by DetectEncoding(). */
#define KANA_RANGE (0x3093-0x3041)

/* Zeroed bytes kept after the loaded data so that the decoder's look-ahead
 * on a truncated trailing character reads defined memory. */
#define BUFFER_PAD 4

static FILE *infile;
static unsigned char *buffer;

/*
 * Packed constant table, addressed by the byte offsets stored in data[][]
 * and bom[].  It contains three kinds of entries, some of them overlapping:
 *
 *   - output records: one byte holding (payload length + 31) followed by
 *     the payload ('!' = 2 bytes, ' ' = 1 byte, '"' = 3 bytes);
 *   - NUL-terminated encoding names and the "can not open!" message;
 *   - decoder rules: pairs of (low ^ 208, high ^ 208) byte bounds, one pair
 *     per byte of a character, terminated by the character length (1..4).
 *
 * The two spaces at offsets 3 and 4 are both required (every offset below
 * depends on them); they are written as separate literals so whitespace
 * normalisation cannot merge them.
 */
static const unsigned char const_str[] =
    "!!\0 " " !\xff\1"                          /*   0 */
    "UTF-8\0"                                   /*   8 */
    "!\x81IOCCC"                                /*  14 */
    "!\1\xff\xfe\xff""can not open!\0! \0"      /*  21 */
    "\xef\xbb\xbf"                              /*  43 */
    "EUC-JP\0"                                  /*  46 */
    " 'PoPoPo\4""0?PoPo\3\20\17Po\2\1"          /*  53 */
    "\"\xef\xbc\x81"                            /*  75 */
    "Shift_JIS\0"                               /*  79 */
    "!\xa1\xaa"                                 /*  89 */
    "QO\220,\2""0?\220,\2\1"                    /*  92 */
    "UCS-2LE\0"                                 /* 103 */
    "qxq.\2}}q.\2`$q.\2),q.\2^^q.\2__q.q.\3\1"  /* 111 */
    "UCS-2BE\0"                                 /* 144 */
    ;

/*
 * buffer_size     allocated payload size of buffer (excluding BUFFER_PAD)
 * read_size       number of input bytes held in buffer
 * i               current byte offset into buffer
 * c               code of the last decoded character, or -1 if invalid
 * encoding        current encoding index (row of data[])
 * last_char_start offset of the character most recently decoded by main()
 * i0              start of the not-yet-written segment; in file mode, the
 *                 const_str offset of the message to print
 */
static int buffer_size, read_size, i, c, encoding, last_char_start, i0;

/* const_str offsets of the byte order marks for encodings 0, 1 and 2. */
static const int bom[] = {23, 24, 43};

/* Placeholder for cells that main() fills in before they are read. */
#define DONT_CARE 0xdeadbeef

/*
 * Per-encoding parameters, one row per encoding index.
 *
 *   0..11  character codes treated as punctuation (see IsPunct());
 *          0..1 are exclamation marks, 0..7 end a sentence
 *   12     const_str offset of the decoder rules
 *   13     const_str offset of the '!' record used when c <= 255
 *   14     const_str offset of the '!' record used when c > 255
 *   15     const_str offset of the space record
 *   16     const_str offset of the encoding name
 *   17     first code of the kana window (rows 3 and 4 hold a delta that
 *          main() adds to column 11)
 *   18     kana count scratch cell written by DetectEncoding()
 */
static int data[5][19] =
{
    {
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        96, 0, 21, 40, 103, DONT_CARE, 0
    },
    {
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        96, 38, 5, 1, 144, DONT_CARE, 0
    },
    {
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE, DONT_CARE,
        53, 4, 75, 3, 8, DONT_CARE, 0
    },
    {
        DONT_CARE, 8, DONT_CARE, DONT_CARE, 3, 7,
        161, 1, DONT_CARE, 2, 3, 33089,
        92, 4, 14, 3, 79, 350, 0
    },
    {
        DONT_CARE, 8, DONT_CARE, DONT_CARE, 3, 7,
        36513, 1, DONT_CARE, 2, 3, 41378,
        111, 4, 89, 3, 46, 767, 0
    }
};

/* Cell `index` of the row for the current encoding. */
#define D(index) data[encoding][index]

/*
 * Doubles buffer_size and allocates a fresh buffer of that size plus
 * BUFFER_PAD bytes.  Terminates the program if the allocation fails.
 */
static void AllocBuffer(void)
{
    buffer_size *= 2;
    buffer = malloc((size_t)buffer_size + BUFFER_PAD);
    if (buffer == NULL) {
        fputs("out of memory\n", stderr);
        exit(EXIT_FAILURE);
    }
}

/* Releases the input buffer. */
static void FreeBuffer(void)
{
    free(buffer);
    buffer = NULL;
}

/*
 * Reads all of infile into a newly allocated buffer, setting read_size.
 * Stops at end of file or on a read error, and zero-fills BUFFER_PAD bytes
 * after the data.
 */
static void LoadInput(void)
{
    int k;

    buffer_size = 0x800;
    AllocBuffer();
    read_size = 0;

    /* ferror() is tested too: a stream that fails without reaching EOF
     * would otherwise keep this loop running forever. */
    while (!feof(infile) && !ferror(infile)) {
        if (read_size == buffer_size) {
            unsigned char *old = buffer;

            AllocBuffer();
            /* Only read_size bytes exist in the old buffer; buffer_size
             * has already been doubled at this point. */
            for (k = 0; k < read_size; k++)
                buffer[k] = old[k];
            free(old);
        }
        read_size += (int)fread(buffer + read_size, 1,
                                (size_t)(buffer_size - read_size), infile);
    }

    for (k = 0; k < BUFFER_PAD; k++)
        buffer[read_size + k] = 0;
}

/*
 * Decodes one character at buffer[i] using the rules of the current
 * encoding.  On success, advances i past the character and stores its code
 * in c.  If the first byte matches a rule but a later byte does not, sets
 * c to -1 and leaves i unchanged.
 *
 * The stored code is the big-endian concatenation of the character's
 * bytes, except that UCS-2LE units are byte-swapped and 3-byte UTF-8
 * sequences are converted to their 16-bit code point.
 */
static void NextChar(void)
{
    const unsigned char *rule = const_str + D(12);
    unsigned long code = 0;
    int matched = 0;
    int length;
    int k;

    c = 0;
    while (*rule > 4 && c != -1) {
        unsigned char byte = buffer[i + matched];

        if (byte < (rule[0] ^ 208) || byte > (rule[1] ^ 208)) {
            if (matched) {
                c = -1;
            } else {
                /* First byte does not fit this rule: skip to its length
                 * byte; the step below then moves on to the next rule. */
                while (*rule > 4)
                    rule++;
            }
        } else {
            matched++;
            rule++;
        }
        rule++;
    }

    if (c == -1)
        return;

    /* rule now points at the length byte of the matching rule. */
    length = *rule;

    /* Accumulate unsigned: four bytes do not fit in a positive int. */
    for (k = 0; k < length; k++)
        code = (code << 8) | buffer[i++];

    if (encoding == 2 && length == 3) {
        c = (int)(((code >> 4) & 0xf000) | ((code >> 2) & 4032) | (code & 63));
    } else if (encoding == 0) {
        c = (int)((code >> 8) | ((code & 255) << 8));
    } else {
        /* Masking keeps 4-byte sequences non-negative, so they can never
         * be mistaken for the -1 "invalid" marker. */
        c = (int)(code & 0x7fffffffUL);
    }
}

/*
 * Like NextChar(), but always makes progress: an invalid sequence is
 * skipped one byte at a time (c stays -1 in that case).
 */
static void NextCharUnconditional(void)
{
    NextChar();
    if (c == -1)
        i++;
}

/*
 * Sets `encoding` from the contents of buffer:
 *   1. a byte order mark selects encoding 0, 1 or 2;
 *   2. otherwise a 16-bit line feed at an even offset selects 1 or 0;
 *   3. otherwise the input is decoded as each of encodings 2..4 and the
 *      one with the highest kana count wins (ties go to the lowest index;
 *      a decoding failure scores -1).
 */
static void DetectEncoding(void)
{
    for (i = 0; i < 3; i++) {
        if (buffer[0] == const_str[bom[i]] &&
            buffer[1] == const_str[bom[i] + 1] &&
            (i < 2 || buffer[2] == const_str[bom[i] + 2])) {
            encoding = i;
            return;
        }
    }

    for (i = 0; i < read_size - 1; i += 2) {
        if (!buffer[i] && buffer[i + 1] == 10) {
            encoding = 1;
            return;
        }
        if (buffer[i] == 10 && !buffer[i + 1]) {
            encoding = 0;
            return;
        }
    }

    for (encoding = 2; encoding < 5; encoding++) {
        D(18) = 0;
        i = 0;
        while (i < read_size - 3 && D(18) != -1) {
            NextChar();
            if (c == -1) {
                D(18) = -1;
            } else if (c >= D(17) && c <= D(17) + KANA_RANGE) {
                D(18)++;
            }
        }
    }

    encoding = 2;
    for (i = 2; i < 5; i++) {
        if (data[i][18] > D(18))
            encoding = i;
    }
}

/*
 * Returns nonzero if c equals one of the first `range` punctuation codes
 * of the current encoding.
 */
int IsPunct(int range)
{
    int t;

    for (t = 0; t < range; t++) {
        if (c == D(t))
            return 1;
    }
    return 0;
}

/* Writes `size` bytes starting at `bin` to stdout; does nothing if
 * size <= 0. */
void Output(const void *bin, int size)
{
    if (size > 0)
        fwrite(bin, (size_t)size, 1, stdout);
}

/*
 * Writes an exclamation mark in the current encoding, using the record in
 * column 14 when c > 255 and the one in column 13 otherwise.
 */
static void WriteExclamationMark(void)
{
    const unsigned char *record = const_str + D(c > 255 ? 14 : 13);

    Output(record + 1, *record - 31);
}

int main(int argc, char **argv)
{
    int arg;

    /* Punctuation shared by every encoding: '!', '.', '?', ','. */
    for (encoding = 0; encoding < 5; encoding++) {
        D(0) = 33;
        D(2) = 46;
        D(3) = 63;
        D(8) = 44;
    }

    /* Encodings 0..2 compare against these 16-bit codes. */
    for (encoding = 0; encoding < 3; encoding++) {
        D(1) = 0xff01;
        D(9) = D(1) + 11;       /* 0xff0c */
        D(4) = D(9) + 2;        /* 0xff0e */
        D(5) = D(4) + 17;       /* 0xff1f */
        D(6) = D(5) + 66;       /* 0xff61 */
        D(10) = D(6) + 3;       /* 0xff64 */
        D(11) = 0x3001;
        D(7) = D(11) + 1;       /* 0x3002 */
        /* NOTE(review): this starts the UTF-8 kana window at 0x3093,
         * whereas rows 3 and 4 start theirs 350 / 767 above column 11.
         * Kept as in the original; confirm this is intended. */
        D(17) = D(7) + 145;     /* 0x3093 */
    }

    /* Encodings 3..4 store deltas; make them absolute. */
    for (encoding = 3; encoding < 5; encoding++) {
        int base = D(11);

        D(1) += base;
        D(4) += base;
        D(5) += base;
        D(7) += base;
        D(9) += base;
        D(17) += base;
        D(6) += D(10);
    }

    if (argc > 1) {
        /* Report mode: name the encoding of each file. */
        for (arg = 1; arg < argc; arg++) {
            printf("%s: ", argv[arg]);
            i0 = 26;            /* "can not open!" */
            infile = fopen(argv[arg], "rb");
            if (infile != NULL) {
                LoadInput();
                fclose(infile);
                DetectEncoding();
                i0 = D(16);
                FreeBuffer();
            }
            puts((const char *)const_str + i0);
        }
    } else {
        /* Filter mode: stdin to stdout. */
#ifdef _WIN32
        setmode(fileno(stdin), O_BINARY);
        setmode(fileno(stdout), O_BINARY);
#endif
        infile = stdin;
        LoadInput();
        DetectEncoding();

        /* NOTE(review): output happens only when a punctuation mark or a
         * line feed is reached, so bytes after the last one are never
         * written.  Kept as in the original. */
        i = i0 = 0;
        while (i < read_size) {
            last_char_start = i;
            NextCharUnconditional();

            if (!IsPunct(12) && c != 10)
                continue;

            if (IsPunct(8)) {
                /* Sentence end: replay the pending segment, writing a
                 * space after every character. */
                const unsigned char *space = const_str + D(15);

                i = i0;
                while (i < last_char_start) {
                    NextCharUnconditional();
                    Output(buffer + i0, i - i0);
                    Output(space + 1, *space - 31);
                    i0 = i;
                }

                /* Decode the punctuation mark again; an exclamation mark
                 * in the input is written out doubled. */
                NextCharUnconditional();
                if (IsPunct(2))
                    WriteExclamationMark();
                WriteExclamationMark();
            } else {
                /* Other punctuation or line feed: copy the segment, then
                 * replace the mark with '!' or keep the line feed. */
                Output(buffer + i0, last_char_start - i0);
                if (c != 10)
                    WriteExclamationMark();
                else
                    Output(buffer + last_char_start, i - last_char_start);
            }
            i0 = i;
        }
        FreeBuffer();
    }
    return 0;
}