#!/usr/bin/perl -w use strict; # Load pixel data sub LoadPixels() { while( ) { last if /pixels/; } my $pixels = ""; while( my $line = ) { next unless $line =~ /^"([a-z]+)"/; $pixels .= $1; } return $pixels; } # Run-length encode input data: # = repeat length + literal # = literal sub RLECompress($) { my ($pixels) = @_; my $compressed_data = ""; for(my $i = 0; $i < length($pixels);) { my $j = $i + 1; for(; $j < length($pixels) && $j < $i + 26 && substr($pixels, $i, 1) eq substr($pixels, $j, 1); $j++) { } if( $j > 3 ) { # 3 or more repeated characters $compressed_data .= chr(ord('A') + $j - $i) . substr($pixels, $i, 1); $i = $j; } else { # Output literal character $compressed_data .= substr($pixels, $i, 1); $i++; } } return $compressed_data; } # Find the longest subchain with a starting character by depth-first search. sub BuildSubchain($$); sub BuildSubchain($$) { my ($dict, $prefix) = @_; my $longest_chain = $prefix; foreach my $i (sort keys %$dict) { # Limit branching factor. We will get slightly less optimal # solutions, but we don't mind. return $longest_chain if length($longest_chain) >= 26; next if substr($i, 0, 1) ne substr($prefix, length($prefix) - 1); delete $$dict{$i}; my $chain = BuildSubchain($dict, $prefix . substr($i, 1)); $$dict{$i} = 1; if( length($longest_chain) < length($chain) ) { $longest_chain = $chain; } } return $longest_chain; } # Find the longest overlapping chain of bigrams from dictionary. # # Input = {aa, ab, bc, dd} # Output = aabc sub BuildChain($) { my ($dict) = @_; my $longest_chain = ""; foreach my $i (sort keys %$dict) { delete $$dict{$i}; my $chain = BuildSubchain($dict, $i); $$dict{$i} = 1; if( length($longest_chain) < length($chain) ) { $longest_chain = $chain; } } return $longest_chain; } # Build dictionary of common substrings and compress accordingly. # # pixel_pair = dictionary[input - 35]; sub DictionaryCompress($) { my ($pixels) = @_; # Build dictionary of 2-byte pairs. Only dictionary with 3-byte # entires achieve a better compression ratio, but there the keys # won't fit in printable range. # # Intuitively, we should expect 2-byte dictionary to work well # because there are only 12 colors in the palette, so each pixel # should cost one nibble on average. The reason why we needed a # dictionary is because we need the characters to be printable. my %dict = (); for(my $i = 0; $i < length($pixels); $i += 2) { $dict{substr($pixels, $i, 2)} = 1; } # Build dictionary with some overlap my $dictionary_chain = ""; while( scalar keys %dict ) { my $sub_chain = BuildChain(\%dict); for(my $i = 0; $i < length($sub_chain) - 1; $i++) { delete $dict{substr($sub_chain, $i, 2)}; } if( $dictionary_chain ne "" && substr($dictionary_chain, length($dictionary_chain) - 1) eq substr($sub_chain, 0, 1) ) { chop $dictionary_chain; } $dictionary_chain .= $sub_chain; } my $data = "dictionary = \%q{$dictionary_chain}\ndata = \%q{"; # Encode bigrams for(my $i = 0; $i < length($pixels); $i += 2) { my $offset = index($dictionary_chain, substr($pixels, $i, 2)); $data .= chr(35 + $offset); } $data .= "}\n"; return $data; } # Compress pixels using Lempel-Ziv-Welch # 34 .. 46 = literal # other = # offset = -(char - 47) - 3 # length = (char - 47) + 3 sub LZWCompress($) { my ($pixels) = @_; use constant PALETTE_SIZE => 12; use constant OFFSET_BASE => 34 + PALETTE_SIZE; use constant OFFSET_LIMIT => ord('{'); use constant MIN_MATCH => 3; use constant MAX_OFFSET => OFFSET_LIMIT - OFFSET_BASE - 1 + MIN_MATCH; my $data = ""; for(my $i = 0; $i < length($pixels);) { # Search backwards for matching substring my $longest_match = 0; my $match_offset = undef; for(my $j = $i - MAX_OFFSET < 0 ? 0 : $i - MAX_OFFSET; $j < $i; $j++) { # Avoid offsets that would result in backslash next if OFFSET_BASE + $i - $j - MIN_MATCH == ord('\\'); my $k = 0; while( $k < MAX_OFFSET && $j + $k < $i && $i + $k < length($pixels) && substr($pixels, $j + $k, 1) eq substr($pixels, $i + $k, 1) ) { $k++; } # Avoid length that would result in backslash if( OFFSET_BASE + $k - MIN_MATCH == ord("\\") ) { $k--; } if( $k >= MIN_MATCH && $k > $longest_match ) { $longest_match = $k; $match_offset = $j; } } if( defined($match_offset) ) { # Encode offset and length $data .= chr(OFFSET_BASE + $i - $match_offset - MIN_MATCH) . chr(OFFSET_BASE + $longest_match - MIN_MATCH); $i += $longest_match; } else { # Encode literal $data .= chr(34 + ord(substr($pixels, $i, 1)) - ord('a')); $i++; } } return $data; } my $pixels = LoadPixels(); print STDERR "Original size = ", length($pixels), "\n"; print STDERR "RLE = ", length(RLECompress($pixels)), "\n"; print STDERR "Dictionary = ", length(DictionaryCompress($pixels)), "\n"; print STDERR "LZW = ", length(LZWCompress($pixels)), "\n"; print LZWCompress($pixels), "\n"; __DATA__ /* XPM */ static char *tina[] = { /* width height ncolors chars_per_pixel */ "160 24 12 1", /* colors */ "a c #000000", /* 000000 -> 0: 000000 */ "e c #870000", /* 940000 -> 88: 870000 */ "l c #00005f", /* 003131 -> 17: 00005f */ "f c #FFD787", /* ffc68c -> 222: ffd787 */ "i c #FFFFD7", /* ffffde -> 230: ffffd7 */ "h c #875FD7", /* 845ade -> 98: 875fd7 */ "b c #1C1C1C", /* 181818 -> 233: 1c1c1c */ "c c #005f5f", /* 295239 -> 23: 005f5f */ "g c #D7875F", /* ce6b39 -> 173: d7875f */ "j c #D787FF", /* ce84ff -> 177: d787ff */ "d c #87AF87", /* 84ad84 -> 108: 87af87 */ "k c #AF8700", /* bd9400 -> 136: af8700 */ /* pixels */ "aaaaabbabbbhhbaaaaaaabbabbbhhbaaaaaaabbabbbhhbaaaaaaabbabbbaaaaaaaaaabbabbbaaaaaaaaaabbabbbaaaaaaaaaaaaaaaaaaaaaaaaabbbabbbbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaabccccccbhjbaaaaabccccccbhjbaaaaabccccccbhjbaaaaabcdcddcbaaaaaaaabcdcddcbaaaaaaaabcdcddcbaaaaaaaabbbabbbbbaaaaabbcdccdcbjjbaaaaaabbbabbbbbaaaaaaaaaaaaaaaaaaa", "aabbdddcddddbhbbaabbdddcddddbhbbaabbdddcddddbhbbaaabcdhjjhdcbaaaaaabcdhjjhdcbaaaaaabcdhjjhdcbaaaaabbcdccdcbjjbaaabcddcddidcbjbbaaabbcdccdcbjjbaaaaaaaaaabbbaaaaa", "abddcdiddiddcbdbabddcdiddiddcbdbabddcdiddiddcbdbaaabdchbbhcdbaaaaaabdchbbhcdbaaaaaabdchbbhcdbaaaabcddcddidcbjbbaabiccfcddddcbccbabcddcddidcbjbbaaaaaaaabjhhbbaaa", "aabbddddddcdcbdbaabbddddddcdcbdbaabbddddddcdcbdbaabcdccddccdcbaaaabcdccddccdcbaaaabcdccddccdcbaaabiccfcddddcbccbabdbfgcidcdcbdcbabiccfcddddcbccbaaaabbabbbjbdbaa", "aabdccfccicdcbdbaabdccfccicdcbdbaabdccfccicdcbdbaabcdcddddcdcbaaaabcdcddddcdcbaaaabcdcddddcdcbaaabdbfgcidcdcbdcbabdbfbcdicccbdcbabdbfgcidcdcbdcbaaabccccccbccdba", "aabicgficidccbcbaabicgficidccbcbaabicgficidccbcbaabcccdiidcccbaaaabcccdiidcccbaaaabcccdiidcccbaaabdbfbcdicccbdcbaabbflbcdiccbdbaabdbfbcdicccbdcbabbdddcddddbcdba", "aabdcgfigcddcbcbaabdcgfigcddcbcbaabdcgfigcddcbcbaaabccdiidccbaaaaaabccdiidccbaaaaaabccdiidccbaaaaabbflbcdiccbdbaaaabflibccdbccbaaabbflbcdiccbdbabddcdiddiddcbcba", "aaabcbbfbbbcbcbaaaabcbbfbbbcbcbaaaabcbbfbbbcbcbaaaaabcdiddcbaaaaaaaabcdiidcbaaaaaaaabcddidcbaaaaaaabflibccdbccbaaaabfligfbdbccbaaaabflicccdcbdbaabbddddddcdcbcba", "aabbbilfflibcdbaaaaabilfflibcdbaaaaabilfflibcbbaaabbbcdidcbaaaaaaaaabcdiidcbaaaaaaaaabcdidcbbbaaaaabfligfbdbcdbaaaabfffggbbbcdbaaaabfligfbdbdcbaabdccfccicdcbcba", "abhhbflfflfbbcdbaabbbflfflfbbcbaaaabbflfflfbhhbaabhhbbiddcbbbaaaaabbbbddddbbbbaaaaabbbcddibbhhbaaaabfffggbbbbddbaaabbffgbbbbbdbaaaabfffggbbbdbaaabicgficidccbdba", "aabjhbffffbhhbbaabjjhbffffbhjjbaaabhhbffffbhjbaaabhjbdddcbbhhbaaabjjhbcddcbhjjbaaabhhbbcdddbjhbaaabbbffgbbbbbbbbaabhbbgggbjjhbaaaaaabffgbbbbbaaaabdcgfibcddcbcdb", "aaabhhbggbhhjjbaaabjhhbggbhhjbaaabjjhhbggbhhbaaaaabhhbbbbbhhjjbaaabjhhbddbhhjbaaabjjhhbbbbbhhbaaabjhbbgggbhjjhbaaaabeffgbjhbhbaaaaaabbgbhjjhbaaaaabcbbfgbbcdbbba", "aabbjbffffbjbbbaaabbjbffffbjbbaaabbbjbffffbjbbaaaaabhbeffebjbbaaaabbjbbccbbjbbaaaabbjbeffebhbaaaaabhefffgbjhbhbaaaabeeeebjbgbaaaaaabeffbjbbhbaaaaabbilfflicbaaaa", "aabgbkeeeekbeebaaabfbkeeeekbfbaaabeebkeeeekbgbaaaaabbeeeeeebgbaaaabfbeebbeebfbaaaabgbeeeeeebbaaaaaabeeeeeejbgbaaaaaabbjjhbfgbaaaaaabeeebbfbbaaaaabhbflfflfbhbaaa", "aabfbbhjjhbffebaaabfgbhjjhbgfbaaabeffbhjjhbbfbaaaaabgbhjjhbbfgbaaabfbbhjjhbbfbaaabgfbbhjjhbgbaaaaabbbbjjhbbffbaaaaaabbeeebfgbaaaaaaabbbgfgbaaaaabjhbbffffbhhjbaa", "aaabbgfeeebfibaaabgebgeeeegbegbaaabifbeeefgbbaaaaaaabggeeegbfgbaaabebgeeeegbebaaabgfbgeeeggbaaaaabfbbbbeeebbeebaaaabgbgegbegbaaaaaabifeffbaaaaaabhbgebggbegbhbaa", "aaaabgffebgbbaaaabgfbgfeefgbfgbaaaabbgbeffgbaaaaaaaabgigegfbeebaaabfbgfeefgbfbaaabeebfgegigbaaaaabfgbbgffebbffbaaaabbbgbbffebaaaaaabffegbgbbaaaaabbfeffffefbbaaa", "aaaaabgibbebaaaaabgfbbiggibbfgbaaaaabebbigbaaaaaaaaaabifbgebgfbaaaabbbiggibbbaaaabfgbegbfibaaaaaaabbbgifgbbbffbaaaaaabgbbffbaaaaaaaabbbbffeebbaaaabffbeebfgbaaaa", "aaaabegiebebaaaaaabbbefggfebbbaaaaaabebeigebaaaaaaaabeeeebebgfbaaaaabefggfebaaaaabfgbebeeeebaaaaaaabkeebbgebbbaaaaaaabebeeebaaaaaabbeeebgeekkebaaaabfibifgbbbbaa", "aaaabkeekbbaaaaaaaaabkeeeekbaaaaaaaaabbkeekbaaaaaaaabekkebebbbaaaaaabkeeeekbaaaaaabbbebekkebaaaaaabbekkbbgeeebaaaaaaabebkkkebaaaabekkeebbbkeebaaaabbgfbffbekeeba", "aaaabekkebaaaaaaaaaabekeekebaaaaaaaaaabekkebaaaaaaaabkeekbbbaaaaaaaabekeekebaaaaaaaabbbkeekbaaaaabkkeeebabbebbaaaaaabkebeekebaaaaabbeebaaabekbaaabeebbgbbgekeeeb", "aaaabeeeebaaaaaaaaaabeeeeeebaaaaaaaaaabeeeebaaaaaaaabekkebaaaaaaaaaabekeekebaaaaaaaaaabekkebaaaaaabbbebaaabkbaaaaaaabbbkkkbkbaaaaaaabkbaaaabkbaabkeeebbffgekeekb", "aaaaabkkbaaaaaaaaaaaabkbbkbaaaaaaaaaaaabkkbaaaaaaaaaabkkbaaaaaaaaaaaabebbebaaaaaaaaaaaabkkbaaaaaaaaabkbbaaabaaaaaaaaaabbbbbbbaaaaaaaabbaaaabbaaaabbbbbbbbbbbbbbb" };