#!/usr/bin/perl -w
#
# Human-readable file digests.
#
# Computes the MD5 digest of each input file and prints, for every file, the
# shortest run of digest bytes that distinguishes it from the other inputs,
# spelled out as dictionary words ("<offset> <high nibble> <low nibble>",
# bytes joined by " + ").  With -c the words are parsed back into
# (offset, byte) pairs and compared against the files' actual digests.

use strict;
use warnings;
use Digest;

# Combined dictionary.  Words 0-15 encode the high nibble of a digest byte,
# words 16-31 encode the low nibble, and words 32-47 encode the byte's
# offset within the digest.
my @dictionary = qw{
    black white red cyan green purple blue yellow
    transparent gold silver bronze platinum diamond glass stone

    apples bananas cherries grapes lemons mangoes melons oranges
    papayas peaches pears persimmons pineapples raspberries
    strawberries watermelons

    sweet fresh tasty large small heavy light soft
    hard solid precious rare vintage expensive spicy bitter
};

# Reverse lookup: dictionary word => index in @dictionary.
my %word_index;
@word_index{@dictionary} = (0 .. $#dictionary);

# Globals
my @elements;       # Output of ParseDigest: list of [offset, byte] pairs
my $errors = 0;     # Number of files that failed to read or verify
my $a0;             # Input of ParseDigest: digest string (or digest file name)
my $raw_digest;     # Output of ComputeDigestForFile: raw digest or "ERROR: ..."

# Compute full digest for a single file.
#
# Argument: file name; "-" means standard input.
# Result is stored in $raw_digest: the raw (binary) MD5 digest on success,
# or a string of the form "ERROR: <reason>" on failure.
sub ComputeDigestForFile {
    my ($file) = @_;
    $raw_digest = undef;

    # Three-argument open so that characters in the file name are never
    # interpreted as an open mode.  "-" keeps its meaning of standard input.
    my $handle;
    if( $file eq "-" ) {
        $handle = \*STDIN;
    }
    elsif( !open($handle, '<', $file) ) {
        $raw_digest = "ERROR: $!";
        return;
    }

    binmode $handle;
    my $ok = eval {
        $raw_digest = Digest->new("MD5")->addfile($handle)->digest;
        1;
    };
    if( !$ok || !defined $raw_digest ) {
        # Failures inside eval are reported through $@, not $!.
        my $reason = $@ || $! || "unknown error";
        $reason =~ s/\s+\z//;
        $raw_digest = "ERROR: $reason";
    }
    return;
}

# Compute unique digests for a set of files.
#
# Reads file names from @ARGV (standard input if none), prints one line per
# readable file in the form "<label> *<file>", and counts unreadable files
# in $errors.
sub ComputeDigest {
    # Get checksum for all inputs
    my @digests = ();
    push @ARGV, "-" if !@ARGV;
    foreach my $file (@ARGV) {
        ComputeDigestForFile($file);
        if( $raw_digest =~ /^ERROR: (.*)/ ) {
            print STDERR "Error reading $file: $1\n";
            $errors++;
            next;
        }
        push @digests, [$raw_digest, $file];
    }
    return if !@digests;

    my $digest_length = length($digests[0][0]);

    # Find unique digests; the value is the label, "" while unassigned.
    my %unique_digests = map { $_->[0] => "" } @digests;
    my $unassigned = scalar keys %unique_digests;

    # Select subset of bytes that is unique to each digest.  Shorter runs
    # are tried first so labels stay as short as possible.
    for(my $length = 1; $unassigned > 0 && $length <= $digest_length; $length++) {
        # "<=" so that the final window (including the last digest byte,
        # and the whole digest when $length == $digest_length) is tried.
        for(my $offset = 0; $unassigned > 0 && $offset <= $digest_length - $length; $offset++) {
            my %unique_substrings = ();
            foreach my $digest (keys %unique_digests) {
                my $substring = substr($digest, $offset, $length);
                if( exists $unique_substrings{$substring} ) {
                    # Two or more digests share the same substring, so we
                    # can't use it to tell digests apart.
                    $unique_substrings{$substring} = undef;
                }
                else {
                    # We might be able to identify this key using only a
                    # substring, tentatively keep track of it now.
                    $unique_substrings{$substring} = $digest;
                }
            }

            # Every substring still mapped to a digest identifies it
            # uniquely; label digests that don't have a label yet.
            foreach my $key (values %unique_substrings) {
                next unless defined $key && $unique_digests{$key} eq "";
                my @tokens = ();
                foreach my $i ($offset .. $offset + $length - 1) {
                    my $byte = ord(substr($key, $i, 1));
                    push @tokens, join(' ',
                        $dictionary[$i + 32],
                        $dictionary[$byte >> 4],
                        $dictionary[($byte & 15) + 16]);
                }
                $unique_digests{$key} = join ' + ', @tokens;
                $unassigned--;
            }
        }
    }

    # Output digests, padding labels so that file names line up.
    my $max_label_length = 0;
    foreach my $label (values %unique_digests) {
        $max_label_length = length $label if $max_label_length < length $label;
    }
    foreach my $entry (@digests) {
        my ($digest, $file) = @$entry;
        my $label = $unique_digests{$digest};
        print $label, " " x ($max_label_length + 1 - length($label)), "*$file\n";
    }
    return;
}

# Convert digest string into list of (offset, byte) items.
#
# Input is taken from $a0; the result is stored in @elements, which is left
# empty if the digest can't be parsed.  Terms are separated by "+"; a term
# is used only if it names a high nibble, a low nibble and an offset (in any
# order, case-insensitively).  Unknown words are ignored.
sub ParseDigest {
    @elements = ();
    foreach my $term (split /\+/, $a0) {
        my ($high, $low, $offset);
        foreach my $word (split /\s+/, $term) {
            my $index = $word_index{lc $word};
            next if !defined $index;
            if( $index < 16 ) {
                $high = $index;
            }
            elsif( $index < 32 ) {
                $low = $index - 16;
            }
            else {
                $offset = $index - 32;
            }
        }
        if( defined($high) && defined($low) && defined($offset) ) {
            push @elements, [$offset, ($high << 4) | $low];
        }
    }
    return;
}

# Check digest for a single file.
#
# Arguments: reference to a list of [offset, byte] pairs, and a file name.
# Prints "<file>: OK", "<file>: FAILED" or "<file>: ERROR: <reason>".
# Returns 0 if every listed byte matches the file's digest, 1 otherwise.
sub CheckDigestForFile {
    my ($digest_elements, $file) = @_;
    ComputeDigestForFile($file);
    if( $raw_digest =~ /^ERROR: / ) {
        print "$file: $raw_digest\n";
        return 1;
    }
    foreach my $element (@$digest_elements) {
        my ($offset, $expected) = @$element;
        if( ord(substr($raw_digest, $offset, 1)) != $expected ) {
            print "$file: FAILED\n";
            return 1;
        }
    }
    print "$file: OK\n";
    return 0;
}

# Check digests for a set of files.
#
# The first remaining command line argument is either a readable digest
# string (checked against all following files, or standard input) or the
# name of a digest file produced by ComputeDigest ("-" or no argument means
# standard input).  Failures are accumulated in $errors.
sub CheckDigest {
    $a0 = @ARGV ? shift @ARGV : "-";
    ParseDigest();
    if( @elements ) {
        # Checking digest using readable digest data specified on
        # command line.
        push @ARGV, "-" if !@ARGV;
        foreach my $file (@ARGV) {
            $errors += CheckDigestForFile(\@elements, $file);
        }
        return;
    }

    # Checking digests using data listed in digest file.
    my $digest_file = $a0;
    my $handle;
    if( $digest_file eq "-" ) {
        $handle = \*STDIN;
    }
    else {
        open($handle, '<', $digest_file)
            or die "Can not open $digest_file: $!\n";
    }
    while( my $line = <$handle> ) {
        chomp $line;
        # Each line is "<label><padding>*<file name>".
        my $separator = index($line, '*');
        next unless $separator > 0;
        $a0 = substr($line, 0, $separator);
        ParseDigest();
        # Lines whose label can't be parsed are skipped.
        if( @elements ) {
            $errors += CheckDigestForFile(\@elements, substr($line, $separator + 1));
        }
    }
    return;
}

# Output usage message if nothing is specified on command line or stdin
if( !@ARGV && -t STDIN ) {
    die <<"EOT";
To compute digest:
  $0 files... > digest.txt
To check digest:
  $0 -c digest.txt
  $0 -c 'digest_string' file
EOT
}

if( @ARGV && $ARGV[0] eq "-c" ) {
    shift @ARGV;
    CheckDigest();
}
else {
    ComputeDigest();
}

die "$errors errors\n" if $errors;