#!/usr/bin/perl -w

# Generate a base file with some random MD5, then generate more files
# that each share one of the MD5 bytes with the base file. Because
# there is no single MD5 byte that uniquely identifies the original
# file, a digest for this file will require at least two MD5 bytes.
#
# Files written to the current directory:
#   collision_base.txt  - the base file (a random number and a newline)
#   collision_<J>.txt   - for every byte offset J of the binary MD5 digest,
#                         a file whose digest has the same byte at offset J
#                         as the digest of the base file
#
# One progress line is printed to STDOUT for every collision found.

use strict;
use warnings;
use Digest::MD5 'md5';

# Write $data to the file $path, replacing any existing content.
#
# The ':raw' layer disables newline translation so that the bytes on disk
# are exactly the bytes that were passed to md5(); otherwise the digest of
# the written file could differ from the digest computed in memory on
# platforms that translate "\n" to CRLF.
#
# Dies with a message naming the file if the file cannot be opened,
# written, or closed (buffered write errors only surface at close).
sub write_file {
    my ($path, $data) = @_;

    open my $fh, '>:raw', $path
        or die "Cannot open $path for writing: $!";
    print {$fh} $data
        or die "Cannot write to $path: $!";
    close $fh
        or die "Cannot close $path: $!";

    return;
}

# Generate base file
my $base_data     = rand() . "\n";
my $base_digest   = md5($base_data);    # binary digest, compared byte by byte
my $digest_length = length $base_digest;

write_file('collision_base.txt', $base_data);

# Generate extra files until we have found a one byte collision at each
# offset with the base file. Candidates are simply the integers 0, 1, 2, ...
# each followed by a newline.
my %collisions;    # digest offset => 1 once a colliding file was written

for (my $i = 0; scalar(keys %collisions) < $digest_length; $i++) {
    my $data   = "$i\n";
    my $digest = md5($data);

    # A single candidate may match the base digest at several offsets;
    # it is then written once for every offset that is still missing.
    for my $j (0 .. $digest_length - 1) {
        next if exists $collisions{$j};
        next if substr($digest, $j, 1) ne substr($base_digest, $j, 1);

        write_file("collision_$j.txt", $data);
        $collisions{$j} = 1;
        print "$i: ", scalar(keys %collisions), " collisions\n";
    }
}