#!/usr/bin/perl
# natsume6.pl - Don Yang (uguu.org)
#
# 01/02/06
#
# Finds duplicate files among the paths given on the command line (or, when
# there are no arguments, one path per line on standard input) and prints
# "ln -s -f" shell commands that would replace each duplicate with a symbolic
# link to the first copy seen.  Zero-length files are linked to /dev/null.
# The script itself never modifies anything; it only prints commands and
# "#" comment lines (diagnostics and a final summary) to standard output.
#
# Files are compared lazily to keep reads to a minimum:
#   1. by size,
#   2. then by the MD5 of the first 1024 bytes,
#   3. then by the MD5 of the whole file.
# A file is only hashed once a second file reaches the same stage.

use strict;
use warnings;
use Digest::MD5;

# Hash key marking "the only file seen so far at this stage; not hashed yet".
# It cannot collide with a digest key because binary MD5 digests are 16 bytes.
my $PENDING = '0';

# Number of bytes read for hashing, reported in the summary.
my $bytes_read = 0;

# X(PATH, SIZE) - compute a binary MD5 digest for PATH.
#
# If SIZE is true the whole file is digested and SIZE is added to the
# bytes-read counter; otherwise only the first 1024 bytes are digested and
# the number of bytes actually read is added.
#
# Returns the 16-byte binary digest, or undef (after printing a "#" comment
# line) if the file could not be opened or read.
sub X {
    my ($path, $size) = @_;

    # Three-argument open so that names containing whitespace or mode
    # characters ('<', '>', '|') are treated purely as file names.
    my $fh;
    unless (open $fh, '<', $path) {
        print "# $path: Can not open: $!\n";
        return;
    }
    binmode $fh;

    my $md5 = Digest::MD5->new;
    my $ok;
    if ($size) {
        # addfile croaks on a read error; trap it so one bad file does not
        # abort the whole run.
        $ok = eval { $md5->addfile($fh); 1 };
        $bytes_read += $size if $ok;
    }
    else {
        my $prefix = '';
        $ok = defined read $fh, $prefix, 1024;
        if ($ok) {
            $md5->add($prefix);
            $bytes_read += length $prefix;
        }
    }
    unless ($ok) {
        print "# $path: Can not read: $!\n";
        close $fh;
        return;
    }

    close $fh;
    return $md5->digest;
}

# Quote a string for use as a single word in a POSIX shell command line.
sub shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}

# Textually simplify a path: drop "." components and repeated slashes, and
# fold "dir/.." pairs.  A ".." is kept when there is nothing to fold it into
# (start of path, directly after the root, or after another "..").
# Returns '' when nothing is left.
sub normalize_path {
    my ($path) = @_;
    my @clean;
    for my $part (split m{/}, $path) {
        next if $part eq '.';

        # An empty component is only meaningful as the very first one, where
        # it stands for the leading "/" of an absolute path.
        next if $part eq '' && @clean;

        if ($part eq '..' && @clean && $clean[-1] ne '' && $clean[-1] ne '..') {
            pop @clean;
        }
        else {
            push @clean, $part;
        }
    }
    return join '/', @clean;
}

# Work out what a symlink located at LINK should contain to reach TARGET.
# When both paths are relative, shared leading directories are stripped and
# one "../" is prepended for every directory left in LINK.  When either path
# is absolute, TARGET is returned unchanged.
sub relative_target {
    my ($target, $link) = @_;
    return $target if $target =~ m{^/} || $link =~ m{^/};

    my @target_parts = split m{/}, $target;
    my @link_parts   = split m{/}, $link;

    # Only directories are stripped: the final component of each path is the
    # file name itself and always stays.
    while (@target_parts > 1 && @link_parts > 1
        && $target_parts[0] eq $link_parts[0])
    {
        shift @target_parts;
        shift @link_parts;
    }
    return ('../' x $#link_parts) . join('/', @target_parts);
}

# --- Collect, normalize, sort and de-duplicate the input paths -------------

my @inputs = @ARGV ? @ARGV : <STDIN>;
chomp @inputs;

my %seen;
my @paths =
    grep { !$seen{$_}++ }
    sort
    grep { $_ ne '' && $_ ne '.' && $_ ne '..' }
    map  { normalize_path($_) } @inputs;

# --- Scan files --------------------------------------------------------------

# $files_by_size{SIZE} is one of
#   { $PENDING => path }                      one file of this size, unhashed
#   { PREFIX_MD5 => { $PENDING => path } }    one file with this prefix digest
#   { PREFIX_MD5 => { FULL_MD5 => path } }    first file with this full digest
my %files_by_size;

my $file_count  = 0;    # regular, readable files examined
my $total_bytes = 0;    # sum of their sizes
my $dup_count   = 0;    # duplicates for which a link command was printed
my $dup_bytes   = 0;    # sum of the duplicates' sizes

FILE:
for my $path (@paths) {
    unless (-f $path && -r _) {
        print "# $path: not readable\n";
        next FILE;
    }

    my $size = -s _;
    $file_count++;
    $total_bytes += $size;

    # All empty files are trivially identical.
    if ($size <= 0) {
        print 'ln -s -f /dev/null ', shell_quote($path), "\n";
        next FILE;
    }

    # Stage 1: first file of this size - remember it without reading it.
    if (!exists $files_by_size{$size}) {
        $files_by_size{$size} = { $PENDING => $path };
        next FILE;
    }
    my $by_prefix = $files_by_size{$size};

    # A second file of this size has arrived: the remembered one now needs
    # its prefix digest.  If it cannot be hashed it is simply dropped.
    if (exists $by_prefix->{$PENDING}) {
        my $earlier = delete $by_prefix->{$PENDING};
        my $digest  = X($earlier, 0);
        $by_prefix->{$digest} = { $PENDING => $earlier } if defined $digest;
    }

    # Stage 2: compare by digest of the first 1024 bytes.
    my $prefix_digest = X($path, 0);
    next FILE unless defined $prefix_digest;
    if (!exists $by_prefix->{$prefix_digest}) {
        $by_prefix->{$prefix_digest} = { $PENDING => $path };
        next FILE;
    }
    my $by_full = $by_prefix->{$prefix_digest};

    # Same size and same prefix: the remembered file now needs a full digest.
    if (exists $by_full->{$PENDING}) {
        my $earlier = delete $by_full->{$PENDING};
        my $digest  = X($earlier, $size);
        $by_full->{$digest} = $earlier if defined $digest;
    }

    # Stage 3: compare by digest of the whole file.
    my $full_digest = X($path, $size);
    next FILE unless defined $full_digest;
    if (!exists $by_full->{$full_digest}) {
        $by_full->{$full_digest} = $path;
        next FILE;
    }

    # Duplicate: emit a command linking it to the first copy.
    my $target = relative_target($by_full->{$full_digest}, $path);
    print 'ln -s -f ', shell_quote($target), ' ', shell_quote($path), "\n";
    $dup_count++;
    $dup_bytes += $size;
}

# --- Summary -----------------------------------------------------------------

print "# $file_count files, $bytes_read/$total_bytes bytes read\n",
    ($dup_count > 0
        ? "# $dup_bytes bytes in $dup_count duplicate files\n"
        : "# No duplicates found\n");