#!perl

# dups: simple script for showing duplicate files

=head1 NAME

dups - Show Duplicate Files

=head1 SYNOPSIS

 Usage: dups files ...

=head1 DESCRIPTION

dups is a fast script for discovering duplicate files.  It
achieves its efficiency by comparing file digests rather than the
file contents themselves, the latter being much larger in general.

The NIST Secure Hash Algorithm (SHA) is highly collision-resistant,
meaning that two files with the same SHA digest have an almost
certain probability of being identical.  (Deliberately constructed
SHA-1 collisions do exist, so do not rely on this script to compare
files supplied by an adversary.)

The dups script works by computing the SHA-1 digest of each file
and looking for matches.  The search can reveal more than one set
of duplicates, so the output is written as follows:

 match1_file1
 	match1_file2
 	match1_file3
 	etc.

 match2_file1
 	match2_file2
 	etc.

Sets are listed in the order in which their first member appears on
the command line, and files within a set keep their command-line
order.  Arguments that are not regular files are ignored.  A path
named more than once is considered only once.  A file that cannot
be read is reported on STDERR and skipped.

=head1 AUTHOR

Mark Shelor <mshelor@cpan.org>

=head1 SEE ALSO

Perl module L<Digest::SHA> or L<Digest::SHA::PurePerl>

=cut

use strict;
use warnings;
use Digest::SHA;

die "usage: dups files ...\n" unless @ARGV;

# Keep only regular files, and drop repeated mentions of the same path
# so that "dups a a" does not report "a" as a duplicate of itself.
my %seen_path;
my @files = grep { -f $_ && !$seen_path{$_}++ } @ARGV;

my %files_for;       # hex digest => [ files having that digest ]
my @digest_order;    # digests in order of first appearance, for stable output

for my $file (@files) {

    # addfile() croaks when the file cannot be opened or read; trap that
    # so a single unreadable file does not abort the whole run.
    my $digest = eval { Digest::SHA->new->addfile($file, "b")->hexdigest };
    if (!defined $digest) {
        warn "dups: skipping $file: $@";
        next;
    }

    push @digest_order, $digest unless exists $files_for{$digest};
    push @{ $files_for{$digest} }, $file;
}

# Report every digest shared by two or more files: the first file flush
# left, the remaining ones tab-indented, and a blank line after each set.
for my $digest (@digest_order) {
    my $group = $files_for{$digest};
    next unless @$group > 1;
    print join("\n\t", @$group), "\n\n";
}