#!/usr/pkg/bin/perl
#
# Find duplicate music files spread over a directory tree.
#
# Usage:
#   ./traverse [-d directory]
#
# Every regular file whose name ends in .mp3, .m4a or .mp4 (any letter
# case) is hashed with MD5.  The first path seen for a digest is
# remembered; each later file with the same digest is reported on STDOUT
# as "first_path = duplicate_path".  Progress dots go to STDERR and a
# summary is printed at the end.

use strict;
use warnings;
use Getopt::Std;
use File::Find;
use Digest::MD5 qw(md5 md5_hex md5_base64);
use open ":encoding(utf-8)";
# See http://blog.livedoor.jp/dankogai/archives/50828554.html
use utf8;
# See http://osksn2.hep.sci.osaka-u.ac.jp/~taku/osx/perl/perl_utf.html
# binmode STDIN  => 'utf8';
# binmode STDOUT => 'utf8';

my $start = time();
my $DIR   = '.';            # start directory; defaults to the current one
my %opts;
my %GLOBAL_MD5;             # digest => first path seen with that digest
my $dupcount  = 0;          # files whose digest was already recorded
my $processed = 0;          # music files that were hashed
my $scanned   = 0;          # every entry File::Find handed to wanted()

# Print one progress dot per this many scanned entries.
my $DOTS_PER_FILE = 10;

# File name suffixes treated as music files (case-insensitive).
my $MUSIC_EXT = qr/\.(?:mp3|m4a|mp4)\z/i;

getopts('d:', \%opts);
if ($opts{'d'}) {
    $DIR = $opts{'d'};
}

$| = 1;                     # unbuffered output so progress shows up at once

print 'Finding under: ', $DIR, "\n";
find(\&wanted, $DIR);

my $elapsed = time() - $start;
print " elapsed: $elapsed sec.\n";
print " scanned: $scanned \n";
print " processed: $processed \n";
print " duplicate: $dupcount \n";
exit;

# File::Find callback, invoked once per directory entry.
# File::Find has already chdir'ed into the entry's directory, so $_ is the
# leaf name (usable for file tests and open) and $File::Find::name is the
# path relative to the start directory (used for reporting).
sub wanted {
    $scanned++;

    # Only regular files: a directory that happens to be named like a
    # music file must not be opened and hashed.
    if (/$MUSIC_EXT/ && -f $_) {
        _record_music_file($File::Find::name, $_);
        $processed++;
    }

    # Progress bar: one dot per $DOTS_PER_FILE scanned entries.
    print STDERR '.' if $scanned % $DOTS_PER_FILE == 0;
    return;
}

# Hash one music file and either remember it or report it as a duplicate.
#   $path - path used in reports and stored in %GLOBAL_MD5
#   $leaf - name to open, relative to the current working directory
sub _record_music_file {
    my ($path, $leaf) = @_;

    my $md5 = _md5_of_file($leaf);

    if (exists $GLOBAL_MD5{$md5}) {
        print $GLOBAL_MD5{$md5}, ' = ', $path, "\n";
        $dupcount++;
    }
    else {
        $GLOBAL_MD5{$md5} = $path;
    }
    return;
}

# Return the hex MD5 digest of the file named $leaf; dies if it cannot be
# opened.
sub _md5_of_file {
    my ($leaf) = @_;

    # The handle must be raw bytes.  Reading binary audio through a UTF-8
    # layer hands decoded characters to Digest::MD5, which then croaks with
    # "Wide character in subroutine entry".  Naming the layer in the mode
    # also overrides the `use open` default above.
    open(my $fh, '<:raw', $leaf) || die "Fail to open: $leaf $!\n";

    my $ctx = Digest::MD5->new;
    $ctx->addfile($fh);
    my $md5 = $ctx->hexdigest;
    close($fh);

    return $md5;
}

__END__
$File::Find::topdir
$File::Find::topdev
$File::Find::topino
$File::Find::topmode
$File::Find::topnlink

.Wide character in subroutine entry at /Users/hoge/traverse line 80.