#!/usr/pkg/bin/perl
# Find duplicate Music files spread over the directory.
# Usage  
# ./traverse [ -d directory]

use strict;
use Getopt::Std;
use File::Find;
use Digest::MD5 qw(md5 md5_hex md5_base64);

use open ":encoding(utf-8)";
# ## http://blog.livedoor.jp/dankogai/archives/50828554.html
use utf8;
# 
# ## http://osksn2.hep.sci.osaka-u.ac.jp/~taku/osx/perl/perl_utf.html
# 
# binmode STDIN  => 'utf8';
# binmode STDOUT => 'utf8';


# ----------------------------------------------------------------------
# Script body: parse options, walk the tree with File::Find, then print
# a short summary.  All state below is file-scoped and shared with the
# wanted() callback.
# ----------------------------------------------------------------------
my $start = time();     # start time, used for the elapsed-seconds report
my $DIR   = '.';        # default to the current directory
my %opts;

my %GLOBAL_MD5;         # MD5 hex digest => path of the first file seen with it

# Counters start at 0 so the summary prints numbers even when the tree
# is empty or holds no music files.
my $dupcount  = 0;      # files whose digest was already in %GLOBAL_MD5
my $processed = 0;      # music files that were digested
my $scanned   = 0;      # every entry File::Find handed to wanted()

# Number of scanned entries per progress dot, and the countdown that
# wanted() decrements.
my $DOTS_PER_FILE = 10;
my $decimal       = $DOTS_PER_FILE;

# getopts() returns false on an unknown option or a missing argument;
# stop with a usage line instead of silently scanning the default dir.
getopts('d:', \%opts)
    or die "Usage: $0 [-d directory]\n";

# Test for a non-empty value rather than truth so that a directory
# literally named "0" is accepted.
if (defined $opts{'d'} && length $opts{'d'}) {
    $DIR = $opts{'d'};
}

$| = 1;                 # unbuffered STDOUT: flush after every print
print 'Finding under: ', $DIR, "\n";
find(\&wanted, $DIR);

my $elapsed = time() - $start;

print "   elapsed:  $elapsed sec.\n";
print "   scanned:  $scanned \n";
print " processed:  $processed \n";
print " duplicate:  $dupcount \n";

exit;

# File::Find callback, invoked once for every entry found under $DIR.
#
# File::Find (default chdir mode) sets $_ to the entry's leaf name,
# changes into the entry's directory, and sets $File::Find::name to the
# full path.
#
# For each regular file ending in .mp3/.m4a/.mp4 (any letter case) the
# MD5 digest of its content is computed.  The first path seen for a
# digest is remembered in %GLOBAL_MD5; any later file with the same
# digest is printed as "first = duplicate" and counted in $dupcount.
#
# Updates the file-scoped $scanned, $processed, $dupcount, $decimal and
# %GLOBAL_MD5.  Dies if a matching file cannot be opened.
sub wanted {
    $scanned++;

    my $leaf = $_;                  # leaf name, valid in the current dir
    my $path = $File::Find::name;   # full path, used for reporting

    # All three extensions are matched case-insensitively, and only plain
    # files are considered (a directory called "live.mp3" is skipped).
    if ( $leaf =~ /\.(?:mp3|m4a|mp4)\z/i && -f $leaf ) {

        # --------------------------------------
        # C A L C U L A T E  M D 5  D I G E S T
        # Audio files are binary: read them through the :raw layer so
        # the digest is computed over the exact bytes on disk.
        open( my $fh, '<:raw', $leaf )
            or die "Fail to open: $leaf $!\n";
        my $md5 = Digest::MD5->new->addfile($fh)->hexdigest;
        close($fh);

        # --------------------------------------
        # C O M P A R E  W I T H  S T O R E D  M D 5
        if ( exists $GLOBAL_MD5{$md5} ) {
            print $GLOBAL_MD5{$md5}, ' = ', $path, "\n";
            $dupcount++;
        }
        else {
            $GLOBAL_MD5{$md5} = $path;
        }
        $processed++;
    }

    # P R I N T  P R O G R E S S  B A R
    # Pre-decrement so a dot appears every $DOTS_PER_FILE entries (the
    # post-decrement form fired one entry late).
    if ( --$decimal <= 0 ) {
        print STDERR '.';
        $decimal = $DOTS_PER_FILE;
    }
}
__END__
$File::Find::topdir
$File::Find::topdev
$File::Find::topino
$File::Find::topmode
$File::Find::topnlink

.Wide character in subroutine entry at /Users/ryu/traverse line 80.
