############################################################################### # Kain Shin's First Perl Program ############################################################################### #The purpose of this program is to identify all adjacent duplicate files #and move them into another folder to be manually deleted by human hands use strict; use warnings; use File::Find; use File::Copy; use File::Basename; ############################################################################### #User tunables... my $StartingDirectory = "."; my $DestinationDirectory = "./0.DuplicateFilesToBeDeleted"; ############################################################################### #used in main program to detect of the files are EXACT copies of each other... sub IsDuplicate { #Parameters... my $FileNameA = $_[0]; my $FileNameB = $_[1]; #...Parameters my $FileSizeA = -s $FileNameA; my $FileSizeB = -s $FileNameB; my $SizesMatch = ( $FileSizeA == $FileSizeB ); #Chop off the extension (last 4 chars)... $FileNameA = substr( $FileNameA, 0, length( $FileNameA )-4 ); $FileNameB = substr( $FileNameB, 0, length( $FileNameB )-4 ); #See if the names are related my $AFitsInB = ( -1 != index( $FileNameB, $FileNameA ) ); my $BFitsInA = ( -1 != index( $FileNameA, $FileNameB ) ); my $NamesRelated = $AFitsInB || $BFitsInA; return ( $SizesMatch && $NamesRelated ); } ############################################################################### #used in main program to exile the file with the longer name into the DestinationDirectory... sub MoveLongerFile { #Parameters... my $FileNameA = $_[0]; my $FileNameB = $_[1]; #...Parameters my $NameLengthA = length( $FileNameA ); my $NameLengthB = length( $FileNameB ); my $ShortestName = $FileNameA; my $LongestName = $FileNameB; if ( $NameLengthA > $NameLengthB ) { $ShortestName = $FileNameB; $LongestName = $FileNameA; } print "=== POTENTIAL DUPLICATE ===\n"; print "KEPT:$ShortestName\n"; print "MOVE:$LongestName\n"; my $DestinationName = substr( $LongestName, 1 ); $DestinationName = "$DestinationDirectory$DestinationName"; mkdir $DestinationDirectory; my $MoveSuccess = move( $LongestName, $DestinationDirectory ); if ( $MoveSuccess ) { print "TO:$DestinationName\n"; } else { print "FAILED:$DestinationName\n"; print "$!\n"; } } ############################################################################### #called from File::find... my @gFilePaths; my $gLastPath = 0; sub AddFileToPath { my $CurrentFileSize = -s $_; my $CurrentPath = $File::Find::dir; #Not a folder AND not the DestinationDirectory if ( ( $CurrentFileSize > 0 ) && ( $DestinationDirectory ne $CurrentPath ) ) { #Give feedback on which path is being added if ( !$gLastPath || ( $gLastPath ne $CurrentPath ) ) { print "Adding for Analysis:$CurrentPath\n"; $gLastPath = $CurrentPath; } push( @gFilePaths, $File::Find::name ); } } ############################################################################### #Main Program #Fill gFilePaths with sorted path goodness... find( \&AddFileToPath, $StartingDirectory ); @gFilePaths = sort @gFilePaths; print "========================================================================\n"; print "Checking for duplicates...\n"; #Now, check for duplicates... my $gLastFileName = 0; my $bDuplicatesFound = 0; foreach ( @gFilePaths ) { my $CurrentFileName = $_; if ( $gLastFileName && IsDuplicate( $gLastFileName, $CurrentFileName ) ) { $bDuplicatesFound = 1; MoveLongerFile( $gLastFileName, $CurrentFileName ); } $gLastFileName = $CurrentFileName; } print "========================================================================\n"; if ( $bDuplicatesFound ) { print "One layer of duplicates have been moved to '$DestinationDirectory'\n"; print "You may need to run this again because only adjacent files are tested against each other\n"; print "Don't forget to delete your '$DestinationDirectory' folder to finalize the pruning of duplicates\n"; } else { print "Congratulations! No duplicates were found\n"; print "Make sure you do not have a rogue '$DestinationDirectory' folder lingering around\n"; }