#!/usr/bin/perl
# Filename:	find_dup
# Author:	David Ljung Madison <DaveSource.com>
# See License:	http://MarginalHacks.com/License
# Description:	Find duplicates amongst a list of files
# Version:	1.0
use strict;

##################################################
# Setup the variables
##################################################
# Name this script was invoked as, with any leading directory path removed
(my $PROGNAME = $0) =~ s|.*/||;

# External program run on each file to produce its checksum
my $SUM = "sum";

##################################################
# Usage
##################################################
# Print any error messages followed by the usage summary, then exit.
#
# Arguments: zero or more message strings; each is printed on its own
#            line, prefixed with "ERROR:  ".
# Returns:   never - the script always terminates with exit -1, both for
#            errors and for an explicit -h request.
sub usage {
  foreach my $msg (@_) { print "ERROR:  $msg\n"; }
  print "\n";
  # The synopsis lists every switch that parse_args accepts
  print "Usage:\t$PROGNAME [-h] [-d] [-f] [-rm] <files..>\n";
  print "\tList all duplicates in a set of files\n";
  print "\n";
  print "\t-f\tDon't list the first duplicate on each line\n";
  print "\t-rm\tRemove all duplicate files (the first of each set is kept)\n";
  print "\t-d\tSet debug mode\n";
  print "\t-h\tShow this usage message\n";
  print "\n";
  exit -1;
}

# Consume @ARGV and split it into option switches and file names.
#
# Switches: -h (show usage), -d (debug), -f (omit the first file of each
# duplicate set from the listing), -rm (remove duplicates).  Any other
# argument starting with "-" is reported through usage(), as is a file
# list with fewer than two entries.
#
# Returns: a reference to the options hash, followed by the file names.
sub parse_args {
  my %opt = (first => 1);
  my @files;

  while (@ARGV) {
    my $arg = shift @ARGV;

    usage() if $arg =~ /^-h$/;

    if    ($arg =~ /^-d$/)  { $opt{debug} = 1; }
    elsif ($arg =~ /^-f$/)  { $opt{first} = 0; }
    elsif ($arg =~ /^-rm$/) { $opt{rm} = 1; }
    elsif ($arg =~ /^-/)    { usage("Unknown option: $arg"); }
    else                    { push @files, $arg; }
  }

  usage("Need at least two files") if @files < 2;

  return (\%opt, @files);
}

##################################################
# Main code
##################################################
# Checksum every file named on the command line, group the files whose
# checksums match, confirm each group byte-for-byte, then print each set
# of duplicates on one line (and, with -rm, delete all but the first).
#
# Takes no arguments; the options and file list come from parse_args().
# Output: one line per duplicate set on STDOUT; problems on STDERR.
sub main {
  my ($opt,@files) = parse_args();

  # Core modules, loaded here so this sub carries its own dependencies
  require Cwd;
  require File::Compare;

  # Get sums
  my (%sums, %seen);
  foreach my $f ( @files ) {
    # A file reached twice (same argument repeated, "x" and "./x", or a
    # symlink to it) is not a duplicate of itself - treating it as one
    # would let -rm delete the only copy.  Hard links resolve to different
    # paths and are therefore still reported.
    my $real = eval { Cwd::realpath($f) };
    $real = $f unless defined $real;
    if ($seen{$real}++) {
      print "$f: same file as an earlier argument, skipped\n" if ($opt->{debug});
      next;
    }

    # Feed the file on stdin: checksum tools that are given a file name
    # may echo it in their output, which would make every key unique.
    my $sum = `$SUM < \Q$f\E`;
    if (!defined $sum || $?) {
      # Don't lump all unreadable files together under an empty checksum
      print STDERR "[$PROGNAME] ERROR: Couldn't checksum [$f]\n";
      next;
    }
    chomp($sum);
    print "$f: $sum\n" if ($opt->{debug});
    push(@{$sums{$sum}}, $f);
  }

  # Find matching sums: only groups of two or more files are candidates
  my @match = grep { @$_ > 1 } values %sums;

  # Double-check matches with a byte-for-byte comparison
  # It is unlikely, though possible, that they have the same sum but are different
  foreach my $m ( @match ) {
    my @check = sort @$m;
    while (@check) {
      my $first = shift(@check);
      my (@same,@diff);
      foreach my $other (@check) {
        # compare() returns 0 only for identical contents; 1 (different)
        # and -1 (error) both mean "not a confirmed duplicate".  Unlike
        # diff it prints nothing, so the listing stays clean.
        if (File::Compare::compare($first, $other) == 0) {
          push(@same,$other);
        } else {
          push(@diff,$other);
        }
      }
      print "Same sum but different! [$first @same] vs [@diff]\n"
        if ($opt->{debug} && @diff);
      @check = @diff;	# Go through any remainders that didn't match

      next unless @same;
      print $first," " if $opt->{first};
      print join(" ",@same), "\n";
      if ($opt->{rm}) {
        # unlink returns the number of files it actually removed
        print STDERR "[$PROGNAME] ERROR: Couldn't remove all files [@same]\n"
          unless (unlink(@same) == @same);
      }
    }
  }
}
main();
