# The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Text::CSV::UniqueColumns;

use 5.008008;
use strict;
use warnings;

require Exporter;

our @ISA = qw(Exporter);


# Version string of this module.
our $VERSION = '0.3';

# File-scoped state, shared by every object created from this package:
#   %headerHash - header name => 1-based column number, filled by buildHeaderHash()
#   @cleanup    - names of temporary files that cleanUp() removes
my (%headerHash, @cleanup);

# Constructor.
#
# Arguments:
#   $class - package name (supplied by the method call)
#   $sFile - path of the CSV file; required
#   $sCols - optional column list, stored unchanged in the object
#
# Returns the blessed object. Dies when no file is given, when the file
# does not exist, or when no header names could be read from it.
sub new {
    my ($class, $sFile, $sCols) = @_;

    die "Please provide csv file as argument\n" if (!$sFile);
    die "$sFile not found" if (!-e $sFile);

    my $self = {
        '_file'    => $sFile,
        '_cols'    => $sCols,
        '_headers' => "",
    };
    bless $self, $class;

    getHeaders($self, $sFile);

    # getHeaders() stores an array reference, and a reference is true even
    # when the array is empty, so count the header names instead of testing
    # the reference itself.
    my $headers = $self->{'_headers'};
    die "Could not get headers\n"
        if (ref($headers) ne 'ARRAY' || !@{$headers});

    return $self;
}

# Check whether the values of one column, or of a combination of columns,
# are unique across the lines of the CSV file.
#
# Arguments:
#   $sCols - comma-separated column names, e.g. 'Col1' or 'Col1,Col2,Col3'.
#            Whitespace inside a name is ignored when matching headers.
#
# Returns:
#   "1" when every line yields a distinct value (or combination of values),
#   "0" when at least two lines yield the same one,
#   or a message string when no column is given, a column name is unknown,
#   or the file cannot be read.
#
# Notes:
#   * Every line of the file takes part in the comparison, the header line
#     included.
#   * Lines are split on every comma; quoted fields get no special handling.
#   * Space characters are removed from the values before they are compared.
#   * A line that contains no comma is used whole for every requested column,
#     and a missing field counts as an empty value. Both mirror cut(1), which
#     earlier versions of this method shelled out to.
#
# The file is read directly, so no shell is started and no scratch files are
# written to the current directory.
sub checkUniq {
    my ($self, $sCols) = @_;
    return "Provide cols as arguments\n"
        if (!defined($sCols) || $sCols eq '');
    $self->{'_cols'} = $sCols;

    # Header name => zero-based field index, built from this object's own
    # headers so that other objects cannot influence the lookup. When a name
    # occurs twice the last occurrence wins.
    my $headers = ref($self->{'_headers'}) eq 'ARRAY' ? $self->{'_headers'} : [];
    my %index_of;
    foreach my $idx (0 .. $#{$headers}) {
        (my $name = $headers->[$idx]) =~ s/\s+//g;
        $index_of{$name} = $idx;
    }

    my @wanted;
    my @CompositeCols = split(',', $sCols);
    foreach my $sCol (@CompositeCols) {
        chomp($sCol);
        (my $name = $sCol) =~ s/\s+//g;
        if (!exists($index_of{$name})) {
            return "Column - $sCol not found\n INFO - Use \"-l\" option to list columns in file\n";
        }
        push(@wanted, $index_of{$name});
    }

    # A list such as "," names no column at all.
    return "Provide cols as arguments\n" if (!@wanted);

    open(my $fh, '<', $self->{'_file'})
        or return "Cannot read $self->{'_file'} $! \n";

    local $/ = "\n";    # read line by line even if the caller changed $/
    my %seen;
    my $sUnique = "1";
    while (defined(my $sLine = <$fh>)) {
        chomp($sLine);
        my @values;
        if (index($sLine, ',') < 0) {
            @values = ($sLine) x scalar(@wanted);
        }
        else {
            # LIMIT -1 keeps trailing empty fields
            my @fields = split(/,/, $sLine, -1);
            @values = map { defined($fields[$_]) ? $fields[$_] : '' } @wanted;
        }
        my $key = join(',', @values);
        $key =~ tr/ //d;
        if ($seen{$key}++) {
            $sUnique = "0";    # first repeat settles the answer
            last;
        }
    }
    close($fh);

    return $sUnique;
}

# Return the header names of the CSV as one comma-separated string.
# An object that holds no header names yields the empty string.
sub getColumnList {
    my ($self) = @_;
    my $headers = $self->{'_headers'};
    return '' if (ref($headers) ne 'ARRAY');
    return join(',', @{$headers});
}


# Return the names of all columns whose values are unique across the lines
# of the CSV file, as one comma-separated string in header order. The string
# is empty when no column qualifies, and it carries no leading space.
#
# Notes:
#   * Every line of the file takes part in the comparison, the header line
#     included.
#   * Lines are split on every comma; quoted fields get no special handling.
#   * Space characters are removed from the values before they are compared.
#   * A line that contains no comma is used whole for every column, and a
#     missing field counts as an empty value. Both mirror cut(1), which
#     earlier versions of this method shelled out to.
#
# Dies when the file cannot be read. The file is scanned once for all
# columns and no shell is started.
sub getUniqCols {
    my ($self) = @_;
    my $headers = ref($self->{'_headers'}) eq 'ARRAY' ? $self->{'_headers'} : [];
    return '' if (!@{$headers});

    open(my $fh, '<', $self->{'_file'})
        or die "Cannot read $self->{'_file'}: $!\n";

    local $/ = "\n";    # read line by line even if the caller changed $/
    my @seen     = map { +{} } @{$headers};         # per column: values met so far
    my @repeated = (0) x scalar(@{$headers});       # per column: repeat found?
    my $iPending = scalar(@{$headers});             # columns still undecided

    while ($iPending && defined(my $sLine = <$fh>)) {
        chomp($sLine);
        my $has_comma = index($sLine, ',') >= 0;
        # LIMIT -1 keeps trailing empty fields
        my @fields = $has_comma ? split(/,/, $sLine, -1) : ();

        foreach my $iNo (0 .. $#{$headers}) {
            next if ($repeated[$iNo]);
            my $value = !$has_comma            ? $sLine
                      : defined($fields[$iNo]) ? $fields[$iNo]
                      :                          '';
            $value =~ tr/ //d;
            if ($seen[$iNo]{$value}++) {
                $repeated[$iNo] = 1;
                $seen[$iNo]     = undef;    # values of a decided column are no longer needed
                $iPending--;
            }
        }
    }
    close($fh);

    return join(',', map { $headers->[$_] } grep { !$repeated[$_] } 0 .. $#{$headers});
}

# Normalise the header names held by the object and record their positions.
#
# Each entry of $self->{'_headers'} has all whitespace (newlines included)
# removed in place, and the cleaned name is stored in %headerHash together
# with its 1-based column number. When a name occurs more than once the
# last occurrence wins.
sub buildHeaderHash {
    my ($self) = @_;
    my $headers = $self->{'_headers'};
    for my $iIdx (0 .. $#{$headers}) {
        # \s covers "\n" as well, so a single substitution is enough
        $headers->[$iIdx] =~ s/\s+//g;
        $headerHash{ $headers->[$iIdx] } = $iIdx + 1;
    }
}


# Read the header names from the first line of the CSV file.
#
# Arguments:
#   $self  - the object that receives the header names
#   $sFile - path of the CSV file; defaults to $self->{'_file'}
#
# The first line is split on commas, a reference to the resulting list is
# stored in $self->{'_headers'} (an empty list for an empty file), and
# buildHeaderHash() is then called to clean the names and record their
# positions.
#
# Dies when the file cannot be opened. The file is read directly, so its
# name never passes through a shell, and nothing is printed.
sub getHeaders {
    my ($self, $sFile) = @_;
    $sFile = $self->{'_file'} if (!defined($sFile));

    open(my $fh, '<', $sFile) or die "Cannot read $sFile: $!\n";
    # read exactly one "\n"-terminated line even if the caller changed $/
    my $sLine = do { local $/ = "\n"; <$fh> };
    close($fh);

    my @headers = defined($sLine) ? split(',', $sLine) : ();
    $self->{'_headers'} = \@headers;
    buildHeaderHash($self);
}

# Remove every temporary file recorded in @cleanup, then empty the list so
# that the same names are not processed again on the next call.
#
# unlink() is used instead of running rm, so no shell is started and file
# names are never interpreted by one. As with "rm -f", files that do not
# exist are skipped silently.
sub cleanUp {
    unlink(@cleanup);
    @cleanup = ();
    return;
}


1;
         


__END__

=head1 NAME

Text::CSV::UniqueColumns - Perl extension for finding columns with unique values in a CSV 

=head1 SYNOPSIS

  use Text::CSV::UniqueColumns;
  
  # Create an object of the module
  my $Obj = Text::CSV::UniqueColumns->new('check.csv');
  
  # To get the list of columns (comma separated)
  my $list = $Obj->getColumnList();
  
  # To check if column 'col1' has unique values.
  # Returns 1 if unique, 0 if not
  my $Int = $Obj->checkUniq('col1');
  
  # To get a list of columns (comma separated) having unique values.
  my $Uniq = $Obj->getUniqCols();


=head1 DESCRIPTION

Finds the columns of a CSV file whose values are unique.

Functions and their usage:

  getColumnList - list the names of all columns in a CSV
  checkUniq     - check whether the values of one column ('Col1') or of a
                  combination of columns ('Col1,Col2,Col3') are unique
  getUniqCols   - list the columns of a CSV whose values are unique

B<Note:> this module works on UNIX boxes only.


=head1 AUTHOR

Tushar, E<lt>tushar@cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2010 by Tushar Murudkar

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.


=cut