The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# vim:ts=8 sw=4 sts=4 ai
require v5.6.1;
use strict;
use warnings;

=head1 NAME

hash2fv - convert a hash db file into Tie::FieldVals data.

=head1 VERSION

This describes version B<0.6202> of hash2fv.

=cut

our $VERSION = '0.6202';

=head1 SYNOPSIS

hash2fv --help | --manpage | --version

hash2fv {--fields I<fieldname> } I<hashdbfile> [ I<outfile> ]

=head1 DESCRIPTION

This script converts a hash db file (of the type used by
AutomatedArchive 3.x) into Tie::FieldVals data.

This requires the file name of the hash db file, not just the location
of the AutomatedArchive directory, so as to make this more flexible in
the files it is able to read.  While the AutomatedArchive file is usually
"I<archive_dir>/cgi-bin/files/ARCHIVE_DB.pl", someone converting it
may wish to move it to a different location, or download it from
their website, or may wish to use this script for converting files
of the same format which aren't actually AutomatedArchive files.

=head1 OPTIONS

=over

=item --fields I<fieldname>

If you want to define the order of the Fields in the output file,
or simply define the legal fields you are interested in, then
use this option to override the default of getting the Field names
from the first record in the the Hash DB file.

Repeat the --fields option for each field name.  Be sure to give
every field you want, because this I<replaces> the field definitions
from the Hash DB file.

    --fields Author --fields Category --fields Title --fields Location ...

=item --help

Print help message and exit.

=item --manpage

Print the full help documentation (manual page) and exit.

=item --verbose

Print informational messages.

=item --version

Print version information and exit.

=back

=head1 FILE FORMATS

=head2 Hash DB File Format

The format of the input hash DB file is as follows:

    %FILES = (
    1 => {
	Field => 'Value',
	AnotherField => 'AnotherValue',
	...
    },
    2 => {
	...
    },
    ...
    );
    1;

This is the format used by the AutomatedArchive suite version 3.x.
This may be the format used by version 2.x also.

This converter ignores the values of the '1', '2' (and so on) keys,
just converting the Fields and Values into records.

=head2 FieldVals Format

The output data file is in the form of Field:Value pairs, with each
record separated by a line with '=' on it.

See L<Tie::FieldVals/FILE FORMAT> for more information.

=head1 REQUIRES

    Getopt::Long
    Pod::Usage
    Getopt::ArgvFile
    Data::Dumper
    File::Basename
    Tie::FieldVals::Row

=head1 SEE ALSO

perl(1)
Getopt::Long
Getopt::ArgvFile
Pod::Usage

=cut

use Getopt::Long 2.34;
use Getopt::ArgvFile qw(argvFile);
use Pod::Usage;
use Data::Dumper;
use File::Basename;
use Tie::FieldVals::Row;

#========================================================
# Subroutines
sub init_data ($) {
    my $data_ref = shift;

    # options
    my %default_conf = ();
    $default_conf{debug} = 0;
    $default_conf{manpage} = 0;
    $default_conf{version} = 0;
    $default_conf{verbose} = 0;
    $default_conf{outfile} = '-';
    $default_conf{hashdbfile} = '';
    $default_conf{fields} = [];
    $data_ref->{options} = \%default_conf;
} # init_data

sub process_args ($) {
    my $data_ref = shift;

    my $ok = 1;

    argvFile(home=>1,current=>1,startupFilename=>'.hash2fvrc');

    pod2usage(2) unless @ARGV;

    my $op = new Getopt::Long::Parser;
    $op->configure(qw(auto_version auto_help));
    $op->getoptions($data_ref->{options},
		    'verbose!',
		    'manpage',
		    'debug!',
		    'fields=s@',
		    'hashdbfile=s',
		    'outfile=s',
		   ) or pod2usage(2);

    if ($data_ref->{options}->{'manpage'})
    {
	pod2usage({ -message => "$0 version $VERSION",
		    -exitval => 0,
		    -verbose => 2,
	    });
    }

} # process_args

sub convert_file ($) {
    my $data_ref = shift;

    my $outfile = $data_ref->{options}->{outfile};

    print STDERR "hashdbfile: ", $data_ref->{options}->{hashdbfile},
	" outfile: ", $outfile,
	"\n" if ($data_ref->{options}->{verbose});
    if ($data_ref->{options}->{debug})
    {
	print STDERR Data::Dumper->Dump([$data_ref], [qw(hash2fv)]);
    }

    my $outhandle = \*STDOUT;
    if ($outfile ne '-')
    {
	open(OUTFILE, ">$outfile") || die "Can't open '$outfile' for writing: $!";
	$outhandle = \*OUTFILE;
    }
    # open the Hash DB file
    # this will give a %FILES hash
    our %FILES = ();
    my $hashdb = $data_ref->{options}->{hashdbfile};
    my ($dbfile,$dir,$suffix) = fileparse($hashdb);
    unshift(@INC, $dir);
    require $dbfile or die "Cannot open hash DB $hashdb: $!";

    if ($data_ref->{options}->{debug})
    {
	print STDERR "hashdb=$hashdb, dir=$dir, dbfile=$dbfile\n";
	print STDERR Data::Dumper->Dump([\%FILES], [qw(FILES)]);
    }
    # go through the $FILES hash
    my $count = 0;
    my @field_names = @{$data_ref->{options}->{fields}};
    while (my ($key, $rec_ref) = each %FILES)
    {
	warn "$key\n" if $data_ref->{options}->{debug};
	if (defined $rec_ref)
	{
	    my %row = ();
	    my $row_obj;
	    if (@field_names)
	    {
		$row_obj = tie %row, 'Tie::FieldVals::Row',
		    fields=>\@field_names;
		$row_obj->set_from_hash($rec_ref);
	    }
	    else # get the field names from the first hash
	    {
		$row_obj = tie %row, 'Tie::FieldVals::Row',
		    fields=>[qw(dummy)];
		$row_obj->set_from_hash($rec_ref, override_keys=>1);
		@field_names = @{$row_obj->field_names()};
		# print the initial empty record
		foreach my $fn (@field_names)
		{
		    print $outhandle "$fn:\n";
		}
		print $outhandle "=\n";
	    }
	    # print the current record
	    print $outhandle $row_obj->get_as_string();
	    print $outhandle "\n=\n";
	    $count++;
	}
    }

    print STDERR "$count records processed\n" if ($data_ref->{options}->{verbose});

    if ($outfile ne '-')
    {
	close(OUTFILE);
    }
    print STDERR "done!\n" if ($data_ref->{options}->{verbose});
} # convert_file

#========================================================
# Main

MAIN: {
    my %data = ();

    init_data(\%data);
    process_args(\%data);

    # first argument is the hashdbfile
    if (@ARGV)
    {
	$data{options}->{hashdbfile} = shift @ARGV;
    }
    if (!$data{options}->{hashdbfile})
    {
	print STDERR "$0 no hashdbfile\n";
	return 1;
    }
    if (!-e $data{options}->{hashdbfile})
    {
	print STDERR "$0 hashdbfile not found\n";
	return 1;
    }

    # remaining argument is the output file
    if (@ARGV)
    {
	$data{options}->{outfile} = shift @ARGV;
    }
    convert_file(\%data);
}

=head1 BUGS

Please report any bugs or feature requests to the author.

=head1 AUTHOR

    Kathryn Andersen (RUBYKAT)
    perlkat AT katspace dot com
    http://www.katspace.com

=head1 COPYRIGHT AND LICENCE

Copyright (c) 2004 by Kathryn Andersen

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.


=cut

__END__