The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
#!/usr/bin/env perl
# vim:ts=8 sw=4 sts=4 ai
require v5.6.1;
use strict;
use warnings;

=head1 NAME

fv2xml - convert a Tie::FieldVals datafile into XML data.

=head1 VERSION

This describes version B<0.6203> of fv2xml.

=cut

our $VERSION = '0.6203';

=head1 SYNOPSIS

fv2xml --help | --manpage | --version

fv2xml { --match I<field>=I<pattern> } [ --match_any I<pattern> ]
[ --num_recs I<num> ] [ --start_rec I<num> ]
{ --sort_by I<field> } { --sort_numeric I<field> } { --sort_reversed I<field> }
I<datafile> [ I<outfile> ]

=head1 DESCRIPTION

This script converts a (subset of) a Tie::FieldVals datafile into
XML data.

=head1 OPTIONS

=over

=item --datafile I<filename>

The input data file (in Tie::FieldVals format). 

=item --help

Print help message and exit.

=item --manpage

Print the full help documentation (manual page) and exit.

=item --match I<field>=I<pattern>

Extract a subset of records from the file, by only including
those which match the given pattern for the given field.  This option
can be repeated for multiple fields.
For example:

    --match Author=Mary

will give the records for authors which contain the string "Mary".

=item --match_any I<pattern>

Extract a subset of records from the file by only including
those which match the given pattern in any field.

=item --num_recs I<n>

Extract a subset of at most n records from the collection (it may
be less).

=item --sort_by I<field>

Sort by this field.  Can be repeated to sort by multiple fields.
Thus, if one wished to sort by Author and then Title, one would give:

    --sort_by Author --sort_by Title

=item --sort_numeric I<field>

If sorting by this field, use this to alter the type of the sort, to
make it numeric.

For example:

    --sort_by SeriesOrder --sort_numeric SeriesOrder

To switch off numeric sort for a field, give the value of 0 to the
argument.

For example:
    --sort_by SeriesOrder --sort_numeric SeriesOrder=0

=item --sort_reversed I<field>

If sorting by this field, use this to alter the direction of the sort, to
make it sort reversed.

For example:
    --sort_by Author --sort_reversed Author

To switch off reversed sort for a field, give the value of 0 to the
argument.

For example:
    --sort_by Author --sort_reversed Author=0

=item --start_rec I<n>

Extract a subset of records starting from the nth record.

=item --verbose

Print informational messages.

=item --version

Print version information and exit.

=back

=head1 FILE FORMATS

=head2 FieldVals Format

The input data file is in the form of Field:Value pairs, with each
record separated by a line with '=' on it.

See L<Tie::FieldVals/FILE FORMAT> for more information.

=head2 XML FORMAT

The format of the output XML file is as follows:

    <fv_data>
        <record>
	   <Field>Value</Field>
	    <AnotherField>AnotherValue</AnotherField>
	    ...
	</record>
	...
    </fv_data>

Each field name is given its own tag, and the element contains
the value for that field.

=head1 REQUIRES

    Getopt::Long
    Pod::Usage
    Getopt::ArgvFile
    Data::Dumper
    Tie::FieldVals
    Tie::FieldVals::Row
    Tie::FieldVals::Select

=head1 SEE ALSO

perl(1)
Getopt::Long
Getopt::ArgvFile
Pod::Usage

=cut

use Getopt::Long 2.34;
use Getopt::ArgvFile qw(argvFile);
use Pod::Usage;
use Data::Dumper;
use Tie::FieldVals;
use Tie::FieldVals::Row;
use Tie::FieldVals::Select;

#========================================================
# Subroutines
sub init_data ($) {
    my $data_ref = shift;

    # options
    my %default_conf = ();
    $default_conf{debug} = 0;
    $default_conf{manpage} = 0;
    $default_conf{version} = 0;
    $default_conf{verbose} = 0;
    $default_conf{outfile} = '-';
    $default_conf{datafile} = '';
    $default_conf{sort_by} = [];
    $default_conf{sort_numeric} = {};
    $default_conf{sort_reversed} = {};
    $default_conf{start_rec} = 0;
    $default_conf{num_recs} = -1;
    $default_conf{match} = {};
    $default_conf{match_any} = '';
    $data_ref->{options} = \%default_conf;
} # init_data

sub process_args ($) {
    my $data_ref = shift;

    my $ok = 1;

    argvFile(home=>1,current=>1,startupFilename=>'.fv2xmlrc');

    pod2usage(2) unless @ARGV;

    my $op = new Getopt::Long::Parser;
    $op->configure(qw(auto_version auto_help));
    $op->getoptions($data_ref->{options},
		    'verbose!',
		    'manpage',
		    'debug!',
		    'datafile=s',
		    'sort_by=s@',
		    'sort_numeric:1%',
		    'sort_reversed:1%',
		    'start_rec=i',
		    'num_recs=i',
		    'match=s%',
		    'match_any=s',
		    'outfile=s',
		   ) or pod2usage(2);

    if ($data_ref->{options}->{'manpage'})
    {
	pod2usage({ -message => "$0 version $VERSION",
		    -exitval => 0,
		    -verbose => 2,
	    });
    }

} # process_args

sub convert_file ($) {
    my $data_ref = shift;

    my $outfile = $data_ref->{options}->{outfile};

    print STDERR "datafile: ", $data_ref->{options}->{datafile},
	" outfile: ", $outfile,
	"\n" if ($data_ref->{options}->{verbose});
    if ($data_ref->{options}->{verbose}
	&& %{$data_ref->{options}->{match}})
    {
	print STDERR Data::Dumper->Dump([$data_ref->{options}->{match}],
	    [qw(match)]);
    }
    if ($data_ref->{options}->{verbose}
	&& $data_ref->{options}->{match_any})
    {
	print STDERR "match any: ", $data_ref->{options}->{match_any}, "\n";
    }
    if ($data_ref->{options}->{debug})
    {
	print STDERR Data::Dumper->Dump([$data_ref], [qw(fv2xml)]);
    }
    # get the records, sort them, and grab them
    my @sel_recs = ();
    Tie::FieldVals::Select::debug($data_ref->{options}->{'debug'});
    my $sel_obj = tie @sel_recs, 'Tie::FieldVals::Select',
	datafile=>$data_ref->{options}->{'datafile'},
	selection=>$data_ref->{options}->{'match'},
	match_any=>$data_ref->{options}->{'match_any'};
    $data_ref->{_sel_recs} = \@sel_recs;
    $data_ref->{_sel_obj} = $sel_obj;
    if (@{$data_ref->{options}->{sort_by}})
    {
	$sel_obj->sort_records(
		sort_by=>$data_ref->{options}->{'sort_by'},
		sort_numeric=>$data_ref->{options}->{'sort_numeric'},
		sort_reversed=>$data_ref->{options}->{'sort_reversed'});
    }

    my $count = @sel_recs;
    print STDERR "$count records found\n" if ($data_ref->{options}->{verbose});

    my $first_rec = 0;
    if ($data_ref->{options}->{start_rec} >= 0
	and $data_ref->{options}->{start_rec} < $count)
    {
	$first_rec = $data_ref->{options}->{start_rec};
    }
    my $last_rec = $first_rec + $data_ref->{options}->{num_recs};
    if ($data_ref->{options}->{num_recs} <= 0)
    {
	$last_rec = $count;
    }
    if ($last_rec > $count)
    {
	$last_rec = $count;
    }
    if ($first_rec > $last_rec)
    {
	warn "record number $first_rec > $last_rec -- aborting\n";
	return 0;
    }
    print STDERR "records from #$first_rec to #$last_rec\n" if ($data_ref->{options}->{verbose});

    # for each record, create the XML version
    if ($outfile ne '-')
    {
	open(OUTFILE, ">$outfile") || die "Can't open '$outfile' for writing.";
	print OUTFILE "<fv_data>\n";
    }
    else
    {
	print "<fv_data>\n";
    }
    for (my $i=$first_rec; $i < $last_rec; $i++)
    {
	my $vals = $sel_recs[$i];
	my $row_obj = tied %{$vals};
	my $xml_str = $row_obj->get_xml_string();
	if ($outfile eq '-')
	{
	    print $xml_str;
	}
	else
	{
	    print OUTFILE $xml_str;
	}
    }
    if ($outfile ne '-')
    {
	print OUTFILE "</fv_data>\n";
	close(OUTFILE);
    }
    else
    {
	print "</fv_data>\n";
    }
    print STDERR "done!\n" if ($data_ref->{options}->{verbose});
} # convert_file

#========================================================
# Main

MAIN: {
    my %data = ();

    init_data(\%data);
    process_args(\%data);

    # first argument is the datafile
    if (@ARGV)
    {
	$data{options}->{datafile} = shift @ARGV;
    }
    if (!$data{options}->{datafile})
    {
	print STDERR "$0 no datafile\n";
	return 0;
    }

    # remaining argument is the output file
    if (@ARGV)
    {
	$data{options}->{outfile} = shift @ARGV;
    }
    convert_file(\%data);
}

=head1 BUGS

Please report any bugs or feature requests to the author.

=head1 AUTHOR

    Kathryn Andersen (RUBYKAT)
    perlkat AT katspace dot com
    http://www.katspace.com

=head1 COPYRIGHT AND LICENCE

Copyright (c) 2004 by Kathryn Andersen

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.


=cut

__END__