The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package XML::Flow;

#$Id: Flow.pm 833 2010-08-24 12:23:53Z zag $

=pod

=head1 NAME

XML::Flow - Store (restore) perl data structures in XML stream.

=head1 SYNOPSIS

  #read - write by imported functions ref2xml() and  xml2ref()
  use XML::Flow qw( ref2xml xml2ref);
  my $data = {1=>2,4=>[1,2,3]};
  my $xml_string = ref2xml($data);
  my $data_restored = xml2ref($xml_string);
  my $ref1 = xml2ref(\*DATA); #from embedded __DATA__

  #Write XML
  use XML::Flow;
  my $wr = new XML::Flow:: "test.xml";
  $wr->startTag("Root"); #start root tag
  $wr->startTag("Data");
  $wr->write({1=>2},[4..6]);
  $wr->closeTag("Data");
  $wr->closeTag("Root");
  $wr->close;


  #Read
  my $fs = new IO::File:: "<test.xml";
  my $rd = new XML::Flow:: $fs;
  my %tags = (
       Root=>undef,
       Data=>sub { print Dumper(\@_) },
       );
  $rd->read(\%tags);
  $fs->close;

=head1 DESCRIPTION

Easy store and restore perl data structures. It use  XML::Parser for read and XML::Writer for write 
xml.

=cut

use XML::Parser;
use XML::Writer;
use IO::File;
use Data::Dumper;
use warnings;
use Carp;
use Encode;
use strict;
require Exporter;
*import               = \&Exporter::import;
@XML::Flow::EXPORT_OK = qw(ref2xml xml2ref);
$XML::Flow::VERSION   = '0.86';
my $attrs = {
    _file        => undef,
    _file_handle => undef,
    _writer      => undef,
    _events      => {},
    _need_close  => undef
};
### install get/set accessors for this object.
for my $key ( keys %$attrs ) {
    no strict 'refs';
    *{ __PACKAGE__ . "::$key" } = sub {
        my $self = shift;
        $self->{$key} = $_[0] if @_;
        return $self->{$key};
      }
}

=head1 FUNCTIONS

=cut

=head2 ref2xml( $ref )

Serilize reference to XML string. Where $ref is reference to SCALAR, HASH or ARRAY. This function will return XML string.

    use XML::Flow qw( ref2xml xml2ref);
    my $test = {1=>2,4=>[1,2,3]};
    print ref2xml($test);

The above example would print out the message:

    <?xml version="1.0" encoding="UTF-8"?>
    <XML-FLow-Data>
      <flow_data_struct>
        <value type="hashref">
          <key name="4">
            <value type="arrayref">
              <key name="1">2</key>
              <key name="0">1</key>
              <key name="2">3</key>
            </value>
          </key>
          <key name="1">2</key>
        </value>
      </flow_data_struct>
    </XML-FLow-Data>

=cut

sub ref2xml {
    my $ref = shift || return;
    my $result;
    my $flow = ( new XML::Flow:: \$result );
    $flow->startTag("XML-FLow-Data");
    $flow->write($ref);
    $flow->endTag("XML-FLow-Data");
    return $result;
}

=head2 xml2ref($string || reference to GLOB)

This function will deserilize string generated by ref2xml.Return reference.
For example:

    use XML::Flow qw( ref2xml xml2ref);
    use Data::Dumper;
    my $testxml = q{<?xml version="1.0" encoding="UTF-8"?>
    <XML-FLow-Data>
      <flow_data_struct>
        <value type="hashref">
          <key name="4">
            <value type="arrayref">
              <key name="1">2</key>
              <key name="0">1</key>
              <key name="2">3</key>
            </value>
          </key>
          <key name="1">2</key>
        </value>
      </flow_data_struct>
    </XML-FLow-Data>};
    print Dumper(xml2ref($testxml))

will print:

    $VAR1 = {
          '1' => '2',
          '4' => [
                 '1',
                 '2',
                 '3'
               ]
        }; 

=cut

sub xml2ref {
    my $xml = shift || return;
    my $result;
    my $flow = new XML::Flow:: ref($xml) ? $xml : \$xml;
    $flow->read(
        {
            'XML-FLow-Data' => sub { shift; ($result) = @_ }
        }
    );
    return $result;
}

=head1 METHODS

=cut

=head2 new($filehandle|$filename| a reference to a text string )

Create a new XML::Flow object. The first parameter should either be a string containing filename, a reference to a text string or it should be an open IO::Handle. For example:

 my $wr = new XML::Flow:: "test.xml";

or

 my $rd = new XML::Flow:: \$string_with_xml;

or

 my $fs = new IO::File:: "<test.xml";
 my $rd = new XML::Flow:: $fs;

or

 my $fz = IO::Zlib->new($file, "wb9");
 my $wr = new XML::Flow:: $fz;

or

 my $string_for_write_xml;
 my $wr = new XML::Flow:: \$string_buffer_for_write_xml;


=cut

sub new {
    my $class = shift;
    $class = ref $class if ref $class;
    my $self = bless( {}, $class );
    if (@_) {
        my $file = shift;
        if (
            ref $file
            and
            ( UNIVERSAL::isa( $file, 'IO::Handle' ) or ( ref $file ) eq 'GLOB' )
            or UNIVERSAL::isa( $file, 'Tie::Handle' )
           )
        {
            $self->_file_handle($file);
        }
        else {
            $self->_file($file);
        }
    }
    else {
        carp "need filename or filehandle";
        return;
    }
    return $self;
}

sub _get_handle {
    my $self = shift;
    my $mode = shift;
    unless ( $self->_file_handle ) {
        return if ref( $self->_file ) eq 'SCALAR';
        $self->_file_handle(
            new IO::File::( $mode ? ">" : "<" ) . $self->_file );
        $self->_need_close(1);    #close FH when close
    }
    return $self->_file_handle;
}

sub _get_writer {
    my $self = shift;
    unless ( $self->_writer ) {
        my $fh = $self->_get_handle(1) || $self->_file;
        my $writer = new XML::Writer:: OUTPUT => $fh;
        $writer->xmlDecl("UTF-8");
        $self->_writer($writer)

    }
    return $self->_writer;
}

=head2 startTag($name [, $aname1 => $value1, ...])

Add a start tag to an XML document. This method is wraper for XML::Writer::startTag.


=cut

sub startTag {
    my $self   = shift;
    my $writer = $self->_get_writer;
    return $writer->startTag(@_);
}

sub closeTag {
    my $self   = shift;
    my $writer = $self->_get_writer;
    return $writer->endTag(@_);
}

=head2 endTag([$name])

Add a end tag to an XML document. This method is wraper for XML::Writer::endTag.


=cut

sub endTag {
    my $self   = shift;
    my $writer = $self->_get_writer;
    return $writer->endTag(@_);
}

sub __ref2xml {
    my $self   = shift;
    my $writer = shift;
    my $ref    = shift;
    return unless ref $ref;
    my $type        = 'hashref';
    my $res_as_hash = $ref;
    if ( ref $ref eq 'ARRAY' ) {
        $res_as_hash = {};
        my $key = 0;
        foreach my $val (@$ref) {
            $res_as_hash->{ $key++ } = $val;
        }
        $type = 'arrayref';
    }
    if ( ref $ref eq 'SCALAR' ) {
        $res_as_hash           = {};
        $res_as_hash->{scalar} = $$ref;
        $type                  = 'scalarref';
    }

    $writer->startTag( 'value', type => $type );
    while ( my ( $key, $val ) = each %$res_as_hash ) {
        unless ( defined $val ) {
            $writer->startTag( 'key', name => $key, value => "undef" );
            $writer->endTag('key');
            next;
        }
        $writer->startTag( 'key', name => $key );
        if ( ref($val) ) {
            $self->__ref2xml( $writer, $val );
        }
        else {
            $writer->characters( $self->_utfx2utf($val) );
        }
        $writer->endTag('key');
    }
    $writer->endTag('value');
}

sub _utfx2utf {
    my ( $self, $str ) = @_;
    $str = encode( 'utf8', $str ) if utf8::is_utf8($str);
    return $str;
}

sub _utf2utfx {
    my ( $self, $str ) = @_;
    $str = decode( 'utf8', $str ) unless utf8::is_utf8($str);
    return $str;
}

=head2 write($ref1[, $ref2, ...])

Serilize references to XML. Where $ref is reference to SCALAR, HASH or ARRAY. This method used only for write XML mode.

 $wr->write({1=>2},[4..6]);
 my $a="1";
 $wr->write(\$a);

=cut

sub write {
    my $self   = shift;
    my $writer = $self->_get_writer;
    foreach (@_) {
        $writer->startTag('flow_data_struct');
        $self->__ref2xml( $writer, $_ );
        $writer->endTag('flow_data_struct');

    }
    return;
}

sub _xml2hash_handler {
    my $self = shift;
    my ( $struct, $data, $elem, %attr ) = @_;
    my ( $state, $shared ) = @{$struct}{ 'state', 'shared' };
    my $tag_stack = $shared->{tag_stack} || [];
    $shared->{tag_stack} = $tag_stack;
    for ($state) {

        /1/ && do {
            my $new = { name => $elem, 'attr' => \%attr };
            push @$tag_stack, $new;
            if ( $elem eq 'value' ) {
                $new->{type} = $attr{type};
                for ( $new->{type} ) {
                    /hashref/       && do { $new->{value} = {} }
                      || /arrayref/ && do { $new->{value} = [] }
                }
            }
          }
          || /2/ && do {
            if ( my $current = pop @{$tag_stack} ) {
                push @{$tag_stack}, $current;
                if ( $current->{name} eq 'key' ) {
                    unless ( ref $current->{value} ) {
                        $current->{value} .= $elem;
                        return;    #clear return value
                    }
                }

            }

          }
          || /3/ && do {
            if ( my $current = pop @{$tag_stack} ) {
                my $parent = pop @{$tag_stack};
                die "Stack error " . Dumper() unless $current->{name} eq $elem;
                if ( $elem eq 'key' ) {
                    push @{$tag_stack}, $parent;
                    my $ref_val;
                    if ( exists $current->{attr}->{value}
                        and $current->{attr}->{value} eq 'undef' )
                    {
                        $current->{value} = undef;
                    }
                    else {
                        $current->{value} = '' unless defined $current->{value};
                    }
                    for ( $parent->{type} ) {
                        /hashref/ && do {
                            $parent->{value} ||= {};
                            $parent->{value}->{ $current->{attr}->{name} } =
                              $current->{value};
                          }
                          || /arrayref/ && do {
                            $parent->{value} ||= [];
                            ${ $parent->{value} }[ $current->{attr}->{name} ] =
                              $current->{value};
                          }
                          || /scalarref/ && do {
                            $parent->{value} = \$current->{value};
                          }
                    }

                }
                elsif ( $elem eq 'value' ) {
                    if ($parent) {
                        push @{$tag_stack}, $parent;
                        $parent->{value} = $current->{value};
                    }
                    else {
                        $self->_parse_stream( { %$struct, state => 4 },
                            $current->{value} );
                    }

                }

            }
            else { die "empty stack !" . Dumper( \@_ ) }
          }
    }    #for
}    #sub

sub _parse_stream {
    my $self = shift;
    my ( $struct, $data, $elem, %attr ) = @_;
    my ( $state, $shared, $tags ) = @{$struct}{ 'state', 'shared', 'tags' };
    my $have_default = exists( $tags->{'*'} );
    my $stream_stack = $shared->{stream_stack} || [];
    $shared->{stream_stack} = $stream_stack;
    if ( $state == 4 ) {
        my $current = pop @{$stream_stack};
        push @{ $current->{value} }, $data;
        push @{$stream_stack}, $current;
        $self->_events(
            {
                'curr' => sub { $self->_parse_stream(@_) }
            }
        );
        return;
    }
    if ( $elem eq 'flow_data_struct' ) {
        if ( $state == 1 ) {
            $self->_events(
                {
                    'curr' => sub { $self->_xml2hash_handler(@_) }
                }
            );
        }
        else {

            # Close flow;
        }
        return;
    }
    if ( $state == 2 && ( my $current = pop @{$stream_stack} ) ) {
        unless ( exists $current->{fake} ) {
            $current->{text} = '' unless exists $current->{text};
            $current->{text} .= $elem;
        }
        push @{$stream_stack}, $current;
    }

    if ( $state == 1 ) {
        push @{$stream_stack},
          exists( $tags->{$elem} )
          || $have_default
          ? { name => $elem, attr => \%attr }
          : { fake => 1 };
    }
    if ( $state == 3 ) {
        my $current = pop @{$stream_stack};
        my $handler;    #handler for tag
        my $default_handler_selected = 0;
        unless ($have_default) {
            return unless defined( $tags->{$elem} );
            return unless $handler = $tags->{ $current->{name} };
        }
        else {
            unless ( $handler = $tags->{ $current->{name} } ) {
                $handler                  = $tags->{'*'};
                $default_handler_selected = 1;
            }
        }
        print 'ERROR stack for ' . $elem . "->" . $current->{name}
          unless $current->{name} eq $elem;

        #before call handler push to stack text values
        my $text = delete $current->{text};

        # not save format text
        push @{ $current->{value} }, $text
          if defined $text && $text !~ /^\s+$/s;
        my @res = (
            $handler->(
                $default_handler_selected
                ? ( $current->{name} )
                : ( ),  $current->{attr},
                ref( $current->{value} )      ? @{ $current->{value} }
                : defined( $current->{text} ) ? $current->{text}
                : ()
            )
        );
        if ( my $parent = pop @{$stream_stack} ) {
            if ( scalar @res && not exists $parent->{fake} ) {

                # store braked chars streams to values
                # <tag> text text <tag2>some</tag2> continued text</tag>
                my $text = delete $parent->{text};

                # not save format text
                push @{ $parent->{value} }, $text
                  if defined $text && $text !~ /^\s+$/s;
                push @{ $parent->{value} }, @res;
            }
            push @{$stream_stack}, $parent;
        }
    }
}

sub _handle_ev {
    my $self   = shift;
    my $events = $self->_events;
    return $events->{'curr'}->(@_);
}

=head2 read({tag1=>sub1{}[, tag2=>\&sub2 })

Run XML parser. Argument is a reference to hash with tag => handler.
If handler eq undef, then tag ignore. If subroutine return non undef result, it passed to parent
tag handler. Handler called with args: ( {hash of attributes}, <reference to data> [,<reference to data>] ).
For example:

Source xml :

 <?xml version="1.0" encoding="UTF-8"?>
 <Root>
  <Obj>
    <Also>
      <flow_data_struct>
        <value type="scalarref">
          <key name="scalar">3</key>
        </value>
      </flow_data_struct>
      <flow_data_struct>
        <value type="hashref">
          <key name="1" value="undef"></key>
        </value>
      </flow_data_struct>
    </Also>
  </Obj>
 </Root>

Read code:

 my $rd = new XML::Flow:: "test.xml";
 my %tags = (
    Root=>undef,
    Obj=>sub { print Dumper(\@_) },
    Also=>sub { 
        shift; #reference to hash of attributes
        return @_},
    );
 $rd->read(\%tags);
 $rd->close;

Output:

 $VAR1 = [
          {}, #reference to hash of xml tag attributes
          \'3',
          {
            '1' => undef
          }
        ];

=cut

sub read {
    my $self = shift;
    my $tags = shift or return;
    $self->_events(
        {
            'curr' => sub { $self->_parse_stream(@_) }
        }
    );
    my $shared = {};
    my $parser = new XML::Parser(
        Handlers => {
            Start => sub {
                $self->_handle_ev(
                    { state => 1, shared => $shared, tags => $tags }, @_ );
            },
            Char => sub {
                $self->_handle_ev(
                    { state => 2, shared => $shared, tags => $tags }, @_ );
            },
            End => sub {
                $self->_handle_ev(
                    { state => 3, shared => $shared, tags => $tags }, @_ );
            },
        }
    );
    $parser->parse( $self->_get_handle() || ${ $self->_file } );
}

=head2 close()

Close all handlers (including internal).

=cut

sub close {
    my $self = shift;
    $self->_file_handle->close if $self->_need_close and $self->_file_handle;
}

1;
__END__


=head1 SEE ALSO

XML::Parser, XML::Writer

=head1 AUTHOR

Zahatski Aliaksandr, <zag@cpan.org>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2006-2010 by Zahatski Aliaksandr

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.


=cut