The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
package Treex::PML::Schema::Reader;

use strict;
use warnings;

use vars qw($VERSION);
BEGIN {
  $VERSION='2.16'; # version template
}
no warnings 'uninitialized';
use Carp;

use Scalar::Util qw(weaken blessed);
use XML::LibXML::Reader;

sub new {
  my ($class,$opts)=@_;
  my $URL = $opts->{URL};
  my @common = (
     no_xinclude_nodes => 1,
     no_cdata => 1,
     expand_xinclude => 1,
     no_blanks => 1,
     expand_entities => 1,
     suppress_errors => 0,
     suppress_warnings => 0,
  );
  if ($opts->{validate}) {
    my $rng = $opts->{relaxng_schema} || Treex::PML::FindInResources('pml_schema_inline.rng');
    if (defined $rng) {
      push @common, (RelaxNG => $rng);
    } else {
      warn __PACKAGE__.": Validation requested, but 'pml_schema_inline.rng' was not found in the ResourcePath: ".Treex::PML::ResourcePath()."\n";
    }
  }
  my ($reader,$fh);
  # print "loading schema $opts->{URL}\n";
  if ($opts->{string}) {
    $URL ||=  'string://';
    $reader = XML::LibXML::Reader->new(string => $opts->{string},
				       @common,
				       URI => $URL,
				      )
      or die "Error reading string ($URL)";
  } elsif ($opts->{fh}) {
    $URL ||=  'fh://';
    $reader = XML::LibXML::Reader->new(IO => $opts->{string}, @common )
      or die "Error reading file-handle $fh ($URL)";
  } elsif (blessed($opts->{reader}) and $opts->{reader}->isa('XML::LibXML::Reader')) {
    $reader = $opts->{reader};
    $URL ||= $reader->document->URI;
  } else {
    my $file = $opts->{URL};
    print STDERR "parsing schema $file\n" if $Treex::PML::Debug;
    $fh = eval { Treex::PML::IO::open_uri($file) };
    croak "Couldn't open PML schema file '$file'\n".$@ if (!$fh || $@);
    $reader = XML::LibXML::Reader->new(FD => $fh, @common, URI => $URL )
      or die "Error reading $file";
  }
  return bless [$reader,$opts,$fh], $class;
}
sub DESTROY {
  my ($self)=@_;
  my $fh = $self->file_handle;
  Treex::PML::IO::close_uri($fh) if $fh;
}
sub reader {
  return ref($_[0]) && $_[0][0];
}
sub options {
  return ref($_[0]) && $_[0][1];
}
sub file_handle {
  return ref($_[0]) && $_[0][2];
}

sub parse_element {
  my ($self,$parent)=@_;
  my $reader = $self->reader;
  my $opts = $self->options;
  my (@children,@attrs);
  my $el_ns = $reader->namespaceURI;
  my $el_name = $reader->localName;
  my $has_default_ns = $el_ns eq $opts->{DefaultNs} ? 1 : 0;
  my $el_ns_name = ($has_default_ns) ? $el_name :  '{'.$el_ns.'}'.$el_name;
  my $prefix = $reader->prefix;
  my %val = (
    -xml_name => $el_ns_name,
    ($has_default_ns ? () : (-xml_ns => $el_ns)),
    (defined($prefix) && length($prefix) ? (-xml_prefix => $prefix) : ()),
    -parent => $parent,
    -attributes => \@attrs,
    );
  weaken($val{-parent}) if $val{-parent};

  if ($reader->moveToFirstAttribute==1) {
    do {{
      my $name = $reader->name;
      push @attrs,$name;
      $val{$name} = $reader->value;
    }} while ($reader->moveToNextAttribute);
    $reader->moveToElement;
  }
  my $obj = \%val;
  {
    my $class = $opts->{Bless}{$el_ns_name} || $opts->{Bless}{'*'};
    if (defined $class) {
      bless $obj,$class;
      $obj->init($opts) if $obj->can('init');
    }
  }
  my $depth = $reader->depth;
  my $status;
   while (($status = $reader->read==1)) {
    last unless $reader->depth > $depth;
    my $nodeType = $reader->nodeType;
    my $chld;
    my $redo = 0;
    if ($nodeType == XML_READER_TYPE_ELEMENT) {
      $chld = $self->parse_element($obj);
      $redo = 1;
    } elsif ($nodeType == XML_READER_TYPE_TEXT or
	  $nodeType == XML_READER_TYPE_CDATA) {
      $chld = bless {
	-xml_name => '#text',
	-value => $reader->value,
      }, 'Treex::PML::Schema::XMLNode';
    } elsif ($nodeType == XML_READER_TYPE_COMMENT) {
      $chld = bless {
	-xml_name => '#comment',
	-value => $reader->value,
      }, 'Treex::PML::Schema::XMLNode';
    } elsif ($nodeType == XML_READER_TYPE_PROCESSING_INSTRUCTION) {
      $chld = bless {
	-xml_name => '#processing-instruction',
	-name => $reader->name,
	-value => $reader->value,
      }, 'Treex::PML::Schema::XMLNode';
    } elsif ($nodeType == XML_READER_TYPE_END_ELEMENT or
	     $nodeType == XML_READER_TYPE_SIGNIFICANT_WHITESPACE or
	     $nodeType == XML_READER_TYPE_WHITESPACE) {
      next;
    } else {
      $chld = bless {
	-xml_name => '#other',
	-xml_nodetype => $nodeType,
	-name => $reader->name,
	($reader->hasValue ? (-value => $reader->value) : ()),
	-xml => $reader->readOuterXml,
       }, 'Treex::PML::Schema::XMLNode';
    }
    push @children, $chld if defined $chld;
    redo if $redo;
  }
  if ($status == -1) {
    croak "XMLReader error in $opts->{URL} near line ".$reader->lineNumber;
  }

  my $i=0;
  my %try_data;
  if (my $cont = $opts->{TextOnly}{$el_ns_name}) {
    my $text;
    foreach my $c (@children) {
      if ($c->{'-xml_name'} ne '#text') {
	warn "Ignoring unexpected node ".$c->{'-xml_name'}." in a text-only element $el_ns_name\n";
      } else {
	$text.=$c->{-value};
      }
    }
    $val{$cont} = $text;
  } else {
    foreach my $c (@children) {
      $c->{'-#'} = $i++;
      my $name = $c->{-xml_name};
      if (!ref($val{$name})) {
	if (exists $val{$name}) {
	  warn "Collision between an attribute and child-element $name\n";
	  $val{'@'.$name} = delete $val{$name}
	}
      }
      my $value;
      if (my $cont = $opts->{Stringify}{$name}) {
	$value = $c->{$cont};
	$value='' unless defined $value;
      } else {
	$value = $c;
      }
      if ($opts->{Solitary}{$name}) {
	if (exists $val{$name}) {
	  warn "Multiple occurences of the child-element '$name'\n";
	}
	$val{$name} = $value
      } elsif (my $key = $opts->{KeyAttr}{$name}) {
	my $val  = delete $c->{$key};
	$c->{-name}=$val;
	$val{$name}{$val} = $value;
      } else {
	push @{$val{$name}}, $value;
      }
      weaken($c->{-parent} = $obj);
    }
    $obj->{'-##'} = $i;
  }
  if (UNIVERSAL::can($obj,'post_process')) {
    $obj->post_process($opts);
  }
  return $obj;
}


1;
__END__
# Below is stub documentation for your module. You'd better edit it!

=head1 NAME

Treex::PML::Schema::Reader - an auxiliary class for parsing PML schemas

=head1 DESCRIPTION

This class is used in the Treex::PML::Schema->new constructor to acutally parse
the XML representation of a PML schema into Perl data structures. It
is a simple, faster, and much more extensible replacement for
XML::Simple. Treex::PML::Schema::Reader uses XML::LibXML::Reader for XML
parsing.


=head1 SEE ALSO

L<Treex::PML::Schema::XMLNode>, L<Treex::PML::Schema>, L<XML::LibXML::Reader>, L<XML::Simple>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2008-2010 by Petr Pajas

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 BUGS

None reported... yet.

=cut