#!/usr/bin/perl
use strict;
use warnings;
# DO NOT RELY ON THIS AS A REAL XML PARSER!
# It is not intended to be used actually as an XML parser, simply to stand as
# an example of how you might use Parser::MGC to parse an XML-like syntax
# There are a great many things it doesn't do correctly; it lacks at least the
# following features:
# Entities
# Processing instructions
# Comments
# CDATA
package XmlParser;
use base qw( Parser::MGC );
sub parse
{
my $self = shift;
my $rootnode = $self->parse_node;
$rootnode->kind eq "element" or die "Expected XML root node";
$rootnode->name eq "xml" or die "Expected XML root node";
return [ $rootnode->children ];
}
sub parse_node
{
my $self = shift;
# A "node" is either an XML element subtree or plaintext
$self->any_of(
\&parse_plaintext,
\&parse_element,
);
}
sub parse_plaintext
{
my $self = shift;
my $str = $self->substring_before( '<' );
$self->fail( "No plaintext" ) unless length $str;
return XmlParser::Node::Plain->new( $str );
}
sub parse_element
{
my $self = shift;
my $tag = $self->parse_tag;
$self->commit;
my $node = bless [ node => $tag->{name}, $tag->{attrs} ], "XmlParser::Node";
return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs} ) if $tag->{selfclose};
my $childlist = $self->sequence_of( \&parse_node );
$self->parse_close_tag->{name} eq $tag->{name}
or $self->fail( "Expected $tag->{name} to be closed" );
return XmlParser::Node::Element->new( $tag->{name}, $tag->{attrs}, @$childlist );
}
sub parse_tag
{
my $self = shift;
$self->expect( '<' );
my $tagname = $self->token_ident;
my @attrs = @{ $self->sequence_of( \&parse_tag_attr ) };
my $selfclose = $self->maybe_expect( '/' );
$self->expect( '>' );
return {
name => $tagname,
attrs => { map { ( $_->[0], $_->[1] ) } @attrs },
selfclose => $selfclose,
};
}
sub parse_close_tag
{
my $self = shift;
$self->expect( '</' );
my $tagname = $self->token_ident;
$self->expect( '>' );
return { name => $tagname };
}
sub parse_tag_attr
{
my $self = shift;
my $attrname = $self->token_ident;
$self->expect( '=' );
return [ $attrname => $self->parse_tag_attr_value ];
}
sub parse_tag_attr_value
{
my $self = shift;
# TODO: This sucks
return $self->token_string;
}
use Data::Dumper;
if( !caller ) {
my $parser = __PACKAGE__->new;
my $ret = $parser->from_file( \*STDIN );
print Dumper( $ret );
}
package XmlParser::Node;
sub new { my $class = shift; bless [ @_ ], $class }
package XmlParser::Node::Plain;
use base qw( XmlParser::Node );
sub kind { "plain" }
sub text { shift->[0] }
package XmlParser::Node::Element;
use base qw( XmlParser::Node );
sub kind { "element" }
sub name { shift->[0] }
sub attrs { shift->[1] }
sub children { my $self = shift; @{$self}[2..$#$self] }
1;