##############################################################################
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
# Jabber
# Copyright (C) 1998-2004 Jabber Software Foundation http://jabber.org/
#
##############################################################################
package XML::Stream::Parser;
=head1 NAME
XML::Stream::Parser - SAX XML Parser for XML Streams
=head1 SYNOPSIS
Light weight XML parser that builds XML::Parser::Tree objects from the
incoming stream and passes them to a function to tell whoever is using
it that there are new packets.
=head1 DESCRIPTION
This module provides a very light weight parser
=head1 METHODS
=head1 EXAMPLES
=head1 AUTHOR
By Ryan Eatmon in January of 2001 for http://jabber.org/
=head1 COPYRIGHT
This module is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut
use strict;
use vars qw( $VERSION );
$VERSION = "1.22";
sub new
{
my $self = { };
bless($self);
my %args;
while($#_ >= 0) { $args{ lc pop(@_) } = pop(@_); }
$self->{PARSING} = 0;
$self->{DOC} = 0;
$self->{XML} = "";
$self->{CNAME} = ();
$self->{CURR} = 0;
$args{nonblocking} = 0 unless exists($args{nonblocking});
$self->{NONBLOCKING} = delete($args{nonblocking});
$self->{DEBUGTIME} = 0;
$self->{DEBUGTIME} = $args{debugtime} if exists($args{debugtime});
$self->{DEBUGLEVEL} = 0;
$self->{DEBUGLEVEL} = $args{debuglevel} if exists($args{debuglevel});
$self->{DEBUGFILE} = "";
if (exists($args{debugfh}) && ($args{debugfh} ne ""))
{
$self->{DEBUGFILE} = $args{debugfh};
$self->{DEBUG} = 1;
}
if ((exists($args{debugfh}) && ($args{debugfh} eq "")) ||
(exists($args{debug}) && ($args{debug} ne "")))
{
$self->{DEBUG} = 1;
if (lc($args{debug}) eq "stdout")
{
$self->{DEBUGFILE} = new FileHandle(">&STDERR");
$self->{DEBUGFILE}->autoflush(1);
}
else
{
if (-e $args{debug})
{
if (-w $args{debug})
{
$self->{DEBUGFILE} = new FileHandle(">$args{debug}");
$self->{DEBUGFILE}->autoflush(1);
}
else
{
print "WARNING: debug file ($args{debug}) is not writable by you\n";
print " No debug information being saved.\n";
$self->{DEBUG} = 0;
}
}
else
{
$self->{DEBUGFILE} = new FileHandle(">$args{debug}");
if (defined($self->{DEBUGFILE}))
{
$self->{DEBUGFILE}->autoflush(1);
}
else
{
print "WARNING: debug file ($args{debug}) does not exist \n";
print " and is not writable by you.\n";
print " No debug information being saved.\n";
$self->{DEBUG} = 0;
}
}
}
}
$self->{SID} = exists($args{sid}) ? $args{sid} : "__xmlstream__:sid";
$self->{STYLE} = (exists($args{style}) ? lc($args{style}) : "tree");
$self->{DTD} = (exists($args{dtd}) ? lc($args{dtd}) : 0);
if ($self->{STYLE} eq "tree")
{
$self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); };
$self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); };
$self->{HANDLER}->{startElement} = sub{ &XML::Stream::Tree::_handle_element(@_); };
$self->{HANDLER}->{endElement} = sub{ &XML::Stream::Tree::_handle_close(@_); };
$self->{HANDLER}->{characters} = sub{ &XML::Stream::Tree::_handle_cdata(@_); };
}
elsif ($self->{STYLE} eq "node")
{
$self->{HANDLER}->{startDocument} = sub{ $self->startDocument(@_); };
$self->{HANDLER}->{endDocument} = sub{ $self->endDocument(@_); };
$self->{HANDLER}->{startElement} = sub{ &XML::Stream::Node::_handle_element(@_); };
$self->{HANDLER}->{endElement} = sub{ &XML::Stream::Node::_handle_close(@_); };
$self->{HANDLER}->{characters} = sub{ &XML::Stream::Node::_handle_cdata(@_); };
}
$self->setHandlers(%{$args{handlers}});
$self->{XMLONHOLD} = "";
return $self;
}
###########################################################################
#
# debug - prints the arguments to the debug log if debug is turned on.
#
###########################################################################
sub debug
{
return if ($_[1] > $_[0]->{DEBUGLEVEL});
my $self = shift;
my ($limit,@args) = @_;
return if ($self->{DEBUGFILE} eq "");
my $fh = $self->{DEBUGFILE};
if ($self->{DEBUGTIME} == 1)
{
my ($sec,$min,$hour) = localtime(time);
print $fh sprintf("[%02d:%02d:%02d] ",$hour,$min,$sec);
}
print $fh "XML::Stream::Parser: $self->{STYLE}: @args\n";
}
sub setSID
{
my $self = shift;
my $sid = shift;
$self->{SID} = $sid;
}
sub getSID
{
my $self = shift;
return $self->{SID};
}
sub setHandlers
{
my $self = shift;
my (%handlers) = @_;
foreach my $handler (keys(%handlers))
{
$self->{HANDLER}->{$handler} = $handlers{$handler};
}
}
sub parse
{
my $self = shift;
my $xml = shift;
return unless defined($xml);
return if ($xml eq "");
if ($self->{XMLONHOLD} ne "")
{
$self->{XML} = $self->{XMLONHOLD};
$self->{XMLONHOLD} = "";
}
# XXX change this to not use regex?
while($xml =~ s/<\!--.*?-->//gs) {}
$self->{XML} .= $xml;
return if ($self->{PARSING} == 1);
$self->{PARSING} = 1;
if(!$self->{DOC} == 1)
{
my $start = index($self->{XML},"<");
if ((substr($self->{XML},$start,3) eq "<?x") ||
(substr($self->{XML},$start,3) eq "<?X"))
{
my $close = index($self->{XML},"?>");
if ($close == -1)
{
$self->{PARSING} = 0;
return;
}
$self->{XML} = substr($self->{XML},$close+2,length($self->{XML})-$close-2);
}
&{$self->{HANDLER}->{startDocument}}($self);
$self->{DOC} = 1;
}
while(1)
{
if (length($self->{XML}) == 0)
{
$self->{PARSING} = 0;
return $self->returnData(0);
}
my $eclose = -1;
$eclose = index($self->{XML},"</".$self->{CNAME}->[$self->{CURR}].">")
if ($#{$self->{CNAME}} > -1);
if ($eclose == 0)
{
$self->{XML} = substr($self->{XML},length($self->{CNAME}->[$self->{CURR}])+3,length($self->{XML})-length($self->{CNAME}->[$self->{CURR}])-3);
$self->{PARSING} = 0 if ($self->{NONBLOCKING} == 1);
&{$self->{HANDLER}->{endElement}}($self,$self->{CNAME}->[$self->{CURR}]);
$self->{PARSING} = 1 if ($self->{NONBLOCKING} == 1);
$self->{CURR}--;
if ($self->{CURR} == 0)
{
$self->{DOC} = 0;
$self->{PARSING} = 0;
&{$self->{HANDLER}->{endDocument}}($self);
return $self->returnData(0);
}
next;
}
my $estart = index($self->{XML},"<");
my $cdatastart = index($self->{XML},"<![CDATA[");
if (($estart == 0) && ($cdatastart != 0))
{
my $close = index($self->{XML},">");
if ($close == -1)
{
$self->{PARSING} = 0;
return $self->returnData(0);
}
my $empty = (substr($self->{XML},$close-1,1) eq "/");
my $starttag = substr($self->{XML},1,$close-($empty ? 2 : 1));
my $nextspace = index($starttag," ");
my $attribs;
my $name;
if ($nextspace != -1)
{
$name = substr($starttag,0,$nextspace);
$attribs = substr($starttag,$nextspace+1,length($starttag)-$nextspace-1);
}
else
{
$name = $starttag;
}
my %attribs = $self->attribution($attribs);
if (($self->{DTD} == 1) && (exists($attribs{xmlns})))
{
}
&{$self->{HANDLER}->{startElement}}($self,$name,%attribs);
if($empty == 1)
{
&{$self->{HANDLER}->{endElement}}($self,$name);
}
else
{
$self->{CURR}++;
$self->{CNAME}->[$self->{CURR}] = $name;
}
$self->{XML} = substr($self->{XML},$close+1,length($self->{XML})-$close-1);
next;
}
if ($cdatastart == 0)
{
my $cdataclose = index($self->{XML},"]]>");
if ($cdataclose == -1)
{
$self->{PARSING} = 0;
return $self->returnData(0);
}
&{$self->{HANDLER}->{characters}}($self,substr($self->{XML},9,$cdataclose-9));
$self->{XML} = substr($self->{XML},$cdataclose+3,length($self->{XML})-$cdataclose-3);
next;
}
if ($estart == -1)
{
$self->{XMLONHOLD} = $self->{XML};
$self->{XML} = "";
}
elsif (($cdatastart == -1) || ($cdatastart > $estart))
{
&{$self->{HANDLER}->{characters}}($self,$self->entityCheck(substr($self->{XML},0,$estart)));
$self->{XML} = substr($self->{XML},$estart,length($self->{XML})-$estart);
}
}
}
sub attribution
{
my $self = shift;
my $str = shift;
$str = "" unless defined($str);
my %attribs;
while(1)
{
my $eq = index($str,"=");
if((length($str) == 0) || ($eq == -1))
{
return %attribs;
}
my $ids;
my $id;
my $id1 = index($str,"\'");
my $id2 = index($str,"\"");
if((($id1 < $id2) && ($id1 != -1)) || ($id2 == -1))
{
$ids = $id1;
$id = "\'";
}
if((($id2 < $id1) && ($id1 == -1)) || ($id2 != -1))
{
$ids = $id2;
$id = "\"";
}
my $nextid = index($str,$id,$ids+1);
my $val = substr($str,$ids+1,$nextid-$ids-1);
my $key = substr($str,0,$eq);
while($key =~ s/\s//) {}
$attribs{$key} = $self->entityCheck($val);
$str = substr($str,$nextid+1,length($str)-$nextid-1);
}
return %attribs;
}
sub entityCheck
{
my $self = shift;
my $str = shift;
while($str =~ s/\<\;/\</) {}
while($str =~ s/\>\;/\>/) {}
while($str =~ s/\"\;/\"/) {}
while($str =~ s/\&apos\;/\'/) {}
while($str =~ s/\&\;/\&/) {}
return $str;
}
sub parsefile
{
my $self = shift;
my $fileName = shift;
open(FILE,"<",$fileName);
my $file;
while(<FILE>) { $file .= $_; }
$self->parse($file);
close(FILE);
return $self->returnData();
}
sub returnData
{
my $self = shift;
my $clearData = shift;
$clearData = 1 unless defined($clearData);
my $sid = $self->{SID};
if ($self->{STYLE} eq "tree")
{
return unless exists($self->{SIDS}->{$sid}->{tree});
my @tree = @{$self->{SIDS}->{$sid}->{tree}};
delete($self->{SIDS}->{$sid}->{tree}) if ($clearData == 1);
return ( \@tree );
}
if ($self->{STYLE} eq "node")
{
return unless exists($self->{SIDS}->{$sid}->{node});
my $node = $self->{SIDS}->{$sid}->{node}->[0];
delete($self->{SIDS}->{$sid}->{node}) if ($clearData == 1);
return $node;
}
}
sub startDocument
{
my $self = shift;
}
sub endDocument
{
my $self = shift;
}
sub startElement
{
my $self = shift;
my ($sax, $tag, %att) = @_;
return unless ($self->{DOC} == 1);
if ($self->{STYLE} eq "debug")
{
print "$self->{DEBUGHEADER} \\\\ (",join(" ",%att),")\n";
$self->{DEBUGHEADER} .= $tag." ";
}
else
{
my @NEW;
if($#{$self->{TREE}} < 0)
{
push @{$self->{TREE}}, $tag;
}
else
{
push @{ $self->{TREE}[ $#{$self->{TREE}}]}, $tag;
}
push @NEW, \%att;
push @{$self->{TREE}}, \@NEW;
}
}
sub characters
{
my $self = shift;
my ($sax, $cdata) = @_;
return unless ($self->{DOC} == 1);
if ($self->{STYLE} eq "debug")
{
my $str = $cdata;
$str =~ s/\n/\#10\;/g;
print "$self->{DEBUGHEADER} || $str\n";
}
else
{
return if ($#{$self->{TREE}} == -1);
my $pos = $#{$self->{TREE}};
if ($pos > 0 && $self->{TREE}[$pos - 1] eq "0")
{
$self->{TREE}[$pos - 1] .= $cdata;
}
else
{
push @{$self->{TREE}[$#{$self->{TREE}}]}, 0;
push @{$self->{TREE}[$#{$self->{TREE}}]}, $cdata;
}
}
}
sub endElement
{
my $self = shift;
my ($sax, $tag) = @_;
return unless ($self->{DOC} == 1);
if ($self->{STYLE} eq "debug")
{
$self->{DEBUGHEADER} =~ s/\S+\ $//;
print "$self->{DEBUGHEADER} //\n";
}
else
{
my $CLOSED = pop @{$self->{TREE}};
if($#{$self->{TREE}} < 1)
{
push @{$self->{TREE}}, $CLOSED;
if($self->{TREE}->[0] eq "stream:error")
{
$self->{STREAMERROR} = $self->{TREE}[1]->[2];
}
}
else
{
push @{$self->{TREE}[$#{$self->{TREE}}]}, $CLOSED;
}
}
}
1;