The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: PerlSAX.pm,v 1.6 2000/02/28 10:40:21 matt Exp $

package XML::XPath::PerlSAX;
use XML::XPath::XMLParser;
use strict;

sub new {
	my $class = shift;
	my %args = @_;
	bless \%args, $class;
}

sub parse {
	my $self = shift;

	die "XML::XPath::PerlSAX: parser instance ($self) already parsing\n"
		if (defined $self->{ParseOptions});

	# If there's one arg and it's an array ref, assume it's a node we're parsing
	my $args;
	if (@_ == 1 && ref($_[0]) =~ /^(text|comment|element|namespace|attribute|pi)$/) {
#		warn "Parsing node\n";
		my $node = shift;
#		warn "PARSING: $node ", XML::XPath::XMLParser::as_string($node), "\n\n";
		$args = { Source => { Node => $node } };
	}
	else {
		$args = (@_ == 1) ? shift : { @_ };
	}

	my $parse_options = { %$self, %$args };
	$self->{ParseOptions} = $parse_options;

	# ensure that we have at least one source
	if (!defined $parse_options->{Source} ||
		!defined $parse_options->{Source}{Node}) {
		die "XML::XPath::PerlSAX: no source defined for parse\n";
	}

	# assign default Handler to any undefined handlers
	if (defined $parse_options->{Handler}) {
		$parse_options->{DocumentHandler} = $parse_options->{Handler}
			if (!defined $parse_options->{DocumentHandler});
	}

	# ensure that we have a DocumentHandler
	if (!defined $parse_options->{DocumentHandler}) {
		die "XML::XPath::PerlSAX: no Handler or DocumentHandler defined for parse\n";
	}

	# cache DocumentHandler in self for callbacks
	$self->{DocumentHandler} = $parse_options->{DocumentHandler};

	if ((ref($parse_options->{Source}{Node}) eq 'element') &&
			!($parse_options->{Source}{Node}->[node_parent])) {
		# Got root node
		$self->{DocumentHandler}->start_document( { } );
		$self->parse_node($parse_options->{Source}{Node});
		return $self->{DocumentHandler}->end_document( { } );
	}
	else {
		$self->parse_node($parse_options->{Source}{Node});
	}

	# clean up parser instance
	delete $self->{ParseOptions};
	delete $self->{DocumentHandler};

}

sub parse_node {
	my $self = shift;
	my $node = shift;
#	warn "parse_node $node\n";
	if (ref($node) eq 'element' && $node->[node_parent]) {
		# bundle up attributes
		my @attribs;
		foreach my $attr (@{$node->[node_attribs]}) {
			if ($attr->[node_prefix]) {
				push @attribs, $attr->[node_prefix] . ":" . $attr->[node_key];
			}
			else {
				push @attribs, $attr->[node_key];
			}
			push @attribs, $attr->[node_value];
		}
		
		$self->{DocumentHandler}->start_element(
				{ Name => $node->[node_name],
				  Attributes => \@attribs,
				}
			);
		foreach my $kid (@{$node->[node_children]}) {
			$self->parse_node($kid);
		}
		$self->{DocumentHandler}->end_element(
				{
					Name => $node->[node_name],
				}
			);
	}
	elsif (ref($node) eq 'text') {
		$self->{DocumentHandler}->characters($node->[node_text]);
	}
	elsif (ref($node) eq 'comment') {
		$self->{DocumentHandler}->comment($node->[node_comment]);
	}
	elsif (ref($node) eq 'pi') {
		$self->{DocumentHandler}->processing_instruction(
				{
					Target => $node->[node_target],
					Data => $node->[node_data]
				}
			);
	}
	elsif (ref($node) eq 'element') { # root node
		# just do kids
		foreach my $kid (@{$node->[node_children]}) {
			$self->parse_node($kid);
		}
	}
	else {
		die "Unknown node type: '", ref($node), "' ", scalar(@$node), "\n";
	}
}

1;

__END__

=head1 NAME

XML::XPath::PerlSAX - A PerlSAX event generator for my wierd node structure

=head1 SYNOPSIS

	use XML::XPath;
	use XML::XPath::PerlSAX;
	use XML::DOM::PerlSAX;
	
	my $xp = XML::XPath->new(filename => 'test.xhtml');
	my $paras = $xp->find('/html/body/p');
	
	my $handler = XML::DOM::PerlSAX->new();
	my $generator = XML::XPath::PerlSAX->new( Handler => $handler );
	
	foreach my $node ($paras->get_nodelist) {
		my $domtree = $generator->parse($node);
		# do something with $domtree
	}

=head1 DESCRIPTION

This module generates PerlSAX events to pass to a PerlSAX handler such
as XML::DOM::PerlSAX. It operates specifically on my wierd tree format.

Unfortunately SAX doesn't seem to cope with namespaces, so these are
lost completely. I believe SAX2 is doing namespaces.

=head1 Other

The XML::DOM::PerlSAX handler I tried was completely broken (didn't even
compile before I patched it a bit), so I don't know how correct this
is or how far it will work.

This software may only be distributed as part of the XML::XPath package.