The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
# $Id: Parser.pm,v 1.26 2008/10/04 14:00:24 gavin Exp $
# Copyright (c) 2003-2008 Gavin Brown. All rights reserved. This program is
# free software; you can redistribute it and/or modify it under the same
# terms as Perl itself.
package Gtk2::Ex::PodViewer::Parser;
use base 'Pod::Parser';
use Carp;
use IO::Scalar;
use vars qw(%ENTITIES $LINK_TEXT_TEMPLATE $GETTEXT);
use Exporter;
use bytes;
use strict;

require 5.8.0;

our @EXPORT_OK = qw(&decode_entities);

# This table is taken near verbatim from Pod::PlainText in Pod::Parser, which
# got it near verbatim from the original Pod::Text.  It is therefore credited
# to Tom Christiansen, and I'm glad I didn't have to write it.  :)  "iexcl" to
# "divide" added by Tim Jenness.
our %ENTITIES	= (
	'amp'		=>	'&',		# ampersand
	'apos'		=>	"'",		# apostrophe
	'lt'		=>	'<',		# left chevron, less-than
	'gt'		=>	'>',		# right chevron, greater-than
	'quot'		=>	'"',		# double quote
	'sol'		=>	'/',		# solidus (forward slash)
	'verbar'	=>	'|',		# vertical bar
	"Aacute"	=>	"\xC1",		# capital A, acute accent
	"aacute"	=>	"\xE1",		# small a, acute accent
	"Acirc"		=>	"\xC2",		# capital A, circumflex accent
	"acirc"		=>	"\xE2",		# small a, circumflex accent
	"AElig"		=>	"\xC6",		# capital AE diphthong (ligature)
	"aelig"		=>	"\xE6",		# small ae diphthong (ligature)
	"Agrave"	=>	"\xC0",		# capital A, grave accent
	"agrave"	=>	"\xE0",		# small a, grave accent
	"Aring"		=>	"\xC5",		# capital A, ring
	"aring"		=>	"\xE5",		# small a, ring
	"Atilde"	=>	"\xC3",		# capital A, tilde
	"atilde"	=>	"\xE3",		# small a, tilde
	"Auml"		=>	"\xC4",		# capital A, dieresis or umlaut mark
	"auml"		=>	"\xE4",		# small a, dieresis or umlaut mark
	"Ccedil"	=>	"\xC7",		# capital C, cedilla
	"ccedil"	=>	"\xE7",		# small c, cedilla
	"Eacute"	=>	"\xC9",		# capital E, acute accent
	"eacute"	=>	"\xE9",		# small e, acute accent
	"Ecirc"		=>	"\xCA",		# capital E, circumflex accent
	"ecirc"		=>	"\xEA",		# small e, circumflex accent
	"Egrave"	=>	"\xC8",		# capital E, grave accent
	"egrave"	=>	"\xE8",		# small e, grave accent
	"ETH"		=>	"\xD0",		# capital Eth, Icelandic
	"eth"		=>	"\xF0",		# small eth, Icelandic
	"Euml"		=>	"\xCB",		# capital E, dieresis or umlaut mark
	"euml"		=>	"\xEB",		# small e, dieresis or umlaut mark
	"Iacute"	=>	"\xCD",		# capital I, acute accent
	"iacute"	=>	"\xED",		# small i, acute accent
	"Icirc"		=>	"\xCE",		# capital I, circumflex accent
	"icirc"		=>	"\xEE",		# small i, circumflex accent
	"Igrave"	=>	"\xCC",		# capital I, grave accent
	"igrave"	=>	"\xEC",		# small i, grave accent
	"Iuml"		=>	"\xCF",		# capital I, dieresis or umlaut mark
	"iuml"		=>	"\xEF",		# small i, dieresis or umlaut mark
	"Ntilde"	=>	"\xD1",		# capital N, tilde
	"ntilde"	=>	"\xF1",		# small n, tilde
	"Oacute"	=>	"\xD3",		# capital O, acute accent
	"oacute"	=>	"\xF3",		# small o, acute accent
	"Ocirc"		=>	"\xD4",		# capital O, circumflex accent
	"ocirc"		=>	"\xF4",		# small o, circumflex accent
	"Ograve"	=>	"\xD2",		# capital O, grave accent
	"ograve"	=>	"\xF2",		# small o, grave accent
	"Oslash"	=>	"\xD8",		# capital O, slash
	"oslash"	=>	"\xF8",		# small o, slash
	"Otilde"	=>	"\xD5",		# capital O, tilde
	"otilde"	=>	"\xF5",		# small o, tilde
	"Ouml"		=>	"\xD6",		# capital O, dieresis or umlaut mark
	"ouml"		=>	"\xF6",		# small o, dieresis or umlaut mark
	"szlig"		=>	"\xDF",		# small sharp s, German (sz ligature)
	"THORN"		=>	"\xDE",		# capital THORN, Icelandic
	"thorn"		=>	"\xFE",		# small thorn, Icelandic
	"Uacute"	=>	"\xDA",		# capital U, acute accent
	"uacute"	=>	"\xFA",		# small u, acute accent
	"Ucirc"		=>	"\xDB",		# capital U, circumflex accent
	"ucirc"		=>	"\xFB",		# small u, circumflex accent
	"Ugrave"	=>	"\xD9",		# capital U, grave accent
	"ugrave"	=>	"\xF9",		# small u, grave accent
	"Uuml"		=>	"\xDC",		# capital U, dieresis or umlaut mark
	"uuml"		=>	"\xFC",		# small u, dieresis or umlaut mark
	"Yacute"	=>	"\xDD",		# capital Y, acute accent
	"yacute"	=>	"\xFD",		# small y, acute accent
	"yuml"		=>	"\xFF",		# small y, dieresis or umlaut mark
	"laquo"		=>	"\xAB",		# left pointing double angle quotation mark
	"lchevron"	=>	"\xAB",		# synonym (backwards compatibility)
	"raquo"		=>	"\xBB",		# right pointing double angle quotation mark
	"rchevron"	=>	"\xBB",		# synonym (backwards compatibility)
	"iexcl"		=>	"\xA1",		# inverted exclamation mark
	"cent"		=>	"\xA2",		# cent sign
	"pound"		=>	"\xA3",		# (UK) pound sign
	"curren"	=>	"\xA4",		# currency sign
	"yen"		=>	"\xA5",		# yen sign
	"brvbar"	=>	"\xA6",		# broken vertical bar
	"sect"		=>	"\xA7",		# section sign
	"uml"		=>	"\xA8",		# diaresis
	"copy"		=>	"\xA9",		# Copyright symbol
	"ordf"		=>	"\xAA",		# feminine ordinal indicator
	"not"		=>	"\xAC",		# not sign
	"shy"		=>	'',		# soft (discretionary) hyphen
	"reg"		=>	"\xAE",		# registered trademark
	"macr"		=>	"\xAF",		# macron, overline
	"deg"		=>	"\xB0",		# degree sign
	"plusmn"	=>	"\xB1",		# plus-minus sign
	"sup2"		=>	"\xB2",		# superscript 2
	"sup3"		=>	"\xB3",		# superscript 3
	"acute"		=>	"\xB4",		# acute accent
	"micro"		=>	"\xB5",		# micro sign
	"para"		=>	"\xB6",		# pilcrow sign	= paragraph sign
	"middot"	=>	"\xB7",		# middle dot	= Georgian comma
	"cedil"		=>	"\xB8",		# cedilla
	"sup1"		=>	"\xB9",		# superscript 1
	"ordm"		=>	"\xBA",		# masculine ordinal indicator
	"frac14"	=>	"\xBC",		# vulgar fraction one quarter
	"frac12"	=>	"\xBD",		# vulgar fraction one half
	"frac34"	=>	"\xBE",		# vulgar fraction three quarters
	"iquest"	=>	"\xBF",		# inverted question mark
	"times"		=>	"\xD7",		# multiplication sign
	"divide"	=>	"\xF7",		# division sign
	"nbsp"		=>	"\x01",		# non-breaking space
);

our $LINK_TEXT_TEMPLATE = '{section} in the {document} manpage';

our $GETTEXT = 0;
eval qq(
	use Locale::gettext;
	$GETTEXT = 1;
);

=pod

=head1 NAME

Gtk2::Ex::PodViewer::Parser - a custom POD Parser for Gtk2::Ex::PodViewer.

=head1 SYNOPSIS

	$Gtk2::Ex::PodViewer::Parser::LINK_TEXT_TEMPLATE = '{section} in the {document} manpage';

	my $parser = Gtk2::Ex::PodViewer::Parser->new(
		buffer	=> $Gtk2TextView->get_buffer,
	);

	$parser->parse_from_file($file);

=head1 DESCRIPTION

Gtk2::Ex::PodViewer::Parser is a custom Pod parser for the Gtk2::Ex::PodViewer widget. You should never need to use it directly.

It is based on L<Pod::Parser>.

=head1 METHODS

=cut

sub new {
	my $package = shift;
	my %args = @_;
	my $parser = $package->SUPER::new;
	$parser->{buffer} = $args{buffer};
	$parser->{iter} = $parser->{buffer}->get_iter_at_offset(0);
	bless($parser, $package);
	return $parser;
}

sub command {
	my ($parser, $command, $paragraph, $line_num) = @_;
	if ($command =~ /^head/i) {
		$paragraph =~ s/[\s\r\n]*$//g;
		my $mark = $parser->{buffer}->create_mark($paragraph, $parser->{iter}, 1);
		push(@{$parser->{marks}}, [$paragraph, $mark, $parser->{iter}]);
		$parser->insert_text($paragraph, $line_num, $command);
		$parser->insert_text("\n\n", $line_num);
	} elsif (lc($command) eq 'item') {
		my $dot = chr(183);
		$paragraph =~ s/\n*$//g;
		if ($paragraph eq '*') {
			$parser->insert_text("$dot ", $line_num, qw(word_wrap bold indented));
		} elsif ($paragraph =~ /^\*\s*/) {
			$paragraph =~ s/^\*\s*//;
			$parser->insert_text("$dot ", $line_num, qw(word_wrap bold indented));
			$parser->insert_text("$paragraph\n\n", $line_num, qw(word_wrap indented));
		} elsif ($paragraph =~ /^\d+$/i) {
			$parser->insert_text("$paragraph ", $line_num, qw(word_wrap bold indented));
		} else {
			$parser->insert_text("$dot ", $line_num, qw(word_wrap bold indented));
			$parser->insert_text("$paragraph\n\n", $line_num, qw(word_wrap indented));
		}
	} elsif ($command !~ /^(pod|cut|for|over|back)$/i) {
		carp("unknown command: '$command' on line $line_num");
		$parser->insert_text($paragraph, $line_num, qw(word_wrap));
	}
}

sub verbatim {
	my ($parser, $paragraph, $line_num) = @_;
	$parser->insert_text($paragraph, $line_num, qw(monospace));
}

sub textblock {
	my ($parser, $paragraph, $line_num) = @_;
	$paragraph =~ s/[\r\n]/ /sg;
	$paragraph .= "\n\n";
	$parser->insert_text($paragraph, $line_num, qw(word_wrap));
}

sub insert_text {
	my ($parser, $paragraph, $line_num, @tags) = @_;

	my %tagnames = (
		I	=> 'italic',
		B	=> 'bold',
		C	=> 'typewriter',
		L	=> 'link',
		F	=> 'italic',
		S	=> 'monospace',
		E	=> 'word_wrap',
		X	=> 'normal',
		Z	=> 'normal',
	);

	$parser->parse_text(
		{
			-expand_ptree => sub {
				my ($parser, $ptree) = @_;

				foreach ($ptree->children) {
					if (ref($_) eq 'Pod::InteriorSequence') {
						my $sequence = $_;
						my $command = $sequence->cmd_name;
						my $text = $sequence->parse_tree->raw_text;

						if ($command eq 'E') {
							$text = $ENTITIES{$text} || $text;

						} elsif ($command eq 'L') {
							push(@{$parser->{links}}, [$text, $parser->{iter}->get_offset]);

							if ($text =~ /\|/) {
								($text, undef) = split(/\|/, $text, 2);
							}

							if ($text =~ /\/[^:]/ && $text !~ /:\/\//) {
								my ($doc, $section) = split(/\//, $text, 2);
								if ($doc eq '') {
									$text = $section;
								} else {
									$text = ($GETTEXT ? gettext($LINK_TEXT_TEMPLATE) : $LINK_TEXT_TEMPLATE);
									$text =~ s/\{section\}/$section/g;
									$text =~ s/\{document\}/$doc/g;
								}
							}

						}
						if (!exists($tagnames{$command})) {
							carp("warning: unknown formatting code '$command'\n");

						} else {
							$parser->{buffer}->insert_with_tags_by_name($parser->{iter}, decode_entities($text), $tagnames{$command}, @tags);

						}

					} else {
						my $text = $_;
						$parser->{buffer}->insert_with_tags_by_name($parser->{iter}, decode_entities($text), @tags);

					}
				}
			}
		},
		$paragraph,
		$line_num
	);

	return 1;
}

sub clear_marks {
	$_[0]->{marks} = [];
	return 1;
}

sub get_marks {
	my @names;
	map { push(@names, @{$_}[0]) } @{$_[0]->{marks} };
	return @names;
}

sub get_mark {
	my ($parser, $name) = @_;
	foreach my $mark (@{$parser->{marks}}) {
		return @{$mark}[1] if (@{$mark}[0] eq $name);
	}
	return undef;
}

sub parse_from_file {
	my ($self, $file) = @_;
	if (!open(FILE, '<:utf8', $file)) {
		carp("Cannot open '$file': $!");
		return undef;

	} else {
		my $data;
		while (<FILE>) {
			$data .= $_;
		}
		close(FILE);
		return $self->parse_from_string($data);
	}
}

=pod

One neat method not implemented by Pod::Parser is

	$parser->parse_from_string($string);

This parses a scalar containing POD data, using IO::Scalar to create a tied filehandle.

=cut

sub parse_from_string {
	my ($self, $string) = @_;
	my $handle = IO::Scalar->new(\$string);
	$self->{_source} = $string;
	$self->parse_from_filehandle($handle);
	$handle->close;
	return 1;
}

=pod

=head1 IMPORTABLE FUNCTIONS

	use Gtk2::Ex::PodViewer::Parser qw(decode_entities);
	my $text = decode_entities($pod);

This function takes a string of POD, and returns it with all the POD entities (eg C<EE<lt>gtE<gt>> =E<gt> "E<gt>") decoded into readable characters.

=cut

sub decode_entities {
	my $text = shift;
	$text =~ s/E<([^<]*)>/$ENTITIES{$1}/g;
	$text =~ s/\w{1}<([^<]*)>/$1/g;
	return $text;
}

sub source {
	my $self = shift;
	return $self->{_source};
}

=pod

=head1 VARIABLES

The C<$LINK_TEXT_TEMPLATE> class variable contains a string that is used to generate link text for POD links for the form

	LE<lt>foo/barE<gt>

This string is run through the C<gettext()> function from L<Locale::gettext> (if installed) before it is used, so if your application supports internationalisation, then the string will be translated if it appears in your translation domain. It contains two tokens, C<{section}> and C<{document}>, that are replaced with C<foo> and C<bar> respectively.

=head1 SEE ALSO

=over

=item *

L<Gtk2::Ex::PodViewer>

=item *

L<Pod::Parser>

=item *

L<Locale::gettext>

=back

=head1 AUTHORS

Gavin Brown, Torsten Schoenfeld and Scott Arrington.

=head1 COPYRIGHT

(c) 2003-2005 Gavin Brown (gavin.brown@uk.com). All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 

=cut

1;