# -*- perl -*-
#############################################################################
# Pod/objects.pm -- objects representing POD
#
# Copyright (C) 2001 by Marek Rouchal. All rights reserved.
# This package is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#############################################################################
use strict;
package Pod::objects;
# for CPAN
$Pod::objects::VERSION = '1.02';
=head1 NAME
Pod::objects - package with objects for representing POD documents
=head1 SYNOPSIS
require Pod::objects;
my $root = Pod::root->new;
=head1 DESCRIPTION
The following section describes the objects returned by
L<Pod::Compiler|Pod::Compiler> and their methods. These objects all
inherit from L<Tree::DAG_Node|Tree::DAG_Node>, so all methods described
there are valid as well.
The set/retrieve methods all work in the following way: If no argument
is specified, the corresponding value is returned. Otherwise the
object's value is set to the given argument and returned.
=head2 Common methods
The following methods are common for all the classes:
=over 4
=item class->B<new>( @parameters )
Create a new object instance of C<class>. See the individual classes for
valid parameters.
=cut
# base class for all POD objects
package Pod::_obj;
require Tree::DAG_Node;
@Pod::_obj::ISA = qw(Tree::DAG_Node);
sub new
{
my $this = shift;
my $class = ref($this) || $this;
my @params = @_;
my $self = $class->SUPER::new;
bless $self, $class;
$self->initialize(@params);
return $self;
}
# stub to be overridden
sub initialize
{
1;
}
=item $obj->B<line>( $num )
Store/retrieve the line number where this object occurred in the source
file. Sets and returns the new value if I<$num> is defined and simply
returns the value otherwise.
=cut
sub line
{
return (@_ > 1) ? ($_[0]->{_line} = $_[1]) : $_[0]->{_line};
}
=item $obj->B<as_pod>()
This method returns the object in POD syntax, including its child
objects. Basically this gives you the same code as in the input file.
=item $obj->B<contents_as_pod>()
Return this object's children in POD format.
=cut
sub contents_as_pod($)
{
shift->_as_pod;
}
sub _as_pod($;$$)
{
my ($self,$first,$last) = @_;
my $str = defined $first ? $first : '';
foreach($self->daughters) {
$str .= $_->as_pod;
}
$str .= defined $last ? $last : '';
$str;
}
=item $obj->B<as_text>()
This method returns the object as simple text in ISO-8859-1 encoding.
All POD markup is discarded.
=item $obj->B<contents_as_text>()
Return this object's children as simple text in ISO-8859-1 encoding.
This strips all POD markup.
=back
=cut
sub contents_as_text($)
{
my $text = $_[0]->_as_text;
$text =~ s/^\s+|\s+$//sg;
$text =~ s/\s+/ /sg;
$text;
}
sub _as_text($$)
{
my ($self,$first,$last) = @_;
my $text = $first || '';
foreach($self->daughters) {
$text .= $_->as_text;
}
$text .= $last || '';
$text;
}
##############################################################################
=head2 Pod::root
This object represents the root of the POD document and thus serves
mainly as a storage for the following classes. It inherits from
L<Storable|Storable>, so that it can easily be stored to and retrived
from the file system.
=over 4
=item Pod::root->B<new>( %params )
The creation method takes an optional argument C<-linelength =E<gt>
num>. If this is set to a non-zero value, the B<as_pod> method of
B<Pod::para> will reformat the POD to use as much of each line up
to I<num> characters.
=cut
package Pod::root;
require Storable;
@Pod::root::ISA = qw(Pod::_obj Storable);
use Carp;
sub initialize
{
my ($self, %params) = @_;
map { $self->{$_} = $params{$_} } keys %params;
$self->{_nodes} = Pod::node::collection->new;
$self->{_line} = 0;
1;
}
sub as_pod($)
{
my $self = shift;
my $last = ($self->daughters)[-1];
$self->_as_pod().
($last && !$last->isa('Pod::perlcode') ? "=cut\n" : '');
}
sub as_text($)
{
shift->_as_text('');
}
=item $root->B<store>( $file )
=item $root->B<store_fd>( $filehandle )
Store the object (and all its contents) to the given file name/handle using
L<Storable|Storable> for subsequent retrieval.
=item Pod::root->B<read>( $file )
=item Pod::root->B<read_fd>( $filehandle )
Read the given file/handle into a B<Pod::root> object. I<$file> must contain a
B<Pod::root> object stored by L<Storable|Storable>, otherwise a fatal
error is raised.
=cut
sub read($$)
{
my ($class,$file) = @_;
my $obj = Storable::retrieve($file);
# check obj against class
if(!$obj || !$obj->isa($class)) {
$obj ||= 'undef';
croak "Fatal: file '$file' contains '$obj' instead of '$class'\n";
}
$obj;
}
sub read_fd($$)
{
my ($class,$handle) = @_;
my $obj = Storable::fd_retrieve($handle);
# check obj against class
if(!$obj || !$obj->isa($class)) {
$obj ||= 'undef';
croak "Fatal: handle '$handle' contains '$obj' instead of '$class'\n";
}
$obj;
}
=item $root->B<nodes>()
Retrieves this POD's node collection, which is maintained as a
B<Pod::node::collection> object.
=cut
sub nodes
{
return (@_ > 1) ? ($_[0]->{_nodes} = $_[1]) : $_[0]->{_nodes};
}
=item $root->B<errors>()
Returns the number of errors occured during parsing.
=cut
sub errors
{
return (@_ > 1) ? ($_[0]->{_errors} = $_[1]) : $_[0]->{_errors};
}
=item $root->B<warnings>()
Returns the number of warnings occured during parsing.
=cut
sub warnings
{
return (@_ > 1) ? ($_[0]->{_errors} = $_[1]) : $_[0]->{_errors};
}
=item $root->B<links>()
Returns all B<Pod::link> objects within the parsed document.
=back
=cut
sub links
{
my $self = shift;
my $links = [];
$self->walk_down({ callback => \&_get_links, _links => $links });
@$links;
}
sub _get_links
{
my ($node,$href) = @_;
if($node->isa("Pod::link")) {
push(@{$href->{_links}}, $node);
return 0;
}
1;
}
##############################################################################
=head2 Pod::perlcode
If the B<-perlcode> option was true for B<Pod::Compiler>, then these
objects represent Perl code blocks in the parsed file.
The B<as_text> method returns the empty string, B<as_pod> the contents.
=cut
package Pod::perlcode;
@Pod::perlcode::ISA = qw(Pod::_obj);
=over 4
=item Pod::perlcode->B<new>( $text )
The B<new> constructor takes an optional parameter, which is the string
representing the code block. This string may contain newlines.
=cut
sub initialize
{
my ($self,$text) = @_;
$self->{_contents} = defined $text ? $text : '';
1;
}
=item $perlcode->B<contents>( $string )
This methods sets/retrieves the code block.
=cut
sub contents
{
return (@_ > 1) ? ($_[0]->{_contents} = $_[1]) : $_[0]->{_contents};
}
=item $perlcode->B<append>( $code )
This methods appends I<$code> to the contents.
=back
=cut
sub append
{
my ($self,$text) = @_;
if(defined $text) {
$self->{_contents} .= $text;
}
$self->{_contents};
}
sub as_pod($)
{
my $self = shift;
my $pre = $self->left_sister;
if($pre && !$pre->isa('Pod::perlcode')) {
return "=cut\n\n".$self->{_contents};
}
$self->{_contents};
}
sub as_text($)
{
# not applicable
'';
}
##############################################################################
=head2 Pod::para
This represents a simple text paragraph. See B<Pod::root> above for an
option that forces the reformatting of the POD code of a paragraph to a
certain line length. Forced line breaks, i.e. a newline followed by
whitespace is I<not> affected.
=cut
package Pod::para;
@Pod::para::ISA = qw(Pod::_obj);
sub as_pod($)
{
my $self = shift;
my $pre = $self->left_sister;
my $mom = $self->mother;
# print =pod if
# - no preceding node and mother is not a list
# - preceding node
my $p = '';
if(!$pre && (!$mom || !$mom->isa('Pod::clist'))) {
$p = "=pod\n\n";
}
if($pre && $pre->isa('Pod::perlcode')) {
$p = "=pod\n\n";
}
my $root = $self->root;
my $maxlen;
if($root && ($maxlen = $root->{-linelength})) {
my @chunks = split(/[ \t]*\n([ \t]+)/, $self->_as_pod());
unshift @chunks, '';
my $pod = '';
do {
my ($prefix,$chunk) = splice(@chunks,0,2);
$chunk =~ s/\s+/ /gs;
$chunk = $prefix.$chunk;
$chunk =~ s/(.{1,$maxlen})( |$)/$1.(length $2 ? "\n" : '')/sge;
$pod .= $chunk."\n";
} while(@chunks);
return "$pod\n";
}
$self->_as_pod($p)."\n\n";
}
sub as_text($)
{
my $self = shift;
$self->_as_text('');
}
##############################################################################
=head2 Pod::verbatim
This represents a verbatim paragraph, i.e. a block that has leading
whitespace on its first line.
=cut
package Pod::verbatim;
@Pod::verbatim::ISA = qw(Pod::_obj);
sub initialize
{
shift->{_content} = [];
}
=over 4
=item $verbatim->B<addline>( $line )
Adds one line to this verbatim paragraph. I<$line> should not
contain carriage returns nor newlines.
=cut
sub addline($$)
{
push(@{$_[0]->{_content}}, $_[1]);
}
=item $verbatim->B<content>( @lines )
Set this verbatim paragraph's contents to I<@lines>. If I<@lines> is
omitted, this method simply returns the current contents, i.e. an array
of strings that reresent the individual lines.
The contents can be cleared completely by saying
C<$verbatim-E<gt>content( undef )>.
=back
=cut
sub content($@)
{
my $self = shift;
if(@_) {
if(defined $_[0]) {
@{$self->{_content}} = @_;
} else {
$self->{_content} = [];
}
}
@{$self->{_content}};
}
sub as_pod($)
{
my $self = shift;
my $pre = $self->left_sister;
( (!$pre || $pre->isa('Pod::perlcode')) ? "=pod\n\n" : '').
join("\n",$self->content)."\n\n";
}
sub as_text($)
{
my $self = shift;
join("\n",$self->content);
}
##############################################################################
=head2 Pod::head
This class represents C<=headX> directives. The B<new> method accepts a
single argument which denotes the heading level, default is 1.
=cut
package Pod::head;
@Pod::head::ISA = qw(Pod::_obj);
sub initialize
{
$_[0]->{_level} = $_[1] || 1;
}
=over 4
=item $head->B<level>( $num )
This sets/retrieves the heading level. Officially supported are only 1
and 2, but higher numbers are not rejected here.
=cut
sub level($$)
{
return (@_ > 1) ? ($_[0]->{_level} = $_[1]) : $_[0]->{_level};
}
=item $head->B<node>( $node )
This sets/retrieves the heading's node information. I<$node> and the
return value are instances of B<Pod::node> or I<undef>.
=cut
sub node($$)
{
return (@_ > 1) ? ($_[0]->{_node} = $_[1]) : $_[0]->{_node};
}
=item $head->B<nodeid>()
This retrieves the heading's node id from B<node> above. Just a shortcut
for C<$head-E<gt>node-E<gt>id>, and safe in case the node is not
defined. Returns the id string or I<undef>.
=cut
sub nodeid($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->id : undef;
}
=item $head->B<nodetext>()
This retrieves the heading's node text from B<node> above. Just a shortcut
for C<$head-E<gt>node-E<gt>text>, and safe in case the node is not
defined. Returns the node text or I<undef>. The node text is derived
from what comes after C<=item>, stripping C<*> (bullets) and C<\d.?>
(numbers) as well as all POD markup. The result is what POD links can
link to from other documents.
=back
=cut
sub nodetext($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->text : undef;
}
sub as_pod($)
{
my $self = shift;
$self->_as_pod("=head".$self->level." ","\n\n");
}
sub as_text($)
{
my $self = shift;
$self->_as_text('');
}
##############################################################################
=head2 Pod::clist
This stores everything that is enclosed by C<=over ... =back>. Note that
such a brace may not span C<=head>s.
=cut
package Pod::clist;
@Pod::clist::ISA = qw(Pod::_obj);
sub initialize
{
$_[0]->{_auto} = 0;
$_[0]->{_type} = '';
$_[0]->{_indent} = 4;
}
=over 4
=item $list->B<autoopen>( $flag )
This sets/retrieves the I<autoopen> property. A list gets this property
when the parser encounters an C<=item> without a previous C<=over>. The
parser then opens a (implicit) list which has the I<autoopen> property
set to true.
=cut
sub autoopen($$)
{
return (@_ > 1) ? ($_[0]->{_auto} = $_[1]) : $_[0]->{_auto};
}
=item $list->B<indent>( $num )
This sets/retrieves the indent level of the list, i.e. the value that
follows C<=over>. Default is 4.
=cut
sub indent($$)
{
return (@_ > 1) ? ($_[0]->{_indent} = $_[1]) : $_[0]->{_indent};
}
=item $list->B<type>( $string )
This sets/retrieves the list type. The parser tries to guess this type
from the C<=item>s it encounters. The three possible types are
C<bullet>, C<number>, and C<definition>. In case of doubt, C<definition>
wins.
=cut
sub type($$)
{
return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}
=item $list->B<has_items>()
This retrieves the number of C<=item>s in this list.
=back
=cut
sub has_items
{
scalar(grep($_->isa('Pod::item'),shift->daughters));
}
sub as_pod($)
{
my $self = shift;
$self->_as_pod("=over ".$self->indent."\n\n","=back\n\n")
}
sub as_text($)
{
my $self = shift;
$self->_as_text('');
}
##############################################################################
=head2 Pod::item
This stores a list's C<=item>.
=cut
package Pod::item;
@Pod::item::ISA = qw(Pod::_obj);
sub initialize
{
shift->{_prefix} = '';
}
=over 4
=item $item->B<prefix>( $string )
This sets the item's prefix. A prefix can be either 'C<*>' or 'C<o>' in
case of a bullet list or a number, optionally followed by a dot for
numbered lists. This is stored separately because links to such nodes do
not contain the prefix.
In case of a numbered list this method returns subsequent numbers for
each item independent of what was parsed.
=cut
sub prefix
{
return (@_ > 1) ? ($_[0]->{_prefix} = $_[1]) : $_[0]->{_prefix};
}
=item $item->B<node>()
=item $item->B<nodeid>()
=item $item->B<nodetext>()
See L<"Pod::head"> for the description of these methods.
=back
=cut
sub node($$)
{
return (@_ > 1) ? ($_[0]->{_node} = $_[1]) : $_[0]->{_node};
}
sub nodeid($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->id : undef;
}
sub nodetext($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->text : undef;
}
sub _prefix
{
my $self = shift;
my $prefix = $self->{_prefix};
my $mum = $self->mother;
if($mum) {
my $type = $mum->type;
if($type =~ /^bullet/) {
$prefix = '*';
}
elsif($type =~ /^number/) {
my $num = scalar(grep($_->isa('Pod::item'), $self->left_sisters))+1;
$prefix = "$num.";
}
else { # definition
$prefix = 'Z<>'.$prefix if(length $prefix);
}
}
$prefix;
}
sub _nodetext
{
my $self = shift;
my $text = $self->contents_as_text();
my $mum = $self->mother;
my $pf = '';
if(!$mum || $mum->type() =~ /^definition/) {
$pf = $self->{_prefix};
}
$pf.(length($pf) && length($text) ? ' ' : '').$text;
}
sub as_pod($)
{
my $self = shift;
my $prefix = $self->_prefix();
my $contents = $self->contents_as_pod || '';
'=item'.($prefix?" $prefix":'').($contents?" $contents":'')."\n\n";
}
sub as_text($)
{
my $self = shift;
my $prefix = $self->_prefix();
$self->_as_text($prefix?"$prefix ":'');
}
##############################################################################
=head2 Pod::begin
This stores everything between C<=begin ... =end>. It is unclear how POD
directives in such a block should be handled. The behaviour is undefined
and may change in the future.
B<as_pod> returns the original contents, B<as_text> returns the empty
string.
=cut
package Pod::begin;
@Pod::begin::ISA = qw(Pod::_obj);
sub initialize
{
$_[0]->{_type} = 'unknown';
$_[0]->{_args} = '';
$_[0]->line(0);
$_[0]->{_chunks} = [];
}
=over 4
=item $begin->B<type>( $string )
This set/retrieves the begin/end block type, i.e. the first argument
after C<=begin>.
=cut
sub type($$)
{
return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}
=item $begin->B<args>( $string )
This set/retrieves the begin/end block arguments, i.e. everything the
follows the first argument after C<=begin>.
=cut
sub args($$)
{
return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args};
}
=item $begin->B<addchunk>( $string )
This adds a chunk to the begin/end block. A chunk is a paragraph.
=cut
sub addchunk($$)
{
push(@{$_[0]->{_chunks}},$_[1]);
}
=item $begin->B<contents>()
Return the current contents, i.e. the array of all chunks.
=back
=cut
sub contents($)
{
return @{shift->{_chunks}};
}
sub as_pod($)
{
my $self = shift;
"=begin ".$self->type.($self->args ? " ".$self->args:'')."\n\n".
join("\n",@{$self->{_chunks}})."\n\n=end\n\n";
}
sub as_text($)
{
# the individual formatters must redefine this if
# this method is desired
'';
}
##############################################################################
=head2 Pod::for
This stores C<=for> paragraphs. The B<as_pod> method return the original
contents, B<as_text> returns the empty string.
=cut
package Pod::for;
@Pod::for::ISA = qw(Pod::_obj);
sub initialize
{
$_[0]->{_type} = 'unknown';
$_[0]->{_args} = '';
$_[0]->line(0);
$_[0]->{_chunks} = [];
}
=over 4
=item $for->B<type>( $string )
This sets/retrieves the formatter specification of the C<=for> pargraph.
=cut
sub type($$)
{
return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}
=item $for->B<args>( $string )
This sets/retrieves everything following the formatter specification
up to the next newline.
=cut
sub args($$)
{
return (@_ > 1) ? ($_[0]->{_args} = $_[1]) : $_[0]->{_args};
}
=item $for->B<content>( $string )
This sets/retrieves the C<=for> paragraph's contents, i.e. everything
following the first newline after the C<=for> directive.
=back
=cut
sub content($)
{
return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content};
}
sub as_pod($)
{
my $self = shift;
$self->_as_pod("=for ".$self->type.($self->args ? " ".$self->args:'')."\n".
$self->content)."\n\n";
}
sub as_text($)
{
# the individual formatters must redefine this if
# this method is desired
'';
}
##############################################################################
=head2 Textual Objects
The following sections describe objects that represent text. They have some
common methods:
=cut
package Pod::_text;
# base class for all textual objects
@Pod::_text::ISA = qw(Pod::_obj);
=over 4
=item $textobj->B<nested>()
Gives a string that contains the interior sequence codes in which this
object is nested. A string object XXX inside C<BE<lt>...IE<lt>XXXE<gt>...E<gt>>
would thus return C<BI>.
=cut
sub nested
{
my $mom = shift->mother;
if($mom && $mom->can('_code')) {
return $mom->nested.$mom->_code;
}
'';
}
=item $textobj->B<as_pod>()
Gives the POD code of this object, including its children.
=cut
# stubs
sub as_pod($)
{
my $self = shift;
$self->_code.'<'.($self->contents_as_pod).'>';
}
# for Pod::Parser
*raw_text = \&as_pod;
=item $textobj->B<as_text>()
Gives the objects text contents. No POD markup will be returned.
=back
=cut
sub as_text($)
{
shift->contents_as_text;
}
sub _code
{
'';
}
##############################################################################
=head2 Pod::string
This object contains plain ASCII strings. Note that the contents can well
include angle brackets (C<E<lt>E<gt>>). When converted into POD code, these
are automatically escaped where necesary.
The B<new> constructor takes an optional argument: the string.
=cut
package Pod::string;
@Pod::string::ISA = qw(Pod::_text);
sub initialize
{
$_[0]->content(defined $_[1] ? $_[1] : '');
}
=over 4
=item $string->B<content>( $text )
Set/retrieve the string's contents.
=back
=cut
sub content($$)
{
return (@_ > 1) ? ($_[0]->{_content} = $_[1]) : $_[0]->{_content};
}
sub as_pod($)
{
my $self = shift;
# deal with <>
my $str = $self->{_content};
my $mum;
$str =~ s{((^|[A-Z])<|>)}{
if($1 eq '>') {
if(($mum = $self->mother) && $mum->isa('Pod::_text')) {
# I am nested, so quote the closing >
'E<gt>';
} else {
'>';
}
} else {
"$2E<lt>";
}
}ge;
$str;
}
# for Pod::Parser
*raw_text = \&as_pod;
sub as_text($)
{
shift->content;
}
##############################################################################
=head2 Pod::bold
This class represents the C<BE<lt>...E<gt>> (bold) interior sequence.
=cut
package Pod::bold;
@Pod::bold::ISA = qw(Pod::_text);
sub _code { 'B'; }
##############################################################################
=head2 Pod::italic
This class represents the C<IE<lt>...E<gt>> (italic) interior sequence.
=cut
package Pod::italic;
@Pod::italic::ISA = qw(Pod::_text);
sub _code { 'I'; }
##############################################################################
=head2 Pod::code
This class represents the C<CE<lt>...E<gt>> (code/courier) interior sequence.
=cut
package Pod::code;
@Pod::code::ISA = qw(Pod::_text);
sub _code { 'C'; }
##############################################################################
=head2 Pod::file
This class represents the C<FE<lt>...E<gt>> (file) interior sequence.
=cut
package Pod::file;
@Pod::file::ISA = qw(Pod::_text);
sub _code { 'F'; }
##############################################################################
=head2 Pod::nonbreaking
This class represents the C<SE<lt>...E<gt>> (nonbreaking space)
interior sequence.
=cut
package Pod::nonbreaking;
@Pod::nonbreaking::ISA = qw(Pod::_text);
sub _code { 'S'; }
##############################################################################
=head2 Pod::zero
This class represents the C<ZE<lt>E<gt>> (zero width character)
interior sequence. Note that this sequence cannot have children.
=cut
package Pod::zero;
@Pod::zero::ISA = qw(Pod::_text);
sub _code { 'Z'; }
sub as_pod($)
{
'Z<>';
}
# for Pod::Parser
*raw_text = \&as_pod;
sub as_text($)
{
'';
}
##############################################################################
=head2 Pod::idx
This class represents the C<XE<lt>...E<gt>> (index) interior sequence.
The text therein is not printed in the resulting manpage, but is supposed to
appear in an index with a hyperlink to the place where it occurred.
=cut
package Pod::idx;
@Pod::idx::ISA = qw(Pod::_text);
sub _code { 'X'; }
=over 4
=item $idx->B<node>()
=item $idx->B<nodeid>()
=item $idx->B<nodetext>()
See L<"Pod::head"> for the description of these methods.
=back
=cut
sub node($$)
{
return (@_ > 1) ? ($_[0]->{_node} = $_[1]) : $_[0]->{_node};
}
sub nodeid($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->id : undef;
}
sub nodetext($)
{
my $self = shift;
$self->{_node} ? $self->{_node}->text : undef;
}
sub as_text($)
{
# index entry is not shown in text
'';
}
##############################################################################
=head2 Pod::entity
This class represents the C<EE<lt>...E<gt>> (entity) interior sequence.
This object has no children, just a value. Entities encountered in the POD
source that map to standard ASCII characters (most notably C<lt>, C<gt>,
C<sol> and C<verbar>) are not kept as entities, but converted into or appended
to the preceding B<Pod::string>, but only if the nesting of this entity permits.
Entities may be specified as textual entities (C<auml>, C<szlig>, etc.),
or a numeric. The usual Perl encodings are valid here: C<123> is decimal,
C<0x3a> is hexadecimal, C<0177> is octal.
The B<new> constructor takes an optional argument, namely the numeric code
of the entity to create.
The B<as_text> method returns the corresponding ISO-8859-1 (Latin-1)
character. Sorry, no unicode support yet.
=cut
package Pod::entity;
@Pod::entity::ISA = qw(Pod::_text);
sub _code { 'E'; }
# stolen from HTML::Entities
my %ENTITIES = (
# Some normal chars that have special meaning in SGML context
amp => '&', # ampersand
'gt' => '>', # greater than
'lt' => '<', # less than
quot => '"', # double quote
# PUBLIC ISO 8879-1986//ENTITIES Added Latin 1//EN//HTML
AElig => 'Æ', # capital AE diphthong (ligature)
Aacute => 'Á', # capital A, acute accent
Acirc => 'Â', # capital A, circumflex accent
Agrave => 'À', # capital A, grave accent
Aring => 'Å', # capital A, ring
Atilde => 'Ã', # capital A, tilde
Auml => 'Ä', # capital A, dieresis or umlaut mark
Ccedil => 'Ç', # capital C, cedilla
ETH => 'Ð', # capital Eth, Icelandic
Eacute => 'É', # capital E, acute accent
Ecirc => 'Ê', # capital E, circumflex accent
Egrave => 'È', # capital E, grave accent
Euml => 'Ë', # capital E, dieresis or umlaut mark
Iacute => 'Í', # capital I, acute accent
Icirc => 'Î', # capital I, circumflex accent
Igrave => 'Ì', # capital I, grave accent
Iuml => 'Ï', # capital I, dieresis or umlaut mark
Ntilde => 'Ñ', # capital N, tilde
Oacute => 'Ó', # capital O, acute accent
Ocirc => 'Ô', # capital O, circumflex accent
Ograve => 'Ò', # capital O, grave accent
Oslash => 'Ø', # capital O, slash
Otilde => 'Õ', # capital O, tilde
Ouml => 'Ö', # capital O, dieresis or umlaut mark
THORN => 'Þ', # capital THORN, Icelandic
Uacute => 'Ú', # capital U, acute accent
Ucirc => 'Û', # capital U, circumflex accent
Ugrave => 'Ù', # capital U, grave accent
Uuml => 'Ü', # capital U, dieresis or umlaut mark
Yacute => 'Ý', # capital Y, acute accent
aacute => 'á', # small a, acute accent
acirc => 'â', # small a, circumflex accent
aelig => 'æ', # small ae diphthong (ligature)
agrave => 'à', # small a, grave accent
aring => 'å', # small a, ring
atilde => 'ã', # small a, tilde
auml => 'ä', # small a, dieresis or umlaut mark
ccedil => 'ç', # small c, cedilla
eacute => 'é', # small e, acute accent
ecirc => 'ê', # small e, circumflex accent
egrave => 'è', # small e, grave accent
eth => 'ð', # small eth, Icelandic
euml => 'ë', # small e, dieresis or umlaut mark
iacute => 'í', # small i, acute accent
icirc => 'î', # small i, circumflex accent
igrave => 'ì', # small i, grave accent
iuml => 'ï', # small i, dieresis or umlaut mark
ntilde => 'ñ', # small n, tilde
oacute => 'ó', # small o, acute accent
ocirc => 'ô', # small o, circumflex accent
ograve => 'ò', # small o, grave accent
oslash => 'ø', # small o, slash
otilde => 'õ', # small o, tilde
ouml => 'ö', # small o, dieresis or umlaut mark
szlig => 'ß', # small sharp s, German (sz ligature)
thorn => 'þ', # small thorn, Icelandic
uacute => 'ú', # small u, acute accent
ucirc => 'û', # small u, circumflex accent
ugrave => 'ù', # small u, grave accent
uuml => 'ü', # small u, dieresis or umlaut mark
yacute => 'ý', # small y, acute accent
yuml => 'ÿ', # small y, dieresis or umlaut mark
# Some extra Latin 1 chars that are listed in the HTML3.2 draft (21-May-96)
copy => '©', # copyright sign
reg => '®', # registered sign
nbsp => "\240", # non breaking space
# Additional ISO-8859/1 entities listed in rfc1866 (section 14)
iexcl => '¡',
cent => '¢',
pound => '£',
curren => '¤',
yen => '¥',
brvbar => '¦',
sect => '§',
uml => '¨',
ordf => 'ª',
laquo => '«',
'not' => '¬', # not is a keyword in perl
shy => '',
macr => '¯',
deg => '°',
plusmn => '±',
sup1 => '¹',
sup2 => '²',
sup3 => '³',
acute => '´',
micro => 'µ',
para => '¶',
middot => '·',
cedil => '¸',
ordm => 'º',
raquo => '»',
frac14 => '¼',
frac12 => '½',
frac34 => '¾',
iquest => '¿',
'times' => '×', # times is a keyword in perl
divide => '÷',
# some POD special entities
verbar => '|',
sol => '/'
);
=over 4
=item $entity->B<decode>( $string )
This method can be given any type of entity encoding and sets the entity
value to the resulting code. This code is I<undef> if the given string was
not recognized.
=cut
sub decode
{
my ($class,$str) = @_;
my $ent = $class->new;
$str =~ s/^\s+|\s+$//sg;
my $value;
if($str =~ /^(0x[0-9a-f]+)$/i) {
# hexadecimal
$value = hex($1);
}
elsif($str =~ /^(0[0-7]+)$/) {
# octal
$value = oct($1);
}
elsif($str =~ /^(\d+)$/) {
# decimal
$value = $1;
}
elsif($str =~ /^(\w+)$/i) {
$value = defined $ENTITIES{$1} ? ord($ENTITIES{$1}) : undef;
}
return undef unless($value);
$ent->value($value);
$ent;
}
sub initialize
{
$_[0]->{_value} = defined $_[1] ? $_[1] : '';
}
=item $entity->B<value>( $num )
Sets/retrieves the numeric value of this entity.
=cut
sub value($$)
{
# value is number in ISO-8859-1
return (@_ > 1) ? ($_[0]->{_value} = $_[1]) : $_[0]->{_value};
}
=item $entity->B<as_pod>()
Returns the POD representation of this entity. If a textual encoding like
C<auml> is known for the value it is used, otherwise decimal encoding.
=back
=cut
sub as_pod($)
{
my $self = shift;
my $value = $self->value;
my $chr = chr($value);
# deal with nonbreaking space entity
return 'S< >' if($chr eq $ENTITIES{nbsp});
my ($ent) = grep($_->[1] eq $chr, map { [ $_ => $ENTITIES{$_} ] }
keys %ENTITIES);
# TODO global parameter for dec/hex/oct encoding
# this is dec
$ent = $ent ? $ent->[0] : $value;
"E<$ent>";
}
# for Pod::Parser
*raw_text = \&as_pod;
sub as_text($)
{
my $self = shift;
chr($self->{_value});
}
##############################################################################
=head2 Pod::link
This is a class for representation of POD hyperlinks. The code to parse the
corresponding POD code is entirely in B<Pod::Compiler>.
=cut
package Pod::link;
@Pod::link::ISA = qw(Pod::_text);
sub _code { 'L'; }
=over 4
=item Pod::link->B<new>()
The B<new()> method can be passed a set of key/value pairs for one-stop
initialization.
=cut
use Carp;
sub initialize {
my $self = shift;
#$self->{_line} ||= '';
#$self->{_file} ||= '';
#$self->{_page} ||= '';
#$self->{_node} ||= '';
#$self->{_type} ||= '';
$self->{_mansect} ||= '';
$self->{_alttext} ||= [];
if(defined $_[0] && ref($_[0]) && ref($_[0]) eq 'HASH') {
# called with a list of parameters
%$self = (%$self, %{$_[0]});
}
$self;
}
=item $link->B<as_text>()
This method returns the textual representation of the hyperlink as above,
but without markers (read only). Depending on the link type this is one of
the following alternatives (links to same or other POD document):
page: L<perl> the perl manpage
item: L<perlvar/$!> the $! entry in the perlvar manpage
item: L</DESTROY> the DESTOY entry elsewhere in this
document
head: L<perldoc/"OPTIONS"> the section on OPTIONS in the perldoc
manpage
head: L<"DESCRIPTION"> the section on DESCRIPTION elsewhere
in this document
The following are not offical, but are supported:
man: L<sed(1)> the sed(1) manpage
url: L<http://www.perl.com> http://www.perl.com
(same for ftp: news: mailto:)
If an alternative text (C<LE<lt>alttext|...E<gt>>) was specified, this
text (without POD markup) is returned.
All POD formatters should use the same text for the different types
of links. Clever formatters create two hyperlinks for item or section links to
another page: one to the top of the page (the page name) and one to the
node within the page (the node name).
=cut
# The complete link's text
sub as_text {
my $self = shift;
my @alttext = @{$self->{_alttext}};
if(@alttext) {
my $s = join('', map { $_->as_text } @alttext);
$s =~ s/\s+/ /gs;
$s =~ s/^\s+|\s+$//gs;
return $s;
}
my $type = $self->{_type};
my $node = $self->{_node};
my $page = $self->{_page}.
(length $self->{_mansect} ? '('.$self->{_mansect}.')' : '');
if($type eq 'url') {
return $node;
}
(!$node ? '' : $type eq 'item' ?
"the $node entry" : "the section on $node" ) .
($page ? ($node ? ' in ':'') . "the $page manpage" :
' elsewhere in this document');
}
=item $link->B<page>()
This method sets or returns the POD page this link points to. If empty,
the link points to the current document itself.
=cut
# The POD page the link appears on
sub page {
return (@_ > 1) ? ($_[0]->{_page} = $_[1]) : $_[0]->{_page};
}
=item $link->B<node>()
As above, but the destination node text (either head or item) of the link.
=cut
# The link destination
sub node {
return (@_ > 1) ? ($_[0]->{_node} = $_[1]) : $_[0]->{_node};
}
=item $link->B<alttext>()
Sets or returns an alternative text specified in the link.
=cut
# Potential alternative text
sub alttext {
my $self = shift;
if (@_) {
$self->{_alttext} = [ @_ ];
}
@{$self->{_alttext}};
}
=item $link->B<type>()
The node type, either C<page>, C<man>, C<head> or C<item>. As an
unofficial type, there is also C<url>, derived from e.g.
C<LE<lt>http://perl.comE<gt>>
=cut
# The type: item or headn
sub type {
return (@_ > 1) ? ($_[0]->{_type} = $_[1]) : $_[0]->{_type};
}
=item $link->B<mansect>()
The node type, either C<page>, C<man>, C<head> or C<item>.
As an unofficial type,
there is also C<url>, derived from e.g. C<LE<lt>http://perl.comE<gt>>
=cut
# manual section of page
sub mansect {
return (@_ > 1) ? ($_[0]->{_mansect} = $_[1]) : $_[0]->{_mansect};
}
=item $link->B<as_pod>()
Returns the link as C<LE<lt>...E<gt>>.
=back
=cut
sub _escape_brackets
{
$_[0] =~ s{((^|[A-Z])<|>)}{
if($1 eq '>') {
"E<gt>";
}
else {
"$2E<lt>";
}
}ge;
}
# The link itself
sub as_pod {
my $self = shift;
my $link = ($self->page() || '').
(length $self->{_mansect} ? '('.$self->{_mansect}.')' : '');
my $node = $self->node();
my $type = $self->type() || '';
if($type eq 'url') {
$link = $node unless length $link;
_escape_brackets($link);
}
elsif($node) {
_escape_brackets($node);
$node =~ s/\|/E<verbar>/g;
$node =~ s:/:E<sol>:g;
if($self->type() eq 'head') {
$link .= ($link ? '/' : '') . qq{"$node"};
}
else { # item
$link .= '/' . $node;
}
}
my @txt = $self->alttext();
if(@txt) {
my $text = join('', map { $_->as_pod } @txt);
$text =~ s/\|/E<verbar>/g;
$text =~ s:/:E<sol>:g;
$link = "$text|$link";
}
"L<$link>";
}
# for Pod::Parser
*raw_text = \&as_pod;
##############################################################################
=head1 ADDITIONAL CLASSES
The following classes to not inherit from any other package and serve as
a convenience storage for POD-related data.
=head2 Pod::node
This class stores information about a POD node, i.e. a potential
hyperlink destination. This is derived from C<=headX>, C<=item> and
C<XE<lt>...E<gt>> entries. See also L<"Pod::node::collection">.
=cut
package Pod::node;
# This class uses an array as storage - it does not
# consume as much memory as hashes. Reason: This is stored
# in memory for most many-POD translators for resolving
# links.
=over 4
=item Pod::node->B<new>( %params )
Creates a new instance of B<Pod::node>. Optional parameters are
B<text>, B<id>, B<type>. See below.
=cut
sub new
{
my $this = shift;
my $class = ref($this) || $this;
my $self = [];
bless $self, $class;
$self->initialize(@_);
return $self;
}
sub initialize
{
my ($self,%params) = @_;
foreach(keys %params) {
unless($self->can($_)) {
warn "Internal error: illegal property '$_' for class '".
ref($self)."'\n";
next;
}
$self->[0] = $params{text};
$self->[1] = $params{id};
$self->[2] = $params{type};
}
$self->[3] = 0; # number of hits
$self;
}
=item $node->B<text>( $string )
Sets/retrieves the node's text. The text is a plain string without any
POD markup in ISO-8859-1 encoding.
=cut
sub text
{
# stored in #0
return (@_ > 1) ? ($_[0]->[0] = $_[1]) : $_[0]->[0];
}
=item $node->B<id>( $string )
Sets/retrieves the node's unique id. The id is a string that is unique
in the POD document and can be used as a hyperlink anchor.
=cut
sub id
{
# stored in #1
return (@_ > 1) ? ($_[0]->[0] = $_[1]) : $_[0]->[1];
}
=item $node->B<type>( $string )
Sets/retrieves the node's type, which is either C<headX> (X being a
number), C<item> or C<X>, depending on from which POD construct this
node was derived.
=cut
sub type
{
# stored in #2
return (@_ > 1) ? ($_[0]->[2] = $_[1]) : $_[0]->[2];
}
=item $node->B<was_hit>()
Increments the number of hits to this node. Should be called whenever a
link was resolved to this node.
=cut
sub was_hit
{
$_[0]->[3]++;
}
=item $node->B<hits>()
Retrieves the number of hits on this node.
=back
=cut
sub hits
{
$_[0]->[3];
}
##############################################################################
=head2 Pod::node::collection
This class is merely an array that holds B<Pod::node>s. It provides some
methods to search in this set of nodes.
=cut
package Pod::node::collection;
use Carp;
sub new
{
my $this = shift;
my $class = ref($this) || $this;
my $self = [];
bless $self, $class;
return $self;
}
=over 4
=item $ncollection->B<all>()
Return an array of all nodes. Nodes are instances of B<Pod::node>.
=cut
sub all
{
my $self = shift;
@$self;
}
=item $ncollection->B<add>( @nodes )
Add the given nodes to the collection. A fatal error occurs when trying
to add non-B<Pod::node>s to the collection. Returns true.
=cut
sub add
{
my ($self, @new) = @_;
foreach(@new) {
unless($_->isa('Pod::node')) {
croak "Fatal: Tried to add a non-Pod::node to Pod::node::collection";
}
push(@$self, $_);
}
1;
}
=item $ncollection->B<get_by_text>( $string )
Returns an array of nodes or the first matching node (depending on
context) that exactly matches the node text. The return value should
normally be either the empty array or I<undef> for no match and exactly
one element that matches, unless several nodes with the same text are in
the collection, which should never occur.
=cut
sub get_by_text
{
my ($self,$text) = @_;
my @res = grep($_->text eq $text, @$self);
if(wantarray) {
return @res;
}
$res[0];
}
=item $ncollection->B<get_by_rx>( $regexp )
Same as above, but get the node by matching the given I<$regexp> on the
node text. A fatal error occurs if the regexp has syntax errors.
=cut
sub get_by_rx
{
my ($self,$rx) = @_;
my @res = grep($_->text =~ /$rx/, @$self);
if(wantarray) {
return @res;
}
$res[0];
}
=item $ncollection->B<get_by_id>( $string )
Same as above, but get the node by its unique id.
=cut
sub get_by_id
{
my ($self,$id) = @_;
my @res = grep($_->id eq $id, @$self);
if(wantarray) {
return @res;
}
$res[0];
}
=item $ncollection->B<get_by_type>( $string )
Same as above, but get the node by its type. The string is treated as a
regexp, so you can get all C<=head> nodes by specifying C<"head"> or all
C<=head1> nodes by giving C<"head1">.
=cut
sub get_by_type
{
my ($self,$type) = @_;
my @res = grep($_->type =~ /^\Q$type\E/, @$self);
if(wantarray) {
return @res;
}
$res[0];
}
=item $ncollection->B<ids>()
Return an array of all node ids.
=cut
sub ids
{
my $self = shift;
map { $_->id } @$self;
}
=item $ncollection->B<texts>()
Return an array of all node texts.
=back
=cut
sub texts
{
my $self = shift;
map { $_->text } @$self;
}
##############################################################################
=head2 Pod::doc
A convenience class for storing POD document information, especially by
converters. See also L<"Pod::doc::collection">.
=cut
package Pod::doc;
use Carp;
=over 4
=item Pod::doc->B<new>( %params )
Create a new instance of B<Pod::doc>, assigning the given optional
parameters. See below for the parameter names, they are identical with
the accessor methods.
=cut
sub new
{
my ($this,%params) = @_;
my $class = ref($this) || $this;
my $self = +{%params};
bless $self, $class;
return $self;
}
=item $doc->B<name>( $string )
Set/retrieve the canonical name of the POD document, e.g. C<perldoc> or
C<Pod::Compiler>.
=cut
sub name
{
return (@_ > 1) ? ($_[0]->{name} = $_[1]) : $_[0]->{name};
}
=item $doc->B<source>( $file )
Set/retrieve the source file name of the POD document, e.g.
F</usr/local/bin/perldoc> or
F</usr/local/lib/perl5/site_perl/Pod/Compiler.pm>.
=cut
sub source
{
return (@_ > 1) ? ($_[0]->{source} = $_[1]) : $_[0]->{source};
}
=item $doc->B<temp>( $file )
Set/retrieve the temporary file name of the POD document (to be created
by (Pod::root object)->store), e.g. F</tmp/perldoc.tmp> or
F</tmp/Pod__Compiler.tmp>. You have to "invent" a method for generating
temp filenames yourself, see also L<File::Temp>.
=cut
sub temp
{
return (@_ > 1) ? ($_[0]->{temp} = $_[1]) : $_[0]->{temp};
}
=item $doc->B<destination>( $file )
Set/retrieve the destination file name of the POD document, e.g.
F</usr/local/share/perl/html/perldoc.html> or
F</usr/local/share/perl/Pod/Compiler.html>.
=cut
sub destination
{
return (@_ > 1) ? ($_[0]->{destination} = $_[1]) : $_[0]->{destination};
}
=item $doc->B<nodes>( $nodecollection )
Set/retrieve the this POD document's node collection. When setting, the
given argument must be a B<Pod::node::collection> object, otherwise a
fatal error occurs.
=back
=cut
sub nodes
{
my ($self,$arg) = @_;
# check for Pod::node::collection
if(defined $arg) {
unless(ref($arg) && $arg->isa('Pod::node::collection')) {
croak "Fatal: tried to set a non-Pod::node::collection as Pod::doc::nodes";
}
$self->{nodes} = $arg;
}
$self->{nodes};
}
##############################################################################
=head2 Pod::doc::collection
This class serves as a container for a set of B<Pod::doc> objects and
defines some methods for such a collection. This object is simply a hash
with the canonical POD name as key and the corresponding B<Pod::doc>
object as value.
=cut
package Pod::doc::collection;
use Carp;
=over 4
=item Pod::doc::collection->B<new>()
Create a new collection instance.
=cut
sub new
{
my $this = shift;
my $class = ref($this) || $this;
my $self = +{};
bless $self, $class;
return $self;
}
=item $dcollection->B<all_names>()
Return an array of all sorted documents names in the collection.
=cut
sub all_names
{
my $self = shift;
sort keys %{$self};
}
=item $dcollection->B<all_objs>()
Return an array of all B<Pod::doc>s in the collection. There is no
specific sort order.
=cut
sub all_objs
{
my $self = shift;
values %{$self};
}
=item $dcollection->B<get>( $name )
Return the B<Pod::doc> object associated with the name I<$name> or
I<undef> if no such name is in the collection.
=cut
sub get
{
$_[0]->{$_[1]};
}
=item $dcollection->B<add>( $name , $object )
=item $dcollection->B<add>( $object )
Add the given B<Pod::doc> object to the collection. The two-argument
form explicitely sets the name to I<$name>, otherwise the objects name
is used. Exceptions occur if arguments are missing or have the wrong
type or the name is empty.
=cut
sub add
{
my ($self,$name,$obj) = @_;
unless(defined $name) {
croak "Error: missing argument for Pod::doc::collection::add";
}
if(ref($name) && $name->isa('Pod::doc')) {
$obj = $name;
$name = $obj->name();
}
unless(defined $name && length $name) {
croak "Error: improper name specified for Pod::doc::collection::add (given Pod::doc does not have a proper name set?)";
}
unless(defined $obj && $obj->isa('Pod::doc')) {
croak "Fatal: improper object specified for Pod::doc::collection::add";
}
$obj->name($name); # set this name, ensure consistency
$self->{$name} = $obj;
}
=item $dcollection->B<resolve_link>( $link , $name )
This method tries to resolve the given link (object of class
B<Pod::link>) in the document named I<$name> within the document
collection. Returns the B<Pod::doc> and the B<Pod::node> in case of
success. If the node was found in the current POD (defined by I<$name>)
then the first return value will be the empty string. If a node was
found, its hit count is automatically incremented. Example:
my ($page,$node) = $dcollection->resolve_link( $link, $myname );
unless(defined $page) {
warn "Error: Cannot resolve link.\n";
}
elsif(!$page) {
# node is in the current POD $myname
}
else {
# link to another POD
}
=back
=cut
sub resolve_link
{
my ($self,$link,$name) = @_;
my $type = $link->type;
unless($type =~ /^(page|head|item)$/) {
return undef;
}
my $page = $link->page || $name;
my $doc = $self->get($page);
unless($doc) {
return undef;
}
my $text = $link->node;
my $ncoll = $doc->nodes;
unless($ncoll) {
return ($doc, undef);
}
my $node = $ncoll->get_by_rx("^\Q$text\E(\\s|$)");
$node->was_hit if(defined $node);
$doc = '' if($doc->name eq $name);
($doc,$node);
}
##############################################################################
=head1 SEE ALSO
L<Pod::Compiler>, L<Pod::Parser>, L<Pod::Find>, L<Pod::Checker>
=head1 AUTHOR
Marek Rouchal <marekr@cpan.org>
=cut
1;