#line 1
# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
#
# This file is part of Pod-Markdown
#
# This software is copyright (c) 2004 by Marcel Gruenauer.
#
# This is free software; you can redistribute it and/or modify it under
# the same terms as the Perl 5 programming language system itself.
#
use 5.008;
use strict;
use warnings;
package Pod::Markdown;
# git description: v2.001-2-gadb8327
$Pod::Markdown::VERSION = '2.002';
BEGIN {
$Pod::Markdown::AUTHORITY = 'cpan:RWSTAUNER';
}
# ABSTRACT: Convert POD to Markdown
use Pod::Simple 3.14 (); # external links with text
use parent qw(Pod::Simple::Methody);
our %URL_PREFIXES = (
sco => 'http://search.cpan.org/perldoc?',
metacpan => 'https://metacpan.org/pod/',
man => 'http://man.he.net/man',
);
$URL_PREFIXES{perldoc} = $URL_PREFIXES{metacpan};
sub new {
my $class = shift;
my %args = @_;
my $self = $class->SUPER::new();
$self->preserve_whitespace(1);
$self->accept_targets(qw( markdown html ));
my $data = $self->_private;
while( my ($attr, $val) = each %args ){
$data->{ $attr } = $val;
}
for my $type ( qw( perldoc man ) ){
my $attr = $type . '_url_prefix';
# Use provided argument or default alias.
my $url = $self->$attr || $type;
# Expand alias if defined (otherwise use url as is).
$data->{ $attr } = $URL_PREFIXES{ $url } || $url;
}
$self->_prepare_fragment_formats;
return $self;
}
## Attribute accessors ##
my @attr = qw(
man_url_prefix
perldoc_url_prefix
perldoc_fragment_format
markdown_fragment_format
include_meta_tags
);
{
no strict 'refs'; ## no critic
foreach my $attr ( @attr ){
*$attr = sub { return $_[0]->_private->{ $attr } };
}
}
sub _prepare_fragment_formats {
my ($self) = @_;
foreach my $attr ( @attr ){
next unless $attr =~ /^(\w+)_fragment_format/;
my $type = $1;
my $format = $self->$attr;
# If one was provided.
if( $format ){
# If the attribute is a coderef just use it.
next if ref($format) eq 'CODE';
}
# Else determine a default.
else {
if( $type eq 'perldoc' ){
# Choose a default that matches the destination url.
my $target = $self->perldoc_url_prefix;
foreach my $alias ( qw( metacpan sco ) ){
if( $target eq $URL_PREFIXES{ $alias } ){
$format = $alias;
}
}
# This seems like a reasonable fallback.
$format ||= 'pod_simple_xhtml';
}
else {
$format = $type;
}
}
# The short name should become a method name with the prefix prepended.
my $prefix = 'format_fragment_';
$format =~ s/^$prefix//;
die "Unknown fragment format '$format'"
unless $self->can($prefix . $format);
# Save it.
$self->_private->{ $attr } = $format;
}
return;
}
## Backward compatible API ##
# For backward compatibility (previously based on Pod::Parser):
# While Pod::Simple provides a parse_from_file() method
# it's primarily for Pod::Parser compatibility.
# When called without an output handle it will print to STDOUT
# but the old Pod::Markdown never printed to a handle
# so we don't want to start now.
sub parse_from_file {
my ($self, $file) = @_;
$self->output_string(\($self->{_as_markdown_}));
$self->parse_file($file);
}
# Likewise, though Pod::Simple doesn't define this method at all.
sub parse_from_filehandle { shift->parse_from_file(@_) }
## Document state ##
sub _private {
my ($self) = @_;
$self->{_Pod_Markdown_} ||= {
indent => 0,
stacks => [],
states => [{}],
link => [],
};
}
sub _increase_indent {
++$_[0]->_private->{indent} >= 1
or die 'Invalid state: indent < 0';
}
sub _decrease_indent {
--$_[0]->_private->{indent} >= 0
or die 'Invalid state: indent < 0';
}
sub _new_stack {
push @{ $_[0]->_private->{stacks} }, [];
push @{ $_[0]->_private->{states} }, {};
}
sub _last_string {
$_[0]->_private->{stacks}->[-1][-1];
}
sub _pop_stack_text {
$_[0]->_private->{last_state} = pop @{ $_[0]->_private->{states} };
join '', @{ pop @{ $_[0]->_private->{stacks} } };
}
sub _stack_state {
$_[0]->_private->{states}->[-1];
}
sub _save {
my ($self, $text) = @_;
push @{ $self->_private->{stacks}->[-1] }, $text;
# return $text; # DEBUG
}
sub _save_line {
my ($self, $text) = @_;
$self->_save($text . $/);
}
# For paragraphs, etc.
sub _save_block {
my ($self, $text) = @_;
$self->_stack_state->{blocks}++;
$self->_save_line($self->_indent($text) . $/);
}
## Formatting ##
sub _chomp_all {
my ($self, $text) = @_;
1 while chomp $text;
return $text;
}
sub _indent {
my ($self, $text) = @_;
my $level = $self->_private->{indent};
if( $level ){
my $indent = ' ' x ($level * 4);
# Capture text on the line so that we don't indent blank lines (/^\x20{4}$/).
$text =~ s/^(.+)/$indent$1/mg;
}
return $text;
}
sub as_markdown {
my ($parser, %args) = @_;
my @header;
# Don't add meta tags again if we've already done it.
if( $args{with_meta} && !$parser->include_meta_tags ){
@header = $parser->_build_markdown_head;
}
return join("\n" x 2, @header, $parser->{_as_markdown_});
}
sub _build_markdown_head {
my $parser = shift;
my $data = $parser->_private;
return join "\n",
map { qq![[meta \l$_="$data->{$_}"]]! }
grep { defined $data->{$_} }
qw( Title Author );
}
## Escaping ##
# http://daringfireball.net/projects/markdown/syntax#backslash
# Markdown provides backslash escapes for the following characters:
#
# \ backslash
# ` backtick
# * asterisk
# _ underscore
# {} curly braces
# [] square brackets
# () parentheses
# # hash mark
# + plus sign
# - minus sign (hyphen)
# . dot
# ! exclamation mark
# However some of those only need to be escaped in certain places:
# * Backslashes *do* need to be escaped or they may be swallowed by markdown.
# * Word-surrounding characters (/[`*_]/) *do* need to be escaped mid-word
# because the markdown spec explicitly allows mid-word em*pha*sis.
# * I don't actually see anything that curly braces are used for.
# * Escaping square brackets is enough to avoid accidentally
# creating links and images (so we don't need to escape plain parentheses
# or exclamation points as that would generate a lot of unnecesary noise).
# Parentheses will be escaped in urls (&end_L) to avoid premature termination.
# * We don't need a backslash for every hash mark or every hyphen found mid-word,
# just the ones that start a line (likewise for plus and dot).
# (Those will all be handled by _escape_paragraph_markdown).
# Backslash escape markdown characters to avoid having them interpreted.
sub _escape_inline_markdown {
local $_ = $_[1];
# s/([\\`*_{}\[\]()#+-.!])/\\$1/g; # See comments above.
s/([\\`*_\[\]])/\\$1/g;
return $_;
}
# Escape markdown characters that would be interpreted
# at the start of a line.
sub _escape_paragraph_markdown {
local $_ = $_[1];
# Escape headings, horizontal rules, (unordered) lists, and blockquotes.
s/^([-+#>])/\\$1/mg;
# Markdown doesn't support backslash escapes for equal signs
# even though they can be used to underline a header.
# So use html to escape them to avoid having them interpreted.
s/^([=])/sprintf '&#x%x;', ord($1)/mge;
# Escape the dots that would wrongfully create numbered lists.
s/^( (?:>\s+)? \d+ ) (\.\x20)/$1\\$2/xgm;
return $_;
}
## Parsing ##
sub handle_text {
my ($self, $text) = @_;
# Markdown is for html, so use html entities.
$text =~ s/ / /g
if $self->_private->{nbsp};
# Unless we're in a code span or verbatim block.
unless( $self->_private->{no_escape} ){
# We could, in theory, alter what gets escaped according to context
# (for example, escape square brackets (but not parens) inside link text).
# The markdown produced might look slightly nicer but either way you're
# at the whim of the markdown processor to interpret things correctly.
# For now just escape everything.
# Don't let literal characters be interpreted as markdown.
$text = $self->_escape_inline_markdown($text);
}
$self->_save($text);
}
sub start_Document {
my ($self) = @_;
$self->_new_stack;
}
sub end_Document {
my ($self) = @_;
$self->_check_search_header;
my $end = pop @{ $self->_private->{stacks} };
@{ $self->_private->{stacks} } == 0
or die 'Document ended with stacks remaining';
my @doc = $self->_chomp_all(join('', @$end)) . $/;
if( $self->include_meta_tags ){
unshift @doc, $self->_build_markdown_head, ($/ x 2);
}
print { $self->{output_fh} } @doc;
}
## Blocks ##
sub start_Verbatim {
my ($self) = @_;
$self->_new_stack;
$self->_private->{no_escape} = 1;
}
sub end_Verbatim {
my ($self) = @_;
my $text = $self->_pop_stack_text;
$text = $self->_indent_verbatim($text);
$self->_private->{no_escape} = 0;
# Verbatim blocks do not generate a separate "Para" event.
$self->_save_block($text);
}
sub _indent_verbatim {
my ($self, $paragraph) = @_;
# NOTE: Pod::Simple expands the tabs for us (as suggested by perlpodspec).
# Pod::Simple also has a 'strip_verbatim_indent' attribute
# but it doesn't sound like it gains us anything over this method.
# POD verbatim can start with any number of spaces (or tabs)
# markdown should be 4 spaces (or a tab)
# so indent any paragraphs so that all lines start with at least 4 spaces
my @lines = split /\n/, $paragraph;
my $indent = ' ' x 4;
foreach my $line ( @lines ){
next unless $line =~ m/^( +)/;
# find the smallest indentation
$indent = $1 if length($1) < length($indent);
}
if( (my $smallest = length($indent)) < 4 ){
# invert to get what needs to be prepended
$indent = ' ' x (4 - $smallest);
# Prepend indent to each line.
# We could check /\S/ to only indent non-blank lines,
# but it's backward compatible to respect the whitespace.
# Additionally, both pod and markdown say they ignore blank lines
# so it shouldn't hurt to leave them in.
$paragraph = join "\n", map { length($_) ? $indent . $_ : '' } @lines;
}
return $paragraph;
}
sub start_Para {
$_[0]->_new_stack;
}
sub end_Para {
my ($self) = @_;
my $text = $self->_pop_stack_text;
$text = $self->_escape_paragraph_markdown($text);
$self->_save_block($text);
}
## Headings ##
sub start_head1 { $_[0]->_start_head(1) }
sub end_head1 { $_[0]->_end_head(1) }
sub start_head2 { $_[0]->_start_head(2) }
sub end_head2 { $_[0]->_end_head(2) }
sub start_head3 { $_[0]->_start_head(3) }
sub end_head3 { $_[0]->_end_head(3) }
sub start_head4 { $_[0]->_start_head(4) }
sub end_head4 { $_[0]->_end_head(4) }
sub _check_search_header {
my ($self) = @_;
# Save the text since the last heading if we want it for metadata.
if( my $last = $self->_private->{search_header} ){
for( $self->_private->{$last} = $self->_last_string ){
s/\A\s+//;
s/\s+\z//;
}
}
}
sub _start_head {
my ($self) = @_;
$self->_check_search_header;
$self->_new_stack;
}
sub _end_head {
my ($self, $num) = @_;
my $h = '#' x $num;
my $text = $self->_pop_stack_text;
$self->_private->{search_header} =
$text =~ /NAME/ ? 'Title'
: $text =~ /AUTHOR/ ? 'Author'
: undef;
# TODO: option for $h suffix
# TODO: put a name="" if $self->{embed_anchor_tags}; ?
# https://rt.cpan.org/Ticket/Display.html?id=57776
$self->_save_block(join(' ', $h, $text));
}
## Lists ##
# TODO: over_empty
sub _start_list {
my ($self) = @_;
$self->_new_stack;
# Nest again b/c start_item will pop this to look for preceding content.
$self->_increase_indent;
$self->_new_stack;
}
sub _end_list {
my ($self) = @_;
$self->_handle_between_item_content;
# Finish the list.
# All the child elements should be blocks,
# but don't end with a double newline.
my $text = $self->_chomp_all($self->_pop_stack_text);
# FIXME:
$_[0]->_save_line($text . $/);
}
sub _handle_between_item_content {
my ($self) = @_;
# This might be empty (if the list item had no additional content).
if( my $text = $self->_pop_stack_text ){
# Else it's a sub-document.
# If there are blocks we need to separate with blank lines.
if( $self->_private->{last_state}->{blocks} ){
$text = $/ . $text;
}
# If not, we can condense the text.
# In this module's history there was a patch contributed to specifically
# produce "huddled" lists so we'll try to maintain that functionality.
else {
$text = $self->_chomp_all($text) . $/;
}
$self->_save($text)
}
$self->_decrease_indent;
}
sub _start_item {
my ($self) = @_;
$self->_handle_between_item_content;
$self->_new_stack;
}
sub _end_item {
my ($self, $marker) = @_;
$self->_save_line($self->_indent($marker . ' ' . $self->_pop_stack_text));
# Store any possible contents in a new stack (like a sub-document).
$self->_increase_indent;
$self->_new_stack;
}
sub start_over_bullet { $_[0]->_start_list }
sub end_over_bullet { $_[0]->_end_list }
sub start_item_bullet { $_[0]->_start_item }
sub end_item_bullet { $_[0]->_end_item('-') }
sub start_over_number { $_[0]->_start_list }
sub end_over_number { $_[0]->_end_list }
sub start_item_number {
$_[0]->_start_item;
# It seems like this should be a stack,
# but from testing it appears that the corresponding 'end' event
# comes right after the text (it doesn't surround any embedded content).
# See t/nested.t which shows start-item, text, end-item, para, start-item....
$_[0]->_private->{item_number} = $_[1]->{number};
}
sub end_item_number {
my ($self) = @_;
$self->_end_item($self->_private->{item_number} . '.');
}
# Markdown doesn't support definition lists
# so do regular (unordered) lists with indented paragraphs.
sub start_over_text { $_[0]->_start_list }
sub end_over_text { $_[0]->_end_list }
sub start_item_text { $_[0]->_start_item }
sub end_item_text { $_[0]->_end_item('-')}
# perlpodspec equates an over/back region with no items to a blockquote.
sub start_over_block {
# NOTE: We don't actually need to indent for a blockquote.
$_[0]->_new_stack;
}
sub end_over_block {
my ($self) = @_;
# Chomp first to avoid prefixing a blank line with a `>`.
my $text = $self->_chomp_all($self->_pop_stack_text);
# NOTE: Paragraphs will already be escaped.
# I don't really like either of these implementations
# but the join/map/split seems a little better and benches a little faster.
# You would lose the last newline but we've already chomped.
#$text =~ s{^(.)?}{'>' . (defined($1) && length($1) ? (' ' . $1) : '')}mge;
$text = join $/, map { length($_) ? '> ' . $_ : '>' } split qr-$/-, $text;
$self->_save_block($text);
}
## Custom Formats ##
sub start_for {
my ($self, $attr) = @_;
$self->_new_stack;
if( $attr->{target} eq 'html' ){
# Use another stack so we can indent
# (not syntactily necessary but seems appropriate).
$self->_new_stack;
$self->_increase_indent;
$self->_private->{no_escape} = 1;
# Mark this so we know to undo it.
$self->_stack_state->{for_html} = 1;
}
}
sub end_for {
my ($self) = @_;
# Data gets saved as a block (which will handle indents),
# but if there was html we'll alter this, so chomp and save a block again.
my $text = $self->_chomp_all($self->_pop_stack_text);
if( $self->_private->{last_state}->{for_html} ){
$self->_private->{no_escape} = 0;
# Save it to the next stack up so we can pop it again (we made two stacks).
$self->_save($text);
$self->_decrease_indent;
$text = join "\n", '<div>', $self->_chomp_all($self->_pop_stack_text), '</div>';
}
$self->_save_block($text);
}
# Data events will be emitted for any formatted regions that have been enabled
# (by default, `markdown` and `html`).
sub start_Data {
my ($self) = @_;
# TODO: limit this to what's in attr?
$self->_private->{no_escape}++;
$self->_new_stack;
}
sub end_Data {
my ($self) = @_;
my $text = $self->_pop_stack_text;
$self->_private->{no_escape}--;
$self->_save_block($text);
}
## Codes ##
sub start_B { $_[0]->_save('**') }
sub end_B { $_[0]->start_B() }
sub start_I { $_[0]->_save('_') }
sub end_I { $_[0]->start_I() }
sub start_C {
my ($self) = @_;
$self->_new_stack;
$self->_private->{no_escape}++;
}
sub end_C {
my ($self) = @_;
$self->_private->{no_escape}--;
$self->_save( $self->_wrap_code_span($self->_pop_stack_text) );
}
# Use code spans for F<>.
sub start_F { shift->start_C(@_); }
sub end_F { shift ->end_C(@_); }
sub start_S { $_[0]->_private->{nbsp}++; }
sub end_S { $_[0]->_private->{nbsp}--; }
sub start_L {
my ($self, $flags) = @_;
$self->_new_stack;
push @{ $self->_private->{link} }, $flags;
}
sub end_L {
my ($self) = @_;
my $flags = pop @{ $self->_private->{link} }
or die 'Invalid state: link end with no link start';
my ($type, $to, $section) = @{$flags}{qw( type to section )};
my $url = (
$type eq 'url' ? $to
: $type eq 'man' ? $self->format_man_url($to, $section)
: $type eq 'pod' ? $self->format_perldoc_url($to, $section)
: undef
);
my $text = $self->_pop_stack_text;
# NOTE: I don't think the perlpodspec says what to do with L<|blah>
# but it seems like a blank link text just doesn't make sense
if( !length($text) ){
$text =
$section ?
$to ? sprintf('"%s" in %s', $section, $to)
: ('"' . $section . '"')
: $to;
}
# FIXME: What does Pod::Simple::X?HTML do for this?
# if we don't know how to handle the url just print the pod back out
if (!$url) {
$self->_save(sprintf 'L<%s>', $flags->{raw});
return;
}
# In the url we need to escape quotes and parentheses lest markdown
# break the url (cut it short and/or wrongfully interpret a title).
# Backslash escapes do not work for the space and quotes.
# URL-encoding the space is not sufficient
# (the quotes confuse some parsers and produce invalid html).
# I've arbitratily chosen HTML encoding to hide them from markdown
# while mangling the url as litle as possible.
$url =~ s/([ '"])/sprintf '&#x%x;', ord($1)/ge;
# We also need to double any backslashes that may be present
# (lest they be swallowed up) and stop parens from breaking the url.
$url =~ s/([\\()])/\\$1/g;
# TODO: put section name in title if not the same as $text
$self->_save('[' . $text . '](' . $url . ')');
}
sub start_X {
$_[0]->_new_stack;
}
sub end_X {
my ($self) = @_;
my $text = $self->_pop_stack_text;
# TODO: mangle $text?
# TODO: put <a name="$text"> if configured
}
# A code span can be delimited by multiple backticks (and a space)
# similar to pod codes (C<< code >>), so ensure we use a big enough
# delimiter to not have it broken by embedded backticks.
sub _wrap_code_span {
my ($self, $arg) = @_;
my $longest = 0;
while( $arg =~ /([`]+)/g ){
my $len = length($1);
$longest = $len if $longest < $len;
}
my $delim = '`' x ($longest + 1);
my $pad = $longest > 0 ? ' ' : '';
return $delim . $pad . $arg . $pad . $delim;
}
## Link Formatting (TODO: Move this to another module) ##
sub format_man_url {
my ($self, $to) = @_;
my ($page, $part) = ($to =~ /^ ([^(]+) (?: \( (\S+) \) )? /x);
return $self->man_url_prefix . ($part || 1) . '/' . ($page || $to);
}
sub format_perldoc_url {
my ($self, $name, $section) = @_;
my $url_prefix = $self->perldoc_url_prefix;
my $url = '';
# If the link is to another module (external link).
if ($name) {
$url = $url_prefix . $name;
}
# See https://rt.cpan.org/Ticket/Display.html?id=57776
# for a discussion on the need to mangle the section.
if ($section){
my $method = $url
# If we already have a prefix on the url it's external.
? $self->perldoc_fragment_format
# Else an internal link points to this markdown doc.
: $self->markdown_fragment_format;
$method = 'format_fragment_' . $method
unless ref($method);
{
# Set topic to enable code refs to be simple.
local $_ = $section;
$section = $self->$method($section);
}
$url .= '#' . $section;
}
return $url;
}
# TODO: simple, pandoc, etc?
sub format_fragment_markdown {
my ($self, $section) = @_;
# If this is an internal link (to another section in this doc)
# we can't be sure what the heading id's will look like
# (it depends on what is rendering the markdown to html)
# but we can try to follow popular conventions.
# http://johnmacfarlane.net/pandoc/demo/example9/pandocs-markdown.html#header-identifiers-in-html-latex-and-context
#$section =~ s/(?![-_.])[[:punct:]]//g;
#$section =~ s/\s+/-/g;
$section =~ s/\W+/-/g;
$section =~ s/-+$//;
$section =~ s/^-+//;
$section = lc $section;
#$section =~ s/^[^a-z]+//;
$section ||= 'section';
return $section;
}
{
# From Pod::Simple::XHTML 3.28.
# The strings gets passed through encode_entities() before idify().
# If we don't do it here the substitutions below won't operate consistently.
# encode_entities {
my %entities = (
q{>} => 'gt',
q{<} => 'lt',
q{'} => '#39',
q{"} => 'quot',
q{&} => 'amp',
);
my
$ents = join '', keys %entities;
# }
sub format_fragment_pod_simple_xhtml {
my ($self, $t) = @_;
# encode_entities {
$t =~ s/([$ents])/'&' . ($entities{$1} || sprintf '#x%X', ord $1) . ';'/ge;
# }
# idify {
for ($t) {
s/<[^>]+>//g; # Strip HTML.
s/&[^;]+;//g; # Strip entities.
s/^\s+//; s/\s+$//; # Strip white space.
s/^([^a-zA-Z]+)$/pod$1/; # Prepend "pod" if no valid chars.
s/^[^a-zA-Z]+//; # First char must be a letter.
s/[^-a-zA-Z0-9_:.]+/-/g; # All other chars must be valid.
s/[-:.]+$//; # Strip trailing punctuation.
}
# }
return $t;
}
}
sub format_fragment_pod_simple_html {
my ($self, $section) = @_;
# From Pod::Simple::HTML 3.28.
# section_name_tidy {
$section =~ s/^\s+//;
$section =~ s/\s+$//;
$section =~ tr/ /_/;
$section =~ tr/\x00-\x1F\x80-\x9F//d if 'A' eq chr(65); # drop crazy characters
#$section = $self->unicode_escape_url($section);
# unicode_escape_url {
$section =~ s/([^\x00-\xFF])/'('.ord($1).')'/eg;
# Turn char 1234 into "(1234)"
# }
$section = '_' unless length $section;
return $section;
# }
}
sub format_fragment_metacpan { shift->format_fragment_pod_simple_xhtml(@_); }
sub format_fragment_sco { shift->format_fragment_pod_simple_html(@_); }
1;
__END__
#line 1265