## Base Class for Creating Text Format Parsers
#
# Document::Parser is a base class that you can use to easily generate a
# parser for text document markups (like Wiki or POD markups).
#
# See this parser as an example:
#
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
#
# And this module for usage of the parser:
#
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
#
# Copyright (c) 2007. Ingy döt Net. All rights reserved.
#
# Licensed under the same terms as Perl itself.
##
package Document::Parser;
use strict;
use warnings;
## Synopsis:
#
# package MyParser;
# use base 'Document::Parser';
#
# sub create_grammar {
# return {
# # ... define a grammar hash here ...
# };
# }
##
##------------------------------------------------------------------------------
# Parser object constructor/initializer
##------------------------------------------------------------------------------
sub new {
my $class = shift;
return bless { @_ }, ref($class) || $class;
}
##------------------------------------------------------------------------------
# $parsed = $parser->parse($wikitext);
##------------------------------------------------------------------------------
sub parse {
my $self = shift;
$self->{input} ||= shift;
$self->{grammar} ||= $self->set_grammar;
$self->{receiver} ||= $self->set_receiver;
$self->{receiver}->init;
$self->parse_blocks('top');
return $self->{receiver}->content;
}
##
# Call `set_receiver` to reset the receiver for a new parse.
sub set_receiver {
my $self = shift;
$self->{receiver} = shift || $self->create_receiver;
}
sub set_grammar {
my $self = shift;
$self->{grammar} = shift || $self->create_grammar;
}
#-------------------------------------------------------------------------------
# Parse input into a series of blocks. With each iteration the parser must
# match a block at position 0 of the text, and remove that block from the
# input reparse it further. This continues until there is no input left.
#-------------------------------------------------------------------------------
sub parse_blocks {
my $self = shift;
my $container_type = shift;
my $types = $self->{grammar}{$container_type}{blocks};
while (my $length = length $self->{input}) {
for my $type (@$types) {
my $matched = $self->find_match(matched_block => $type) or next;
substr($self->{input}, 0, $matched->{end}, '');
$self->handle_match($type, $matched);
last;
}
die $self->reduction_error
unless length($self->{input}) < $length;
}
return;
}
#-------------------------------------------------------------------------------
# This code parses a chunk into interleaved pieces of plain text and
# phrases. It repeatedly tries to match every possible phrase and
# then takes the match closest to the start. Everything before a
# match is written as text. Matched phrases are subparsed according
# to their rules. This continues until the input is all eaten.
#-------------------------------------------------------------------------------
sub parse_phrases {
my $self = shift;
my $container_type = shift;
my $types = $self->{grammar}{$container_type}{phrases};
while (length $self->{input}) {
my $match;
for my $type (@$types) {
my $matched = $self->find_match(matched_phrase => $type) or next;
if (not defined $match or $matched->{begin} < $match->{begin}) {
$match = $matched;
$match->{type} = $type;
last if $match->{begin} == 0;
}
}
if (! $match) {
$self->{receiver}->text_node($self->{input});
last;
}
my ($begin, $end, $type) = @{$match}{qw(begin end type)};
$self->{receiver}->text_node(substr($self->{input}, 0, $begin))
unless $begin == 0;
substr($self->{input}, 0, $end, '');
$type = $match->{type};
$self->handle_match($type, $match);
}
return;
}
sub find_match {
my ($self, $matched_func, $type) = @_;
my $matched;
if (my $regexp = $self->{grammar}{$type}{match}) {
if (ref($regexp) eq 'ARRAY') {
for my $re (@$regexp) {
if ($self->{input} =~ $re) {
$matched = $self->$matched_func;
last;
}
}
return unless $matched;
}
else {
return unless $self->{input} =~ $regexp;
$matched = $self->$matched_func;
}
}
else {
my $func = "match_$type";
$matched = $self->$func or return;
}
return $matched;
}
sub handle_match {
my ($self, $type, $match) = @_;
my $func = "handle_$type";
if ($self->can($func)) {
$self->$func($match, $type);
}
else {
my $grammar = $self->{grammar}{$type};
my $parse = $grammar->{blocks}
? 'parse_blocks'
: 'parse_phrases';
my @filter = $grammar->{filter}
? ($grammar->{filter})
: ();
$self->subparse($parse, $match, $type, @filter);
}
}
sub subparse {
my ($self, $func, $match, $type, $filter) = @_;
$self->{receiver}->begin_node($type);
my $parser = $self->new(
grammar => $self->{grammar},
receiver => $self->{receiver}->new,
input => $filter
? do { $_ = $match->{text}; &$filter(); $_ }
: $match->{text},
);
$parser->$func($type);
$self->{receiver}->insert($parser->{receiver});
$self->{receiver}->end_node($type);
}
#-------------------------------------------------------------------------------
# Helper functions
#
# These are the odds and ends called by the code above.
#-------------------------------------------------------------------------------
sub reduction_error {
my $self = shift;
return ref($self) . qq[ reduction error for:\n"$self->{input}"];
}
sub matched_block {
my $begin = defined $_[2] ? $_[2] : $-[0];
die "All blocks must match at position 0"
if "$begin" ne "0";
return +{
text => ($_[1] || $1),
end => ($_[3] || $+[0]),
};
}
sub matched_phrase {
return +{
text => ($_[1] || $1),
begin => (defined $_[2] ? $_[2] : $-[0]),
end => ($_[3] || $+[0]),
};
}
1;
=for perldoc
This POD generated by Perldoc-0.21.
DO NOT EDIT. Your changes will be lost.
=encoding utf8
=head1 NAME
Document::Parser - Base Class for Creating Text Format Parsers
=head1 SYNOPSIS
package MyParser;
use base 'Document::Parser';
sub create_grammar {
return {
# ... define a grammar hash here ...
};
}
=head1 DESCRIPTION
Document::Parser is a base class that you can use to easily generate a
parser for text document markups (like Wiki or POD markups).
See this parser as an example:
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm
And this module for usage of the parser:
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm
=head1 AUTHOR
Ingy döt Net
=head1 COPYRIGHT
Copyright (c) 2007. Ingy döt Net. All rights reserved.
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
See http://www.perl.com/perl/misc/Artistic.html
=cut