The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.

use v6;
module Pod::Event::Parser-0.0.2;

my @EVENTS = (
    'start_document',
    'end_document',
    
    'start_element',
    'end_element',

    'start_modifier',
    'end_modifier',

    'verbatim',        
    'string',
    'newline'
);

sub parse (Str $filename, Hash %events is rw) returns Void is export {
    
    for @EVENTS -> $e { %events{$e} = sub { } unless ?%events{$e} }
    
    my $is_parsing = 0;
    my @for_or_begin;
    
    my $fh = open($filename);    
    loop {
        my $line = $fh.readline;
        last unless $line.defined; # exit as soon as possible
        chomp($line);
        if ($line ~~ rx:perl5{^=pod}) {
            $is_parsing = 1;
            %events<start_document>();
        }
        else {
            if ($is_parsing) {
                given $line {
                    when rx:perl5{^=cut} { 
                        $is_parsing = 0;
                        %events<end_document>();                    
                    }
                    when rx:perl5{^=head(\d)\s(.*?)$} {
                        my $size = "$0";
                        %events<start_element>('header', $size);
                        interpolate($1, %events);
                        %events<end_element>('header', $size);                       
                    }
                    when rx:perl5{^=over\s(\d)$} {
                        %events<start_element>('list', "$0");
                    }
                    when rx:perl5{^=item\s(.*?)$} {
                        %events<start_element>('item');
                        interpolate($0, %events);
                        %events<end_element>('item');                     
                    }
                    when rx:perl5{^=back} {
                        %events<end_element>('list');
                    }
                    when rx:perl5{^=begin\s(.*?)$} {
                        push(@for_or_begin, 'begin');
                        %events<start_element>('begin', "$0");
                    }                    
                    when rx:perl5{^=for\s(.*?)$} {
                        push(@for_or_begin, 'for');
                        %events<start_element>('for', "$0");
                    }                    
                    when rx:perl5{^=end\s(.*?)$} {
                        my $last_for_or_begin = pop(@for_or_begin);
                        if ($last_for_or_begin eq 'for') {
                            %events<end_element>('for', "$0");
                        }
                        else {
                            %events<end_element>('begin', "$0");
                        }
                    }                    
                    when rx:perl5{^\s(.*?)$} {
                        my $verbatim = "$0\n";
                        my $_line = $fh.readline;
                        while (defined($_line)             && 
                              !($_line ~~ rx:perl5{^\n$})  && 
                                $_line ~~ rx:perl5{^\s(.*?)$} ) {                                
                            $verbatim ~= "$0\n";
                            $_line = $fh.readline;
                        }                        
                        %events<start_element>('verbatim');
                        %events<verbatim>($verbatim);
                        %events<end_element>('verbatim');                        
                    }
                    default {
                        if ($line ne '') {
                            %events<start_element>('paragraph');
                            interpolate($line, %events);   
                            my $_line = $fh.readline;
                            while (defined($_line)             && 
                                   !($_line ~~ rx:perl5{^\n$}) ) {
                                chomp($_line);
                                interpolate($_line, %events);
                                $_line = $fh.readline;
                            }                          
                            %events<end_element>('paragraph');
                        }
                        else {
                            %events<newline>();
                        }
                    }
                }
            } 
        }
    }
    $fh.close();
}

sub interpolate (Str $text, Hash %events) returns Void {    
    %events<start_element>('line_interpolation');
    # grab all the tokens with a split
	my @tokens = split(rx:perl5{([A-Z]<\s+|[A-Z]<|\s+>|>)}, $text);
    @tokens = @tokens.grep:{ defined($_) }; 
	# this is a memory stack for modifiers
	# it helps us track down problems
	my @modifier_stack;
	for (@tokens) -> $token {	
        given $token {
            when rx:perl5{^([A-Z])<$} {
                push(@modifier_stack, $0);
                %events<start_modifier>($0);
            }
            when rx:perl5{^>$} {
                # if we dont have anything on the stack
                # then we are not balanced, and should die
                (+@modifier_stack) || die "Unbalanced close modifier (>) found";
                # if we have one the stack ..
                # then this one probably matches,
                # so we can pop that modifier off  
                my $last_mod = pop(@modifier_stack);
                %events<end_modifier>($last_mod);
            }
            default {
                %events<string>($token);
            }
        }
	}
	# if we find we still have some modifiers
	# on the stack, then we have an error, so 
	# we need to throw an exception about it.
	(!@modifier_stack) 
		|| die "Unbalanced modifier(s) found (" ~ join(", ", @modifier_stack) ~ ") in the string (@tokens)";
    %events<end_element>('line_interpolation');        
}

=pod

=head1 NAME

Pod::Event::Parser - A simple event based POD parser

=head1 SYNOPSIS

  use v6;
  use Pod::Event::Parser;
  
  parse("path/to/file.pod", %event_handlers);

=head1 DESCRIPTION

This is a straightforward event based POD parser, it is modeled after XML SAX 
parsers and is currently in the early stages of development.   

=head1 LIMITATIONS & CAVEATS

Basic parsing functionality is fairly solid right now. The F<t/complex.t> test 
actually roundtrip parses this document successfully. However, more complex POD 
documents may not do as well. Error reporting is still pretty minimal and the 
parser is not very forgiving about whitespace in some situations. 

=head1 FUNCTIONS

=over 4

=item B<parse (Str $filename, Hash %events)>

The main C<parse> function currently takes a C<$filename> and a Hash of C<%events> 
which it uses to process each line of the file.

=back

=head1 EVENTS

=head2 Document Events

=over 4

=item I<start_document>

This event is fired when a '=pod' directive is reached.

=item I<end_document>

This event is fired when a '=cut' directive is reached.

=back

=head2 Element Events

For each POD element, an event is fired, the first argument to all these events is
the C<$event_type> (header, list, item, etc.). 

=over 4

=item I<start_element ('header', $size)>

This event is fired once the '=head(n)' directive is reached. This does not include 
the text of the header, that will be processed as a interpolated string (to allow for
any modifiers).

=item I<end_element ('header')>

This event is fired after the '=head(n)' directive and it's accompanying text has been
parsed.

=item I<start_element ('begin')>

This event is fired when a '=begin' directive is reached.

=item I<end_element ('begin')>

This event is fired when a '=end' directive (which is preceeded by an '=begin' directive) 
is reached.

=item I<start_element ('for')>

This event is fired when a '=for' directive is reached.

=item I<end_element ('for')>

This event is fired when a '=end' directive (which is preceeded by an '=for' directive) 
is reached.    
    
=item I<start_element ('list')>

This event is fired when an '=over' directive is reached.

=item I<end_element ('list')>

This event is fired when a '=back' directive is reached.
    
=item I<start_element ('item')>

This event is fired when an '=item' directive is reached, the next event fired will be
any text which follows the item.

=item I<end_element ('item')>

This event is fired once the last '=item' directive, followed by any text on the same line.

=item I<start_element ('verbatim')>

This event is fired once a verbatim section is found.
    
=item I<end_element ('verbatim')>

This event closes a verbatim section.

=item I<start_element ('line_interpolation')>

This event is begun when any block of characters is interpolated for modifiers.

=item I<end_element ('line_interpolation')>

This event is fired when interpolation is complete.

=back

=head2 Modifier Events

=over 4

=item I<start_modifier>

This event is fired when a modifier is encountered.

=item I<end_modifier>

This event closes a modifier.

=back

=head2 Text Events

=over 4

=item I<verbatim>

This event handles the entire verbatim string.

=item I<string>

This event processes raw strings.

=item I<newline>

This event handles newlines in the text, it is a specialization of the 'string' event.

=back

=head1 SEE ALSO

=over 4

=item Any of the Perl5 POD parsers

=item XML::SAX

=back

=head1 AUTHOR

stevan little, E<lt>stevan@iinteractive.comE<gt>

=head1 COPYRIGHT

Copyright (c) 2005. Stevan Little. All rights reserved.

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut