The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
use v6;
module Kwid::Event::Parser-0.0.1;

my @EVENTS = (
    'start_document',
    'end_document',
    
    'start_element',
    'end_element',

    'start_modifier',
    'end_modifier',

    'verbatim',        
    'string',
    'newline'
);

sub parse (Str $filename, Hash %events is rw) returns Void is export {
    
    for @EVENTS -> $e { %events{$e} = sub { } unless ?%events{$e} }
    
    my $is_parsing = 0;    
    my $in_list    = 0;

    my $fh = open($filename);    
    loop {
        my $line = $fh.readline;
        last unless $line.defined; # exit as soon as possible
        if ($line ~~ rx:perl5{^=kwid}) {
            $is_parsing = 1;
            %events<start_document>();
        }
        else {
            if ($is_parsing) {
                given $line {
                    when rx:perl5{^=cut} { 
                        if ($in_list) {
                            %events<end_element>('list');
                            $in_list = 0;
                        }                    
                        $is_parsing = 0;
                        %events<end_document>();                    
                    }
                    when rx:perl5{^(=+)\s(.*?)$} {
                        if ($in_list) {
                            %events<end_element>('list');
                            $in_list = 0;
                        }
                        my $size = $0.bytes;
                        %events<start_element>('header', $size);
                        interpolate($1, %events);
                        %events<end_element>('header', $size);                       
                    }
                    when rx:perl5{^([\*\-]+)\s(.*?)$} {
                        if (!$in_list) {
                            %events<start_element>('list');
                            $in_list = 1;
                        }
                        my $depth = $0.bytes;
                        %events<start_element>('item');
                        interpolate($1, %events);
                        %events<end_element>('item');                     
                    }                   
                    when rx:perl5{^\s(.*?)$} {
                        if ($in_list) {
                            %events<end_element>('list');
                            $in_list = 0;
                        }                    
                        my $verbatim = "$0\n";
                        my $_line = $fh.readline;
                        while (defined($_line)             && 
                              !($_line ~~ rx:perl5{^\n$})  && 
                                $_line ~~ rx:perl5{^\s(.*?)$} ) {                                
                            $verbatim ~= "$0\n";
                            $_line = $fh.readline;
                        }           
                        $verbatim .= chomp;
                        %events<start_element>('verbatim');
                        %events<verbatim>($verbatim);
                        %events<end_element>('verbatim');                        
                    }
                    default {
                        if ($line ne '') {
                            if ($in_list) {
                                %events<end_element>('list');
                                $in_list = 0;
                            }                        
                            %events<start_element>('paragraph');
                            interpolate($line, %events);   
                            my $_line = $fh.readline;
                            while (defined($_line)             && 
                                   !($_line ~~ rx:perl5{^\n$}) ) {
                                interpolate($_line, %events);
                                $_line = $fh.readline;
                            }                          
                            %events<end_element>('paragraph');
                        }
                        else {
                            %events<newline>();
                        }
                    }
                }
            } 
        }
    }
    $fh.close();
}

sub interpolate (Str $text, Hash %events) returns Void {  
    %events<start_element>('line_interpolation');
    # grab all the tokens with a split
    my @tokens = split(rx:perl5{([\*\/\`])}, $text); #`
    @tokens = @tokens.grep:{ defined($_) }; 
    # this is a memory stack for modifiers
    # it helps us track down problems
    my @modifier_stack;
    my $in_code = 0;
    for (@tokens) -> $token {    
        given $token {
            when rx:perl5{([\*\/\`])} { #`
                if ($in_code && $token ne '`') {
                    %events<string>($token);                    
                }
                else {
                    # if the stack is empty then ...
                    unless (@modifier_stack) {
                        # push the latest modifier on to it ...
                        @modifier_stack.push($token);
                        # and start an event ..
                        %events<start_modifier>($token);    
                        $in_code = 1 if $token eq '`';
                    }
                    else {
                        # if the top of the stack is 
                        # equal to our token then ...
                        if ($token eq @modifier_stack[-1]) {
                            # pop the top off ...
                            @modifier_stack.pop();
                            # and end the event ...
                            %events<end_modifier>($token);    
                            $in_code = 0 if $token eq '`';                            
                        }
                        else {
                            # push the latest modifier on to it ...
                            @modifier_stack.push($token);
                            # and start an event ..
                            %events<start_modifier>($token);                         
                            $in_code = 1 if $token eq '`';                            
                        }
                    }
                }
            }
            default {
                %events<string>($token);
            }
        }
    }
    # if we find we still have some modifiers
    # on the stack, then we have an error, so 
    # we need to throw an exception about it.
    (!@modifier_stack) 
        || die "Unbalanced modifier(s) found (" ~ join(", ", @modifier_stack) ~ ") in the string (@tokens)";
    %events<end_element>('line_interpolation');      
}

=kwid

= THOUGHTS

Some basic notes here as I develop this.

- how should I handle blocks of text which seem to still be inside a list?

Basically the item body, much like this text is.

- how should I handle *, / and ` inside text? 

One idea is that if I get to the end of the line and find no closing match, I should
ignore the opening one. However, this means I should defer events until I get to the
end of the line. I could probably do that, but it could get ugly.s

= TODO

- add in URL handling with []

=cut