The London Perl and Raku Workshop takes place on 26th Oct 2024. If your company depends on Perl, please consider sponsoring and/or attending.
package Language::P::Parser::Regex;

use strict;
use warnings;

use base qw(Class::Accessor::Fast);

use Language::P::Lexer qw(:all);
use Language::P::ParseTree qw(:all);

__PACKAGE__->mk_ro_accessors( qw(lexer generator runtime
                                 interpolate) );

# will be used to parse embedded code blocks
sub parser { die; }

sub parse_string {
    my( $self, $string ) = @_;

    $self->{lexer} = Language::P::Lexer->new( { string => $string } );
    $self->_parse;
}

sub _parse {
    my( $self ) = @_;
    my( @values );

    $self->lexer->quote( { interpolate          => $self->interpolate,
                           pattern              => 1,
                           interpolated_pattern => 0,
                           } );

    my( $in_group, $st ) = ( 0, \@values );
    for(;;) {
        my $value = $self->lexer->lex_quote;

        if( $value->[O_TYPE] == T_STRING ) {
            push @$st, Language::P::ParseTree::Constant->new
                           ( { flags => CONST_STRING,
                               value => $value->[O_VALUE],
                               } );
        } elsif( $value->[O_TYPE] == T_PATTERN ) {
            if( $value->[O_VALUE] eq ')' ) {
                die 'Unmatched ) in regex' unless $in_group;

                --$in_group;
                $st = pop @values;
            } elsif( $value->[O_VALUE] eq '(?' ) {
                ++$in_group;
                my $type = $self->lexer->lex_pattern_group;

                if( $type->[O_VALUE] eq ':' ) {
                    push @$st, Language::P::ParseTree::RXGroup->new
                                   ( { components => [],
                                       capture    => 0,
                                       } );
                } else {
                    # remaining (?...) constructs
                    die "Unhandled (?" . $type->[O_VALUE] . ") in regex";
                }

                my $nst = $st->[-1]->components;
                push @values, $st;
                $st = $nst;
            } elsif( $value->[O_VALUE] eq '(' ) {
                ++$in_group;
                push @$st, Language::P::ParseTree::RXGroup->new
                               ( { components => [],
                                   capture    => 1,
                                   } );
                my $nst = $st->[-1]->components;
                push @values, $st;
                $st = $nst;
            } elsif( $value->[O_VALUE] eq '|' ) {
                my $alt = Language::P::ParseTree::RXAlternation->new
                              ( { left  => [ @$st ],
                                  right => [],
                                  } );
                @$st = $alt;
                $st = $alt->right;
            } elsif( $value->[O_RX_REST]->[0] == T_QUANTIFIER ) {
                die 'Nothing to quantify in regex' unless @$st;

                if(    $st->[-1]->is_constant
                    && length( $st->[-1]->value ) > 1 ) {
                    my $last = chop $st->[-1]->{value}; # XXX

                    push @$st, Language::P::ParseTree::Constant->new
                                   ( { flags => CONST_STRING,
                                       value => $last,
                                       } );
                }

                $st->[-1] = Language::P::ParseTree::RXQuantifier->new
                                ( { node   => $st->[-1],
                                    min    => $value->[O_RX_REST]->[1],
                                    max    => $value->[O_RX_REST]->[2],
                                    greedy => $value->[O_RX_REST]->[3],
                                    } );
            } elsif( $value->[O_RX_REST]->[0] == T_ASSERTION ) {
                push @$st, Language::P::ParseTree::RXAssertion->new
                               ( { type => $value->[O_RX_REST]->[1],
                                   } );
            } elsif( $value->[O_RX_REST]->[0] == T_CLASS ) {
                push @$st, Language::P::ParseTree::RXSpecialClass->new
                               ( { type => $value->[O_RX_REST]->[1],
                                   } );
            } elsif( $value->[O_RX_REST]->[0] == T_CLASS_START ) {
                push @$st, Language::P::ParseTree::RXClass->new
                               ( { elements => [],
                                   } );

                _parse_charclass( $self, $st->[-1] );
            } else {
                Carp::confess $value->[O_TYPE], ' ', $value->[O_VALUE], ' ',
                              $value->[O_RX_REST]->[0];
            }
        } elsif( $value->[O_TYPE] == T_EOF ) {
            last;
        } elsif( $value->[O_TYPE] == T_DOLLAR || $value->[O_TYPE] == T_AT ) {
                Carp::confess $value->[O_TYPE], ' ', $value->[O_VALUE];
        }
    }

    die 'Unmatched ( in regex' if $in_group;

    return \@values;
}

sub _parse_charclass {
    my( $self, $class ) = @_;
    my $st = $class->elements;
    my @la;

    for(;;) {
        my $value = @la ? pop @la : $self->lexer->lex_charclass;
        last if $value->[O_TYPE] == T_CLASS_END;
        if( $value->[O_TYPE] == T_STRING ) {
            my $next = $self->lexer->lex_charclass;

            if( $next->[O_TYPE] == T_MINUS ) {
                my $next_next = $self->lexer->lex_charclass;
                if( $next_next->[O_TYPE] == T_STRING ) {
                    push @$st, Language::P::ParseTree::RXRange->new
                                   ( { start => $value->[O_VALUE],
                                       end   => $next_next->[O_VALUE],
                                       } );
                    next;
                } else {
                    push @la, $next_next, $next;
                }
            } else {
                push @la, $next;
            }
        } elsif( $value->[O_TYPE] == T_CLASS ) {
            push @$st, Language::P::ParseTree::RXSpecialClass->new
                           ( { type => $value->[O_VALUE],
                                } );
            next;
        }

        push @$st, $value->[O_VALUE];
    }
}

1;