use strict;
use warnings;
use Clone;
package Regexp::Whitespace::Parser;
$Regexp::Whitespace::Parser::VERSION = '0.001_1'; # TRIAL
use YAPE::Regex qw( Regexp::Whitespace::Parser );
package Regexp::Whitespace::Parser::Element;
$Regexp::Whitespace::Parser::Element::VERSION = '0.001_1'; # TRIAL
our @ISA;
BEGIN {
push @ISA, qw( Clone );
}
sub convert {
# clone by default
return shift->clone;
}
package Regexp::Whitespace::Parser::container;
$Regexp::Whitespace::Parser::container::VERSION = '0.001_1'; # TRIAL
sub convert {
my $self = shift;
my $clone = $self->clone(1); # shallow copy
my @content = map { $_->convert } @{$self->{CONTENT}};
$clone->{CONTENT} = \@content;
return $clone;
}
# register this package into @ISA of container types
{
my @container_types = qw( cut lookahead lookbehind group capture );
no strict 'refs';
for my $type (@container_types) {
unshift @{ 'Regexp::Whitespace::Parser::' . $type . '::ISA' }, __PACKAGE__;
}
}
package Regexp::Whitespace::Parser::exact;
$Regexp::Whitespace::Parser::exact::VERSION = '0.001_1'; # TRIAL
sub convert {
my $self = shift;
my $exact = $self->exact_text;
# are there ocurrences of \s ?
if ( $exact =~ /\s/ ) {
my @pieces;
if ( length $exact > 1 ) {
# note: only 'text' types need this loop,
# being the only one whose exact text may have
# a length greater than 1
# assertions
die "panic: quantity modifier should not present for multi-character text" if $self->quant;
die "panic: non-greedy modifier should not present for multi-character text" if $self->ngreed;
LOOP : {
if ( $exact =~ / \G \z /xgc ) {
last LOOP;
}
if ( $exact =~ / \G \s+ /xgc ) {
# replace matches of /\s+/ with a macro '\s+'
push @pieces, Regexp::Whitespace::Parser::macro->new( 's', '+', '' );
redo LOOP;
}
if ( $exact =~ / \G (\S+) /xgc ) {
push @pieces, Regexp::Whitespace::Parser::text->new( $1, '', '' );
redo LOOP;
}
}
} else {
# FIXME: these conversion rules needs checking
# s is any char that matches /\s/
# s becomes \s+
# s? becomes \s*
# s* becomes \s*
# s{0} becomes \s*
# s{0,N} becomes \s*
# s{M,N} becomes \s+
# s+ becomes \s+
#
# the non-greedy flag is kept (don't know if that's correct)
my ($q, $ng) = ($self->quant, $self->ngreed);
my $nq = ( $q =~ /\A ( [?*] | [{]0 ) /x ) ? '*' : '+';
return Regexp::Whitespace::Parser::macro->new( 's', $nq, $ng );
# TODO: some tests would be nice
}
return @pieces;
} else {
# no needed conversion
return $self->clone;
}
}
# register this package into @ISA of exact types
{
my @exact_types = qw( text oct hex slash ctrl named ); # ?! utf8hex
no strict 'refs';
for my $type (@exact_types) {
unshift @{ 'Regexp::Whitespace::Parser::' . $type . '::ISA' }, __PACKAGE__;
}
}
package Regexp::Whitespace::Parser::text;
$Regexp::Whitespace::Parser::text::VERSION = '0.001_1'; # TRIAL
sub exact_text {
return shift->{TEXT};
}
package Regexp::Whitespace::Parser::oct;
$Regexp::Whitespace::Parser::oct::VERSION = '0.001_1'; # TRIAL
sub exact_text {
return chr oct(shift->{TEXT});
}
package Regexp::Whitespace::Parser::hex;
$Regexp::Whitespace::Parser::hex::VERSION = '0.001_1'; # TRIAL
sub exact_text {
return chr hex(shift->{TEXT});
}
#package ...::utf8hext; # FIXME wait for new release of YAPE::Regex
package Regexp::Whitespace::Parser::slash;
$Regexp::Whitespace::Parser::slash::VERSION = '0.001_1'; # TRIAL
my %known_sequences = (
't' => "\t",
'n' => "\n",
'r' => "\r",
'a' => "\a",
'f' => "\f",
'b' => "\b",
'e' => "\e",
);
sub exact_text {
my $t = shift->{TEXT};
return $known_sequences{$t} || $t;
}
package Regexp::Whitespace::Parser::ctrl;
$Regexp::Whitespace::Parser::ctrl::VERSION = '0.001_1'; # TRIAL
sub exact_text {
my $t = shift->{TEXT};
return chr( ord(uc $t) ^ 0x40 );
}
package Regexp::Whitespace::Parser::named;
$Regexp::Whitespace::Parser::named::VERSION = '0.001_1'; # TRIAL
sub exact_text {
require charnames;
return charnames::vianame(shift->{TEXT});
}
1;