package Pugs::Emitter::Rule::Perl5::CharClass;
use strict;
use Data::Dumper;
use vars qw( %char_class );
BEGIN {
%char_class = map { $_ => 1 } qw(
alpha alnum ascii blank
cntrl digit graph lower
print punct space upper
word xdigit
);
}
# input format:
# [
# '+alpha'
# '-[z]'
# ]
# TODO - set composition logic
# ( ( ( before +alpha ) | before +digit ) before not-alpha ) before not-digit )
sub emit {
#print Dumper( $_[0] );
#print Dumper( @{$_[0]} );
my @c = map { "$_" } @{$_[0]};
#print Dumper( @c );
my $out = '';
#my $last_cmd = '';
for ( @c ) {
my ( $op, $cmd ) = /(.)(.*)/;
$cmd =~ s/\s//g;
#if ( $last_cmd eq '-'
# && substr($cmd,0,1) eq '+'
# )
#{
# $out .= '|';
#}
#$last_cmd = substr($cmd,0,1);
$cmd =~ s/\.\./-/g; # ranges
# TODO - \o \O
if ( $cmd =~ /^ \[ \\ c \[ (.*) \] \] /x ) {
#$cmd = "(?:\\N{" . join( "}|\\N{", split( /\s*;\s*/, $1 ) ) . "})";
$cmd = "[\\N{" . join( "}\\N{", split( /\s*;\s*/, $1 ) ) . "}]";
}
elsif ( $cmd =~ /^ \[ \\ C \[ (.*) \] \] /x ) {
#$cmd = "(?!\\N{" . join( "}|\\N{", split( /\s*;\s*/, $1 ) ) . "})\\X";
$cmd = "[^\\N{" . join( "}\\N{", split( /\s*;\s*/, $1 ) ) . "}]";
}
elsif ( $cmd =~ /^ \[ \\ x \[ (.*) \] \] /x ) {
$cmd = "(?:\\x{$1})";
}
elsif ( $cmd =~ /^ \[ \\ X \[ (.*) \] \] /x ) {
$cmd = "(?!\\x{$1})\\X";
#$cmd = "[^\\x{$1}]";
}
elsif ( $cmd =~ /^ \s* \[ (.*) /x ) {
$cmd = '[' . $1;
}
elsif ( $cmd =~ /^ \s* (.*) /x ) {
my $name = $1;
$cmd = ( exists $char_class{$name} )
? "[[:$name:]]"
: "\\p{$name}";
}
if ( $op eq '+' ) {
$out .=
( $out eq '' )
? '(?=' . $cmd . ')'
: '|(?=' . $cmd . ')';
}
elsif ( $op eq '-' ) {
$out .= '(?!' . $cmd . ')';
}
else {
#print Dumper( @c ), ' == ', $out, "\n";
die "invalid character set op: $op";
}
}
$out = "(?:$out)\\X";
#print Dumper( @c ), ' == ', $out, "\n";
return $out;
}
1;