package Inline::CPP::grammar; ## no critic (Package)
use strict;
use warnings;
use vars qw($TYPEMAP_KIND $class_part $class_decl $star);
# Dev versions will have a _0xx suffix.
# We eval the $VERSION to accommodate dev version numbering as described in
# perldoc perlmodstyle
our $VERSION = '0.49';
#$VERSION = eval $VERSION; ## no critic (eval)
# Parse::RecDescent 1.90 and later have an incompatible change
# 'The key of an %item entry for a repeated subrule now includes
# the repetition specifier.'
# Hence various hash keys may or may not need trailing '(s?)' depending on
# the version of Parse::RecDescent we are using.
require Parse::RecDescent;
# Deal with Parse::RecDescent's version numbers for development
# releases (eg, '1.96_000') resulting in a warning about non-numeric in >
# comparison.
{ # Lexical scope.
# Eval away the underscore. "1.96_000" => "1.96000".
# Use that "stable release" version number as the basis for our numeric
# comparison.
my $stable_version = eval $Parse::RecDescent::VERSION; ## no critic (eval)
( $class_part, $class_decl, $star ) =
map {
( $stable_version > 1.89 )
? "$_(s?)"
: $_
} qw ( class_part class_decl star );
} # End lexical scope.
#============================================================================
# Regular expressions to match code blocks, numbers, strings, parenthesized
# expressions, function calls, and macros. The more complex regexes are only
# implemented in 5.6.0 and above, so they're in eval-blocks.
#
# These are all adapted from the output of Damian Conway's excellent
# Regexp::Common module. In future, Inline::CPP may depend directly on it,
# but for now I'll just duplicate the code.
use vars qw( $code_block $string $number $parens $funccall );
#============================================================================
# $RE{balanced}{-parens=>q|{}()[]"'|}
eval <<'END'; ## no critic (eval)
$code_block = qr'(?-xism:(?-xism:(?:[{](?:(?>[^][)(}{]+)|(??{$Inline::CPP::grammar::code_block}))*[}]))|(?-xism:(?-xism:(?:[(](?:(?>[^][)(}{]+)|(??{$Inline::CPP::grammar::code_block}))*[)]))|(?-xism:(?-xism:(?:[[](?:(?>[^][)(}{]+)|(??{$Inline::CPP::grammar::code_block}))*[]]))|(?-xism:(?!)))))';
END
$code_block = qr'{[^}]*}' if $@; # For the stragglers: here's a lame regexp.
# $RE{balanced}{-parens=>q|()"'|}
eval <<'END'; ## no critic (eval)
$parens = qr'(?-xism:(?-xism:(?:[(](?:(?>[^)(]+)|(??{$Inline::CPP::grammar::parens}))*[)]))|(?-xism:(?!)))';
END
$parens = qr'\([^)]*\)' if $@; # For the stragglers: here's another
# $RE{quoted}
$string = qr'(?:(?:\")(?:[^\\\"]*(?:\\.[^\\\"]*)*)(?:\")|(?:\')(?:[^\\\']*(?:\\.[^\\\']*)*)(?:\')|(?:\`)(?:[^\\\`]*(?:\\.[^\\\`]*)*)(?:\`))';
# $RE{num}{real}|$RE{num}{real}{-base=>16}|$RE{num}{int}
$number = qr'(?:(?i)(?:[+-]?)(?:(?=[0123456789]|[.])(?:[0123456789]*)(?:(?:[.])(?:[0123456789]{0,}))?)(?:(?:[E])(?:(?:[+-]?)(?:[0123456789]+))|))|(?:(?i)(?:[+-]?)(?:(?=[0123456789ABCDEF]|[.])(?:[0123456789ABCDEF]*)(?:(?:[.])(?:[0123456789ABCDEF]{0,}))?)(?:(?:[G])(?:(?:[+-]?)(?:[0123456789ABCDEF]+))|))|(?:(?:[+-]?)(?:\d+))';
$funccall = qr/(?:[_a-zA-Z][_a-zA-Z0-9]*::)*[_a-zA-Z][_a-zA-Z0-9]*(?:$Inline::CPP::grammar::parens)?/;
#============================================================================
# Inline::CPP's grammar
#============================================================================
sub grammar {
return <<'END';
{ use Data::Dumper; }
{
sub handle_class_def {
my ($thisparser, $def) = @_;
# print "Found a class: $def->[0]\n";
my $class = $def->[0];
my @parts;
for my $part (@{$def->[1]}) { push @parts, @$_ for @$part }
push @{$thisparser->{data}{classes}}, $class
unless defined $thisparser->{data}{class}{$class};
$thisparser->{data}{class}{$class} = \@parts;
# print "Class $class:\n", Dumper \@parts;
Inline::CPP::grammar::typemap($thisparser, $class);
[$class, \@parts];
}
sub handle_typedef {
my ($thisparser, $t) = @_;
my ($name, $type) = @{$t}{qw(name type)};
# print "found a typedef: $name => $type\n";
# XXX: this doesn't handle non-class typedefs that we could handle,
# e.g. "typedef int my_int_t"
if ($thisparser->{data}{class}{$type}
&& !exists($thisparser->{data}{class}{$name})) {
push @{$thisparser->{data}{classes}}, $name;
$thisparser->{data}{class}{$name} = $thisparser->{data}{class}{$type};
Inline::CPP::grammar::typemap($thisparser, $name);
}
$t;
}
sub handle_enum {
my ($thisparser, $t) = @_;
$t;
}
}
code: part(s) {1}
part: comment
| typedef
{
handle_typedef($thisparser, $item[1]);
1;
}
| enum
{
my $t = handle_enum($thisparser, $item[1]);
push @{$thisparser->{data}{enums}}, $t;
1;
}
| class_def
{
handle_class_def($thisparser, $item[1]);
1;
}
| function_def
{
# print "found a function: $item[1]->{name}\n";
my $name = $item[1]->{name};
my $i=0;
for my $arg (@{$item[1]->{args}}) {
$arg->{name} = 'dummy' . ++$i unless defined $arg->{name};
}
Inline::CPP::grammar::strip_ellipsis($thisparser,
$item[1]->{args});
push @{$thisparser->{data}{functions}}, $name
unless defined $thisparser->{data}{function}{$name};
$thisparser->{data}{function}{$name} = $item[1];
# print Dumper $item[1];
1;
}
| all
typedef: 'typedef' class IDENTIFIER(?) '{' <commit> class_part(s?) '}' IDENTIFIER ';'
{
my ($class, $parts);
$class = $item[3][0] || 'anon_class'.($thisparser->{data}{anonclass}++);
($class, $parts)= handle_class_def($thisparser, [$class, $item{$Inline::CPP::grammar::class_part}]);
{ thing => 'typedef', name => $item[8], type => $class, body => $parts }
}
| 'typedef' IDENTIFIER IDENTIFIER ';'
{ { thing => 'typedef', name => $item[3], type => $item[2] } }
| 'typedef' /[^;]*/ ';'
{
# dprint "Typedef $item{__DIRECTIVE1__} is too heinous\n";
{ thing => 'comment'}
}
enum: 'enum' IDENTIFIER(?) '{' <leftop: enum_item ',' enum_item> '}' ';'
{
{ thing => 'enum', name => $item{IDENTIFIER}[0],
body => $item{__DIRECTIVE1__} }
}
enum_item: IDENTIFIER '=' <commit> /[0-9]+/
{ [$item{IDENTIFIER}, $item{__PATTERN1__}] }
| IDENTIFIER
{ [$item{IDENTIFIER}, undef] }
class_def: class IDENTIFIER '{' <commit> class_part(s?) '}' ';'
{
[@item{'IDENTIFIER',$Inline::CPP::grammar::class_part}]
}
| class IDENTIFIER ':' <commit> <leftop: inherit ',' inherit>
'{' class_part(s?) '}' ';'
{
push @{$item{$Inline::CPP::grammar::class_part}}, [$item{__DIRECTIVE2__}];
[@item{'IDENTIFIER',$Inline::CPP::grammar::class_part}]
}
inherit: scope IDENTIFIER
{ {thing => 'inherits', name => $item[2], scope => $item[1]} }
class_part: comment { [ {thing => 'comment'} ] }
| scope ':' <commit> class_decl(s?)
{
for my $part (@{$item{$Inline::CPP::grammar::class_decl}}) {
$_->{scope} = $item[1] for @$part;
}
$item{$Inline::CPP::grammar::class_decl}
}
| class_decl(s)
{
for my $part (@{$item[1]}) {
$_->{scope} = $thisparser->{data}{defaultscope}
for @$part;
}
$item[1]
}
class_decl: comment { [{thing => 'comment'}] }
| typedef { [ handle_typedef($thisparser, $item[1]) ] }
| enum { [ handle_enum($thisparser, $item[1]) ] }
| class_def
{
my ($class, $parts) = handle_class_def($thisparser, $item[1]);
[{ thing => 'class', name => $class, body => $parts }];
}
| method_def
{
$item[1]->{thing} = 'method';
# print "class_decl found a method: $item[1]->{name}\n";
my $i=0;
for my $arg (@{$item[1]->{args}}) {
$arg->{name} = 'dummy' . ++$i unless defined $arg->{name};
}
Inline::CPP::grammar::strip_ellipsis($thisparser,
$item[1]->{args});
[$item[1]];
}
| member_def
{
# print "class_decl found one or more members:\n", Dumper(\@item);
$_->{thing} = 'member' for @{$item[1]};
$item[1];
}
function_def: operator <commit> ';'
{
$item[1]
}
| operator <commit> smod(?) code_block
{
$item[1]
}
| IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' smod(?) code_block
{
{name => $item{IDENTIFIER}, args => $item{__DIRECTIVE2__}, rtype => '' }
}
| rtype IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' ';'
{
{rtype => $item[1], name => $item[2], args => $item{__DIRECTIVE1__} }
}
| rtype IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' smod(?) code_block
{
{rtype => $item{rtype}, name => $item[2], args => $item{__DIRECTIVE1__} }
}
method_def: operator <commit> method_imp
{
# print "method operator:\n", Dumper $item[1];
$item[1];
}
| IDENTIFIER '(' <commit> <leftop: arg ',' arg>(s?) ')' method_imp
{
# print "con-/de-structor found: $item[1]\n";
{name => $item[1], args => $item{__DIRECTIVE2__}, abstract => ${$item{method_imp}} };
}
| rtype IDENTIFIER '(' <leftop: arg ',' arg>(s?) ')' method_imp
{
# print "method found: $item[2]\n";
$return =
{name => $item[2], rtype => $item[1], args => $item[4],
abstract => ${$item[6]},
rconst => $thisparser->{data}{smod}{const},
};
$thisparser->{data}{smod}{const} = 0;
}
operator: rtype(?) 'operator' /\(\)|[^()]+/ '(' <leftop: arg ',' arg>(s?) ')'
{
# print "Found operator: $item[1][0] operator $item[3]\n";
{name=> "operator $item[3]", args => $item[5], ret => $item[1][0]}
}
# By adding smod, we allow 'const' member functions. This would also bind to
# incorrect C++ with the word 'static' after the argument list, but we don't
# care at all because such code would never be compiled successfully.
# By adding init, we allow constructors to initialize references. Again, we'll
# allow them anywhere, but our goal is not to enforce c++ standards -- that's
# the compiler's job.
method_imp: smod(?) ';' { \0 }
| smod(?) '=' <commit> '0' ';' { \1 }
| smod(?) initlist(?) code_block { \0 }
| smod(?) '=' '0' code_block { \0 }
initlist: ':' <leftop: subexpr ',' subexpr>
member_def: anytype <leftop: var ',' var> ';'
{
my @retval;
for my $def (@{$item[2]}) {
my $type = join '', $item[1], @{$def->[0]};
my $name = $def->[1];
# print "member found: type=$type, name=$name\n";
push @retval, { name => $name, type => $type };
}
\@retval;
}
var: star(s?) IDENTIFIER '=' expr { [@item[1,2]] }
| star(s?) IDENTIFIER '[' expr ']' { [@item[1,2]] }
| star(s?) IDENTIFIER { [@item[1,2]] }
arg: type IDENTIFIER '=' expr
{
# print "argument $item{IDENTIFIER} found\n";
# print "expression: $item{expr}\n";
{type => $item[1], name => $item{IDENTIFIER}, optional => 1,
offset => $thisoffset}
}
| type IDENTIFIER
{
# print "argument $item{IDENTIFIER} found\n";
{type => $item[1], name => $item{IDENTIFIER}, offset => $thisoffset}
}
| type { {type => $item[1]} }
| '...'
{ {name => '...', type => '...', offset => $thisoffset} }
ident_part: /[~_a-z]\w*/i '<' <commit> <leftop: IDENTIFIER ',' IDENTIFIER>(s?) '>'
{
$item[1].'<'.join('', @{$item[4]}).'>'
}
| /[~_a-z]\w*/i
{
$item[1]
}
IDENTIFIER: <leftop: ident_part '::' ident_part>
{
my $x = join '::', @{$item[1]};
# print "IDENTIFIER: $x\n";
$x
}
# Parse::RecDescent is retarded in this one case: if a subrule fails, it
# gives up the entire rule. This is a stupid way to get around that.
rtype: rtype2 | rtype1
rtype1: TYPE star(s?)
{
$return = $item[1];
$return .= join '',' ',@{$item[2]} if @{$item[2]};
# print "rtype1: $return\n";
# return undef
# unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return});
}
rtype2: modifier(s) TYPE star(s?)
{
$return = $item[2];
$return = join ' ',grep{$_}@{$item[1]},$return
if @{$item[1]};
$return .= join '',' ',@{$item[3]} if @{$item[3]};
# print "rtype2: $return\n";
# return undef
# unless(defined$thisparser->{data}{typeconv}{valid_rtypes}{$return});
$return = 'static ' . $return
if $thisparser->{data}{smod}{static};
$thisparser->{data}{smod}{static} = 0;
}
type: type2 | type1
type1: TYPE star(s?)
{
$return = $item[1];
$return .= join '',' ',@{$item{$Inline::CPP::grammar::star}} if @{$item{$Inline::CPP::grammar::star}};
# print "type1: $return\n";
# return undef
# unless(defined$thisparser->{data}{typeconv}{valid_types}{$return});
}
type2: modifier(s) TYPE star(s?)
{
$return = $item{TYPE};
$return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]};
$return .= join '',' ',@{$item{$Inline::CPP::grammar::star}} if @{$item{$Inline::CPP::grammar::star}};
# print "type2: $return\n";
# return undef
# unless(defined$thisparser->{data}{typeconv}{valid_types}{$return});
}
anytype: anytype2 | anytype1
anytype1: TYPE star(s?)
{
$return = $item[1];
$return .= join '',' ',@{$item[2]} if @{$item[2]};
}
anytype2: modifier(s) TYPE star(s?)
{
$return = $item[2];
$return = join ' ',grep{$_}@{$item[1]},$return if @{$item[1]};
$return .= join '',' ',@{$item[3]} if @{$item[3]};
}
comment: m{\s* // [^\n]* \n }x
| m{\s* /\* (?:[^*]+|\*(?!/))* \*/ ([ \t]*)? }x
# long and short aren't recognized as modifiers because they break when used
# as regular types. Another Parse::RecDescent problem is greedy matching; I
# need tmodifier to "give back" long or short in cases where keeping them would
# cause the modifier rule to fail. One side-effect is 'long long' can never
# be parsed correctly here.
modifier: tmod
| smod { ++$thisparser->{data}{smod}{$item[1]}; ''}
| nmod { '' }
tmod: 'unsigned' # | 'long' | 'short'
smod: 'const' | 'static'
nmod: 'extern' | 'virtual' | 'mutable' | 'volatile' | 'inline'
scope: 'public' | 'private' | 'protected'
class: 'class' { $thisparser->{data}{defaultscope} = 'private'; $item[1] }
| 'struct' { $thisparser->{data}{defaultscope} = 'public'; $item[1] }
star: '*' | '&'
code_block: /$Inline::CPP::grammar::code_block/
# Consume expressions
expr: <leftop: subexpr OP subexpr> {
my $o = join '', @{$item[1]};
# print "expr: $o\n";
$o;
}
subexpr: /$Inline::CPP::grammar::funccall/ # Matches a macro, too
| /$Inline::CPP::grammar::string/
| /$Inline::CPP::grammar::number/
| UOP subexpr
OP: '+' | '-' | '*' | '/' | '^' | '&' | '|' | '%' | '||' | '&&'
UOP: '~' | '!' | '-' | '*' | '&'
TYPE: IDENTIFIER
all: /.*/
END
}
#============================================================================
# Generate typemap code for the classes and structs we bind to. This allows
# functions declared after a class to return or accept class objects as
# parameters.
#============================================================================
$TYPEMAP_KIND = 'O_Inline_CPP_Class';
sub typemap {
my ( $parser, $typename ) = @_;
# print "Inline::CPP::grammar::typemap(): typename=$typename\n";
my ($TYPEMAP, $INPUT, $OUTPUT);
$TYPEMAP = "$typename *\t\t$TYPEMAP_KIND\n";
$INPUT = <<"END";
if (sv_isobject(\$arg) && (SvTYPE(SvRV(\$arg)) == SVt_PVMG)) {
\$var = (\$type)SvIV((SV*)SvRV( \$arg ));
}
else {
warn ( \\"\${Package}::\$func_name() -- \$var is not a blessed reference\\" );
XSRETURN_UNDEF;
}
END
$OUTPUT = <<"END";
sv_setref_pv( \$arg, CLASS, (void*)\$var );
END
my $ctypename = $typename . ' *';
$parser->{data}{typeconv}{input_expr}{$TYPEMAP_KIND} ||= $INPUT;
$parser->{data}{typeconv}{output_expr}{$TYPEMAP_KIND} ||= $OUTPUT;
$parser->{data}{typeconv}{type_kind}{$ctypename} = $TYPEMAP_KIND;
$parser->{data}{typeconv}{valid_types}{$ctypename}++;
$parser->{data}{typeconv}{valid_rtypes}{$ctypename}++;
return;
}
#============================================================================
# Default action is to strip ellipses from the C++ code. This allows having
# _only_ a '...' in the code, just like XS. It is the default.
#============================================================================
sub strip_ellipsis {
my( $parser, $args ) = @_;
return if $parser->{ILSM}{PRESERVE_ELLIPSIS};
for ( my $i = 0; $i < @$args; $i++ ) {
next unless $args->[$i]{name} eq '...';
# if it's the first one, just strip it
if ($i==0) {
substr(
$parser->{ILSM}{code},
$args->[$i]{offset} - 3,
3,
' '
);
}
else {
my $prev = $i - 1;
my $prev_offset = $args->[$prev]{offset};
my $length = $args->[$i]{offset} - $prev_offset;
substr($parser->{ILSM}{code}, $prev_offset, $length) =~ s/\S/ /g;
}
}
return;
}
1;
__END__
=head1 Inline::CPP::grammar
All functions are internal. No documentation necessary.
=cut